625 files changed, 18024 insertions, 6599 deletions
diff --git a/Documentation/acpi/acpi-lid.txt b/Documentation/acpi/acpi-lid.txt index 22cb3091f297..effe7af3a5af 100644 --- a/Documentation/acpi/acpi-lid.txt +++ b/Documentation/acpi/acpi-lid.txt @@ -59,20 +59,28 @@ button driver uses the following 3 modes in order not to trigger issues. If the userspace hasn't been prepared to ignore the unreliable "opened" events and the unreliable initial state notification, Linux users can use the following kernel parameters to handle the possible issues: -A. button.lid_init_state=open: +A. button.lid_init_state=method: + When this option is specified, the ACPI button driver reports the + initial lid state using the return value of the _LID control method + and whether the "opened"/"closed" events are paired fully relies on the + firmware implementation. + This option can be used to fix some platforms where the return value + of the _LID control method is reliable but the initial lid state + notification is missing. + This option is the default behavior during the period the userspace + isn't ready to handle the buggy AML tables. +B. button.lid_init_state=open: When this option is specified, the ACPI button driver always reports the initial lid state as "opened" and whether the "opened"/"closed" events are paired fully relies on the firmware implementation. This may fix some platforms where the return value of the _LID control method is not reliable and the initial lid state notification is missing. - This option is the default behavior during the period the userspace - isn't ready to handle the buggy AML tables. If the userspace has been prepared to ignore the unreliable "opened" events and the unreliable initial state notification, Linux users should always use the following kernel parameter: -B. button.lid_init_state=ignore: +C. button.lid_init_state=ignore: When this option is specified, the ACPI button driver never reports the initial lid state and there is a compensation mechanism implemented to ensure that the reliable "closed" notifications can always be delivered
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 289c80f7760e..09aa2e949787 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -1,4 +1,5 @@ .. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy <cpufreq_policy>` +.. |intel_pstate| replace:: :doc:`intel_pstate <intel_pstate>` ======================= CPU Performance Scaling @@ -75,7 +76,7 @@ feedback registers, as that information is typically specific to the hardware interface it comes from and may not be easily represented in an abstract, platform-independent way. For this reason, ``CPUFreq`` allows scaling drivers to bypass the governor layer and implement their own performance scaling -algorithms. That is done by the ``intel_pstate`` scaling driver. +algorithms. That is done by the |intel_pstate| scaling driver. ``CPUFreq`` Policy Objects @@ -174,13 +175,13 @@ necessary to restart the scaling governor so that it can take the new online CPU into account. That is achieved by invoking the governor's ``->stop`` and ``->start()`` callbacks, in this order, for the entire policy. -As mentioned before, the ``intel_pstate`` scaling driver bypasses the scaling +As mentioned before, the |intel_pstate| scaling driver bypasses the scaling governor layer of ``CPUFreq`` and provides its own P-state selection algorithms.
-Consequently, if ``intel_pstate`` is used, scaling governors are not attached to +Consequently, if |intel_pstate| is used, scaling governors are not attached to new policy objects. Instead, the driver's ``->setpolicy()`` callback is invoked to register per-CPU utilization update callbacks for each policy. These callbacks are invoked by the CPU scheduler in the same way as for scaling -governors, but in the ``intel_pstate`` case they both determine the P-state to +governors, but in the |intel_pstate| case they both determine the P-state to use and change the hardware configuration accordingly in one go from scheduler context. @@ -257,7 +258,7 @@ are the following: ``scaling_available_governors`` List of ``CPUFreq`` scaling governors present in the kernel that can - be attached to this policy or (if the ``intel_pstate`` scaling driver is + be attached to this policy or (if the |intel_pstate| scaling driver is in use) list of scaling algorithms provided by the driver that can be applied to this policy. @@ -274,7 +275,7 @@ are the following: the CPU is actually running at (due to hardware design and other limitations). - Some scaling drivers (e.g. ``intel_pstate``) attempt to provide + Some scaling drivers (e.g. |intel_pstate|) attempt to provide information more precisely reflecting the current CPU frequency through this attribute, but that still may not be the exact current CPU frequency as seen by the hardware at the moment. @@ -284,13 +285,13 @@ are the following: ``scaling_governor`` The scaling governor currently attached to this policy or (if the - ``intel_pstate`` scaling driver is in use) the scaling algorithm + |intel_pstate| scaling driver is in use) the scaling algorithm provided by the driver that is currently applied to this policy. This attribute is read-write and writing to it will cause a new scaling governor to be attached to this policy or a new scaling algorithm provided by the scaling driver to be applied to it (in the - ``intel_pstate`` case), as indicated by the string written to this + |intel_pstate| case), as indicated by the string written to this attribute (which must be one of the names listed by the ``scaling_available_governors`` attribute described above). @@ -619,7 +620,7 @@ This file is located under :file:`/sys/devices/system/cpu/cpufreq/` and controls the "boost" setting for the whole system. It is not present if the underlying scaling driver does not support the frequency boost mechanism (or supports it, but provides a driver-specific interface for controlling it, like -``intel_pstate``). +|intel_pstate|). If the value in this file is 1, the frequency boost mechanism is enabled. This means that either the hardware can be put into states in which it is able to diff --git a/Documentation/admin-guide/pm/index.rst b/Documentation/admin-guide/pm/index.rst index c80f087321fc..7f148f76f432 100644 --- a/Documentation/admin-guide/pm/index.rst +++ b/Documentation/admin-guide/pm/index.rst @@ -6,6 +6,7 @@ Power Management :maxdepth: 2 cpufreq + intel_pstate .. only:: subproject and html diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst new file mode 100644 index 000000000000..33d703989ea8 --- /dev/null +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -0,0 +1,755 @@ +=============================================== +``intel_pstate`` CPU Performance Scaling Driver +=============================================== + +:: + + Copyright (c) 2017 Intel Corp., Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> + + +General Information +=================== + +``intel_pstate`` is a part of the +:doc:`CPU performance scaling subsystem <cpufreq>` in the Linux kernel +(``CPUFreq``). It is a scaling driver for the Sandy Bridge and later +generations of Intel processors. Note, however, that some of those processors +may not be supported. [To understand ``intel_pstate`` it is necessary to know +how ``CPUFreq`` works in general, so this is the time to read :doc:`cpufreq` if +you have not done that yet.] + +For the processors supported by ``intel_pstate``, the P-state concept is broader +than just an operating frequency or an operating performance point (see the +`LinuxCon Europe 2015 presentation by Kristen Accardi <LCEU2015_>`_ for more +information about that). For this reason, the representation of P-states used +by ``intel_pstate`` internally follows the hardware specification (for details +refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual +Volume 3: System Programming Guide <SDM_>`_). However, the ``CPUFreq`` core +uses frequencies for identifying operating performance points of CPUs and +frequencies are involved in the user space interface exposed by it, so +``intel_pstate`` maps its internal representation of P-states to frequencies too +(fortunately, that mapping is unambiguous). At the same time, it would not be +practical for ``intel_pstate`` to supply the ``CPUFreq`` core with a table of +available frequencies due to the possible size of it, so the driver does not do +that. Some functionality of the core is limited by that. + +Since the hardware P-state selection interface used by ``intel_pstate`` is +available at the logical CPU level, the driver always works with individual +CPUs. Consequently, if ``intel_pstate`` is in use, every ``CPUFreq`` policy +object corresponds to one logical CPU and ``CPUFreq`` policies are effectively +equivalent to CPUs. In particular, this means that they become "inactive" every +time the corresponding CPU is taken offline and need to be re-initialized when +it goes back online. + +``intel_pstate`` is not modular, so it cannot be unloaded, which means that the +only way to pass early-configuration-time parameters to it is via the kernel +command line. However, its configuration can be adjusted via ``sysfs`` to a +great extent. In some configurations it even is possible to unregister it via +``sysfs`` which allows another ``CPUFreq`` scaling driver to be loaded and +registered (see `below <status_attr_>`_). + + +Operation Modes +=============== + +``intel_pstate`` can operate in three different modes: in the active mode with +or without hardware-managed P-states support and in the passive mode. Which of +them will be in effect depends on what kernel command line options are used and +on the capabilities of the processor. + +Active Mode +----------- + +This is the default operation mode of ``intel_pstate``. If it works in this +mode, the ``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq`` +policies contains the string "intel_pstate". + +In this mode the driver bypasses the scaling governors layer of ``CPUFreq`` and +provides its own scaling algorithms for P-state selection. Those algorithms +can be applied to ``CPUFreq`` policies in the same way as generic scaling +governors (that is, through the ``scaling_governor`` policy attribute in +``sysfs``). [Note that different P-state selection algorithms may be chosen for +different policies, but that is not recommended.] 
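+
+As a brief illustration, the algorithm applied to a given policy can be
+inspected and changed through the ``scaling_governor`` policy attribute
+mentioned above. [A minimal sketch; it assumes ``intel_pstate`` in the active
+mode, uses CPU 0 as an example, and in practice the same value would be
+written for every CPU, per the note above.] ::
+
+    # cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_driver
+    intel_pstate
+    # cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors
+    performance powersave
+    # echo performance > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor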
+ +They are not generic scaling governors, but their names are the same as the +names of some of those governors. Moreover, confusingly enough, they generally +do not work in the same way as the generic governors they share the names with. +For example, the ``powersave`` P-state selection algorithm provided by +``intel_pstate`` is not a counterpart of the generic ``powersave`` governor +(roughly, it corresponds to the ``schedutil`` and ``ondemand`` governors). + +There are two P-state selection algorithms provided by ``intel_pstate`` in the +active mode: ``powersave`` and ``performance``. The way they both operate +depends on whether or not the hardware-managed P-states (HWP) feature has been +enabled in the processor and possibly on the processor model. + +Which of the P-state selection algorithms is used by default depends on the +:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option. +Namely, if that option is set, the ``performance`` algorithm will be used by +default, and the other one will be used by default if it is not set. + +Active Mode With HWP +~~~~~~~~~~~~~~~~~~~~ + +If the processor supports the HWP feature, it will be enabled during the +processor initialization and cannot be disabled after that. It is possible +to avoid enabling it by passing the ``intel_pstate=no_hwp`` argument to the +kernel in the command line. + +If the HWP feature has been enabled, ``intel_pstate`` relies on the processor to +select P-states by itself, but still it can give hints to the processor's +internal P-state selection logic. What those hints are depends on which P-state +selection algorithm has been applied to the given policy (or to the CPU it +corresponds to). + +Even though the P-state selection is carried out by the processor automatically, +``intel_pstate`` registers utilization update callbacks with the CPU scheduler +in this mode. However, they are not used for running a P-state selection +algorithm, but for periodic updates of the current CPU frequency information to +be made available from the ``scaling_cur_freq`` policy attribute in ``sysfs``. + +HWP + ``performance`` +..................... + +In this configuration ``intel_pstate`` will write 0 to the processor's +Energy-Performance Preference (EPP) knob (if supported) or its +Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's +internal P-state selection logic is expected to focus entirely on performance. + +This will override the EPP/EPB setting coming from the ``sysfs`` interface +(see `Energy vs Performance Hints`_ below). + +Also, in this configuration the range of P-states available to the processor's +internal P-state selection logic is always restricted to the upper boundary +(that is, the maximum P-state that the driver is allowed to use). + +HWP + ``powersave`` +................... + +In this configuration ``intel_pstate`` will set the processor's +Energy-Performance Preference (EPP) knob (if supported) or its +Energy-Performance Bias (EPB) knob (otherwise) to whatever value it was +previously set to via ``sysfs`` (or whatever default value it was +set to by the platform firmware). This usually causes the processor's +internal P-state selection logic to be less performance-focused. + +Active Mode Without HWP +~~~~~~~~~~~~~~~~~~~~~~~ + +This is the default operation mode for processors that do not support the HWP +feature. It also is used by default with the ``intel_pstate=no_hwp`` argument +in the kernel command line. 
However, in this mode ``intel_pstate`` may refuse +to work with the given processor if it does not recognize it. [Note that +``intel_pstate`` will never refuse to work with any processor with the HWP +feature enabled.] + +In this mode ``intel_pstate`` registers utilization update callbacks with the +CPU scheduler in order to run a P-state selection algorithm, either +``powersave`` or ``performance``, depending on the ``scaling_governor`` policy +setting in ``sysfs``. The current CPU frequency information to be made +available from the ``scaling_cur_freq`` policy attribute in ``sysfs`` is +periodically updated by those utilization update callbacks too. + +``performance`` +............... + +Without HWP, this P-state selection algorithm is always the same regardless of +the processor model and platform configuration. + +It selects the maximum P-state it is allowed to use, subject to limits set via +``sysfs``, every time the P-state selection computations are carried out by the +driver's utilization update callback for the given CPU (that does not happen +more often than every 10 ms), but the hardware configuration will not be changed +if the new P-state is the same as the current one. + +This is the default P-state selection algorithm if the +:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option +is set. + +``powersave`` +............. + +Without HWP, this P-state selection algorithm generally depends on the +processor model and/or the system profile setting in the ACPI tables and there +are two variants of it. + +One of them is used with processors from the Atom line and (regardless of the +processor model) on platforms with the system profile in the ACPI tables set to +"mobile" (laptops mostly), "tablet", "appliance PC", "desktop", or +"workstation". It is also used with processors supporting the HWP feature if +that feature has not been enabled (that is, with the ``intel_pstate=no_hwp`` +argument in the kernel command line). It is similar to the algorithm +implemented by the generic ``schedutil`` scaling governor except that the +utilization metric used by it is based on numbers coming from feedback +registers of the CPU. It generally selects P-states proportional to the +current CPU utilization, so it is referred to as the "proportional" algorithm. + +The second variant of the ``powersave`` P-state selection algorithm, used in all +of the other cases (generally, on processors from the Core line, so it is +referred to as the "Core" algorithm), is based on the values read from the APERF +and MPERF feedback registers and the previously requested target P-state. +It does not really take CPU utilization into account explicitly, but as a rule +it causes the CPU P-state to ramp up very quickly in response to increased +utilization which is generally desirable in server environments. + +Regardless of the variant, this algorithm is run by the driver's utilization +update callback for the given CPU when it is invoked by the CPU scheduler, but +not more often than every 10 ms (that can be tweaked via ``debugfs`` in `this +particular case <Tuning Interface in debugfs_>`_). Like in the ``performance`` +case, the hardware configuration is not touched if the new P-state turns out to +be the same as the current one. + +This is the default P-state selection algorithm if the +:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option +is not set.
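+
+As a brief illustration, which of the two algorithms applies by default on a
+given system can be verified by checking the kernel configuration and the
+resulting ``scaling_governor`` value. [A sketch; the
+``/boot/config-$(uname -r)`` location is a common distribution convention,
+not guaranteed, and the output shown is an example.] ::
+
+    # grep CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE /boot/config-$(uname -r)
+    CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+    # cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
+    performance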
+ +Passive Mode +------------ + +This mode is used if the ``intel_pstate=passive`` argument is passed to the +kernel in the command line (it implies the ``intel_pstate=no_hwp`` setting too). +Like in the active mode without HWP support, in this mode ``intel_pstate`` may +refuse to work with the given processor if it does not recognize it. + +If the driver works in this mode, the ``scaling_driver`` policy attribute in +``sysfs`` for all ``CPUFreq`` policies contains the string "intel_cpufreq". +Then, the driver behaves like a regular ``CPUFreq`` scaling driver. That is, +it is invoked by generic scaling governors when necessary to talk to the +hardware in order to change the P-state of a CPU (in particular, the +``schedutil`` governor can invoke it directly from scheduler context). + +While in this mode, ``intel_pstate`` can be used with all of the (generic) +scaling governors listed by the ``scaling_available_governors`` policy attribute +in ``sysfs`` (and the P-state selection algorithms described above are not +used). Then, it is responsible for the configuration of policy objects +corresponding to CPUs and provides the ``CPUFreq`` core (and the scaling +governors attached to the policy objects) with accurate information on the +maximum and minimum operating frequencies supported by the hardware (including +the so-called "turbo" frequency ranges). In other words, in the passive mode +the entire range of available P-states is exposed by ``intel_pstate`` to the +``CPUFreq`` core. However, in this mode the driver does not register +utilization update callbacks with the CPU scheduler and the ``scaling_cur_freq`` +information comes from the ``CPUFreq`` core (and is the last frequency selected +by the current scaling governor for the given policy). + + +.. _turbo: + +Turbo P-states Support +====================== + +In the majority of cases, the entire range of P-states available to +``intel_pstate`` can be divided into two sub-ranges that correspond to +different types of processor behavior, above and below a boundary that +will be referred to as the "turbo threshold" in what follows. + +The P-states above the turbo threshold are referred to as "turbo P-states" and +the whole sub-range of P-states they belong to is referred to as the "turbo +range". These names are related to the Turbo Boost technology allowing a +multicore processor to opportunistically increase the P-state of one or more +cores if there is enough power to do that and if that is not going to cause the +thermal envelope of the processor package to be exceeded. + +Specifically, if software sets the P-state of a CPU core within the turbo range +(that is, above the turbo threshold), the processor is permitted to take over +performance scaling control for that core and put it into turbo P-states of its +choice going forward. However, that permission is interpreted differently by +different processor generations. Namely, the Sandy Bridge generation of +processors will never use any P-states above the last one set by software for +the given core, even if it is within the turbo range, whereas all of the later +processor generations will take it as a license to use any P-states from the +turbo range, even above the one set by software. In other words, on those +processors setting any P-state from the turbo range will enable the processor +to put the given core into all turbo P-states up to and including the maximum +supported one as it sees fit. + +One important property of turbo P-states is that they are not sustainable. 
More +precisely, there is no guarantee that any CPUs will be able to stay in any of +those states indefinitely, because the power distribution within the processor +package may change over time or the thermal envelope it was designed for might +be exceeded if a turbo P-state was used for too long. + +In turn, the P-states below the turbo threshold generally are sustainable. In +fact, if one of them is set by software, the processor is not expected to change +it to a lower one unless in a thermal stress or a power limit violation +situation (a higher P-state may still be used if it is set for another CPU in +the same package at the same time, for example). + +Some processors allow multiple cores to be in turbo P-states at the same time, +but the maximum P-state that can be set for them generally depends on the number +of cores running concurrently. The maximum turbo P-state that can be set for 3 +cores at the same time usually is lower than the analogous maximum P-state for +2 cores, which in turn usually is lower than the maximum turbo P-state that can +be set for 1 core. The one-core maximum turbo P-state is thus the maximum +supported one overall. + +The maximum supported turbo P-state, the turbo threshold (the maximum supported +non-turbo P-state) and the minimum supported P-state are specific to the +processor model and can be determined by reading the processor's model-specific +registers (MSRs). Moreover, some processors support the Configurable TDP +(Thermal Design Power) feature and, when that feature is enabled, the turbo +threshold effectively becomes a configurable value that can be set by the +platform firmware. + +Unlike ``_PSS`` objects in the ACPI tables, ``intel_pstate`` always exposes +the entire range of available P-states, including the whole turbo range, to the +``CPUFreq`` core and (in the passive mode) to generic scaling governors. This +generally causes turbo P-states to be set more often when ``intel_pstate`` is +used relative to ACPI-based CPU performance scaling (see `below <acpi-cpufreq_>`_ +for more information). + +Moreover, since ``intel_pstate`` always knows what the real turbo threshold is +(even if the Configurable TDP feature is enabled in the processor), its +``no_turbo`` attribute in ``sysfs`` (described `below <no_turbo_attr_>`_) should +work as expected in all cases (that is, if set to disable turbo P-states, it +always should prevent ``intel_pstate`` from using them). + + +Processor Support +================= + +To handle a given processor ``intel_pstate`` requires a number of different +pieces of information on it to be known, including: + + * The minimum supported P-state. + + * The maximum supported `non-turbo P-state <turbo_>`_. + + * Whether or not turbo P-states are supported at all. + + * The maximum supported `one-core turbo P-state <turbo_>`_ (if turbo P-states + are supported). + + * The scaling formula to translate the driver's internal representation + of P-states into frequencies and the other way around. + +Generally, ways to obtain that information are specific to the processor model +or family. Although it often is possible to obtain all of it from the processor +itself (using model-specific registers), there are cases in which hardware +manuals need to be consulted to get to it too. + +For this reason, there is a list of supported processors in ``intel_pstate`` and +the driver initialization will fail if the detected processor is not in that +list, unless it supports the `HWP feature <Active Mode_>`_. 
[The interface to +obtain all of the information listed above is the same for all of the processors +supporting the HWP feature, which is why they all are supported by +``intel_pstate``.] + + +User Space Interface in ``sysfs`` +================================= + +Global Attributes +----------------- + +``intel_pstate`` exposes several global attributes (files) in ``sysfs`` to +control its functionality at the system level. They are located in the +``/sys/devices/system/cpu/intel_pstate/`` directory and affect all +CPUs. + +Some of them are not present if the ``intel_pstate=per_cpu_perf_limits`` +argument is passed to the kernel in the command line. + +``max_perf_pct`` + Maximum P-state the driver is allowed to set in percent of the + maximum supported performance level (the highest supported `turbo + P-state <turbo_>`_). + + This attribute will not be exposed if the + ``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel + command line. + +``min_perf_pct`` + Minimum P-state the driver is allowed to set in percent of the + maximum supported performance level (the highest supported `turbo + P-state <turbo_>`_). + + This attribute will not be exposed if the + ``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel + command line. + +``num_pstates`` + Number of P-states supported by the processor (between 0 and 255 + inclusive) including both turbo and non-turbo P-states (see + `Turbo P-states Support`_). + + The value of this attribute is not affected by the ``no_turbo`` + setting described `below <no_turbo_attr_>`_. + + This attribute is read-only. + +``turbo_pct`` + Ratio of the `turbo range <turbo_>`_ size to the size of the entire + range of supported P-states, in percent. + + This attribute is read-only. + +.. _no_turbo_attr: + +``no_turbo`` + If set (equal to 1), the driver is not allowed to set any turbo P-states + (see `Turbo P-states Support`_). If unset (equal to 0, which is the + default), turbo P-states can be set by the driver. + [Note that ``intel_pstate`` does not support the general ``boost`` + attribute (supported by some other scaling drivers) which is replaced + by this one.] + + This attribute does not affect the maximum supported frequency value + supplied to the ``CPUFreq`` core and exposed via the policy interface, + but it affects the maximum possible value of per-policy P-state limits + (see `Interpretation of Policy Attributes`_ below for details). + +.. _status_attr: + +``status`` + Operation mode of the driver: "active", "passive" or "off". + + "active" + The driver is functional and in the `active mode + <Active Mode_>`_. + + "passive" + The driver is functional and in the `passive mode + <Passive Mode_>`_. + + "off" + The driver is not functional (it is not registered as a scaling + driver with the ``CPUFreq`` core). + + This attribute can be written to in order to change the driver's + operation mode or to unregister it. The string written to it must be + one of the possible values of it and, if successful, the write will + cause the driver to switch over to the operation mode represented by + that string - or to be unregistered in the "off" case. [Actually, + switching over from the active mode to the passive mode or the other + way around causes the driver to be unregistered and registered again + with a different set of callbacks, so all of its settings (the global + as well as the per-policy ones) are then reset to their default + values, possibly depending on the target operation mode.]
+ + That only is supported in some configurations, though (for example, if + the `HWP feature is enabled in the processor <Active Mode With HWP_>`_, + the operation mode of the driver cannot be changed), and if it is not + supported in the current configuration, writes to this attribute will + fail with an appropriate error. + +Interpretation of Policy Attributes +----------------------------------- + +The interpretation of some ``CPUFreq`` policy attributes described in +:doc:`cpufreq` is special with ``intel_pstate`` as the current scaling driver +and it generally depends on the driver's `operation mode <Operation Modes_>`_. + +First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and +``scaling_cur_freq`` attributes are produced by applying a processor-specific +multiplier to the internal P-state representation used by ``intel_pstate``. +Also, the values of the ``scaling_max_freq`` and ``scaling_min_freq`` +attributes are capped by the frequency corresponding to the maximum P-state that +the driver is allowed to set. + +If the ``no_turbo`` `global attribute <no_turbo_attr_>`_ is set, the driver is +not allowed to use turbo P-states, so the maximum value of ``scaling_max_freq`` +and ``scaling_min_freq`` is limited to the maximum non-turbo P-state frequency. +Accordingly, setting ``no_turbo`` causes ``scaling_max_freq`` and +``scaling_min_freq`` to go down to that value if they were above it before. +However, the old values of ``scaling_max_freq`` and ``scaling_min_freq`` will be +restored after unsetting ``no_turbo``, unless these attributes have been written +to after ``no_turbo`` was set. + +If ``no_turbo`` is not set, the maximum possible value of ``scaling_max_freq`` +and ``scaling_min_freq`` corresponds to the maximum supported turbo P-state, +which also is the value of ``cpuinfo_max_freq`` in either case. + +Next, the following policy attributes have special meaning if +``intel_pstate`` works in the `active mode <Active Mode_>`_: + +``scaling_available_governors`` + List of P-state selection algorithms provided by ``intel_pstate``. + +``scaling_governor`` + P-state selection algorithm provided by ``intel_pstate`` currently in + use with the given policy. + +``scaling_cur_freq`` + Frequency of the average P-state of the CPU represented by the given + policy for the time interval between the last two invocations of the + driver's utilization update callback by the CPU scheduler for that CPU. + +The meaning of these attributes in the `passive mode <Passive Mode_>`_ is the +same as for other scaling drivers. + +Additionally, the value of the ``scaling_driver`` attribute for ``intel_pstate`` +depends on the operation mode of the driver. Namely, it is either +"intel_pstate" (in the `active mode <Active Mode_>`_) or "intel_cpufreq" (in the +`passive mode <Passive Mode_>`_). + +Coordination of P-State Limits +------------------------------ + +``intel_pstate`` allows P-state limits to be set in two ways: with the help of +the ``max_perf_pct`` and ``min_perf_pct`` `global attributes +<Global Attributes_>`_ or via the ``scaling_max_freq`` and ``scaling_min_freq`` +``CPUFreq`` policy attributes. The coordination between those limits is based +on the following rules, regardless of the current operation mode of the driver: + + 1. All CPUs are affected by the global limits (that is, none of them can be + requested to run faster than the global maximum and none of them can be + requested to run slower than the global minimum). + + 2.
Each individual CPU is affected by its own per-policy limits (that is, it + cannot be requested to run faster than its own per-policy maximum and it + cannot be requested to run slower than its own per-policy minimum). + + 3. The global and per-policy limits can be set independently. + +If the `HWP feature is enabled in the processor <Active Mode With HWP_>`_, the +resulting effective values are written into its registers whenever the limits +change in order to request its internal P-state selection logic to always set +P-states within these limits. Otherwise, the limits are taken into account by +scaling governors (in the `passive mode <Passive Mode_>`_) and by the driver +every time before setting a new P-state for a CPU. + +Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument +is passed to the kernel, ``max_perf_pct`` and ``min_perf_pct`` are not exposed +at all and the only way to set the limits is by using the policy attributes. + + +Energy vs Performance Hints +--------------------------- + +If ``intel_pstate`` works in the `active mode with the HWP feature enabled +<Active Mode With HWP_>`_ in the processor, additional attributes are present +in every ``CPUFreq`` policy directory in ``sysfs``. They are intended to allow +user space to help ``intel_pstate`` to adjust the processor's internal P-state +selection logic by focusing it on performance or on energy-efficiency, or +somewhere between the two extremes: + +``energy_performance_preference`` + Current value of the energy vs performance hint for the given policy + (or the CPU represented by it). + + The hint can be changed by writing to this attribute (see the usage + sketch below). + +``energy_performance_available_preferences`` + List of strings that can be written to the + ``energy_performance_preference`` attribute. + + They represent different energy vs performance hints and should be + self-explanatory, except that ``default`` represents whatever hint + value was set by the platform firmware. + +Strings written to the ``energy_performance_preference`` attribute are +internally translated to integer values written to the processor's +Energy-Performance Preference (EPP) knob (if supported) or its +Energy-Performance Bias (EPB) knob. + +[Note that tasks may be migrated from one CPU to another by the scheduler's +load-balancing algorithm and if different energy vs performance hints are +set for those CPUs, that may lead to undesirable outcomes. To avoid such +issues it is better to set the same energy vs performance hint for all CPUs +or to pin every task potentially sensitive to them to a specific CPU.] + +.. _acpi-cpufreq: + +``intel_pstate`` vs ``acpi-cpufreq`` +==================================== + +On the majority of systems supported by ``intel_pstate``, the ACPI tables +provided by the platform firmware contain ``_PSS`` objects returning information +that can be used for CPU performance scaling (refer to the `ACPI specification`_ +for details on the ``_PSS`` objects and the format of the information returned +by them). + +The information returned by the ACPI ``_PSS`` objects is used by the +``acpi-cpufreq`` scaling driver. On systems supported by ``intel_pstate`` +the ``acpi-cpufreq`` driver uses the same hardware CPU performance scaling +interface, but the set of P-states it can use is limited by the ``_PSS`` +output.
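+
+Returning to the `Energy vs Performance Hints`_ interface described above, the
+hint for a given CPU can be inspected and changed through the policy
+attributes. [A minimal sketch; it assumes the active mode with HWP enabled,
+and both the CPU number and the preference list shown are examples.] ::
+
+    # cat /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_available_preferences
+    default performance balance_performance balance_power power
+    # echo balance_power > /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference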
+ +On those systems each ``_PSS`` object returns a list of P-states supported by +the corresponding CPU which basically is a subset of the P-states range that can +be used by ``intel_pstate`` on the same system, with one exception: the whole +`turbo range <turbo_>`_ is represented by one item in it (the topmost one). By +convention, the frequency returned by ``_PSS`` for that item is greater by 1 MHz +than the frequency of the highest non-turbo P-state listed by it, but the +corresponding P-state representation (following the hardware specification) +returned for it matches the maximum supported turbo P-state (or is the +special value 255 meaning essentially "go as high as you can get"). + +The list of P-states returned by ``_PSS`` is reflected by the table of +available frequencies supplied by ``acpi-cpufreq`` to the ``CPUFreq`` core and +scaling governors and the minimum and maximum supported frequencies reported by +it come from that list as well. In particular, given the special representation +of the turbo range described above, this means that the maximum supported +frequency reported by ``acpi-cpufreq`` is higher by 1 MHz than the frequency +of the highest supported non-turbo P-state listed by ``_PSS`` which, of course, +affects decisions made by the scaling governors, except for ``powersave`` and +``performance``. + +For example, if a given governor attempts to select a frequency proportional to +estimated CPU load and maps the load of 100% to the maximum supported frequency +(possibly multiplied by a constant), then it will tend to choose P-states below +the turbo threshold if ``acpi-cpufreq`` is used as the scaling driver, because +in that case the turbo range corresponds to a small fraction of the frequency +band it can use (1 MHz vs 1 GHz or more). In consequence, it will only go to +the turbo range for the highest loads and the other loads above 50% that might +benefit from running at turbo frequencies will be given non-turbo P-states +instead. + +One more issue related to that may appear on systems supporting the +`Configurable TDP feature <turbo_>`_ allowing the platform firmware to set the +turbo threshold. Namely, if that is not coordinated with the lists of P-states +returned by ``_PSS`` properly, there may be more than one item corresponding to +a turbo P-state in those lists and there may be a problem with avoiding the +turbo range (if desirable or necessary). Usually, to avoid using turbo +P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state listed +by ``_PSS``, but that is not sufficient when there are other turbo P-states in +the list returned by it. + +Apart from the above, ``acpi-cpufreq`` works like ``intel_pstate`` in the +`passive mode <Passive Mode_>`_, except that the number of P-states it can set +is limited to the ones listed by the ACPI ``_PSS`` objects. + + +Kernel Command Line Options for ``intel_pstate`` +================================================ + +Several kernel command line options can be used to pass early-configuration-time +parameters to ``intel_pstate`` in order to enforce specific behavior of it. All +of them have to be prepended with the ``intel_pstate=`` prefix. + +``disable`` + Do not register ``intel_pstate`` as the scaling driver even if the + processor is supported by it. + +``passive`` + Register ``intel_pstate`` in the `passive mode <Passive Mode_>`_ to + start with. + + This option implies the ``no_hwp`` one described below. 
+ +``force`` + Register ``intel_pstate`` as the scaling driver instead of + ``acpi-cpufreq`` even if the latter is preferred on the given system. + + This may prevent some platform features (such as thermal controls and + power capping) that rely on the availability of ACPI P-states + information from functioning as expected, so it should be used with + caution. + + This option does not work with processors that are not supported by + ``intel_pstate`` and on platforms where the ``pcc-cpufreq`` scaling + driver is used instead of ``acpi-cpufreq``. + +``no_hwp`` + Do not enable the `hardware-managed P-states (HWP) feature + <Active Mode With HWP_>`_ even if it is supported by the processor. + +``hwp_only`` + Register ``intel_pstate`` as the scaling driver only if the + `hardware-managed P-states (HWP) feature <Active Mode With HWP_>`_ is + supported by the processor. + +``support_acpi_ppc`` + Take ACPI ``_PPC`` performance limits into account. + + If the preferred power management profile in the FADT (Fixed ACPI + Description Table) is set to "Enterprise Server" or "Performance + Server", the ACPI ``_PPC`` limits are taken into account by default + and this option has no effect. + +``per_cpu_perf_limits`` + Use per-logical-CPU P-State limits (see `Coordination of P-state + Limits`_ for details). + + +Diagnostics and Tuning +====================== + +Trace Events +------------ + +There are two static trace events that can be used for ``intel_pstate`` +diagnostics. One of them is the ``cpu_frequency`` trace event generally used +by ``CPUFreq``, and the other one is the ``pstate_sample`` trace event specific +to ``intel_pstate``. Both of them are triggered by ``intel_pstate`` only if +it works in the `active mode <Active Mode_>`_. + +The following sequence of shell commands can be used to enable them and see +their output (if the kernel is generally configured to support event tracing):: + + # cd /sys/kernel/debug/tracing/ + # echo 1 > events/power/pstate_sample/enable + # echo 1 > events/power/cpu_frequency/enable + # cat trace + gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 freq=2474476 + cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2 + +If ``intel_pstate`` works in the `passive mode <Passive Mode_>`_, the +``cpu_frequency`` trace event will be triggered either by the ``schedutil`` +scaling governor (for the policies it is attached to), or by the ``CPUFreq`` +core (for the policies with other scaling governors). + +``ftrace`` +---------- + +The ``ftrace`` interface can be used for low-level diagnostics of +``intel_pstate``. For example, to check how often the function to set a +P-state is called, the ``ftrace`` filter can be set to +:c:func:`intel_pstate_set_pstate`:: + + # cd /sys/kernel/debug/tracing/ + # cat available_filter_functions | grep -i pstate + intel_pstate_set_pstate + intel_pstate_cpu_init + ... + # echo intel_pstate_set_pstate > set_ftrace_filter + # echo function > current_tracer + # cat trace | head -15 + # tracer: function + # + # entries-in-buffer/entries-written: 80/80 #P:4 + # + # _-----=> irqs-off + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / delay + # TASK-PID CPU# |||| TIMESTAMP FUNCTION + # | | | |||| | | + Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func + gnome-terminal--4510 [002] ..s.
2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func + gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func + <idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func + +Tuning Interface in ``debugfs`` +------------------------------- + +The ``powersave`` algorithm provided by ``intel_pstate`` for `the Core line of +processors in the active mode <powersave_>`_ is based on a `PID controller`_ +whose parameters were chosen to address a number of different use cases at the +same time. However, it still is possible to fine-tune it to a specific workload +and the ``debugfs`` interface under ``/sys/kernel/debug/pstate_snb/`` is +provided for this purpose. [Note that the ``pstate_snb`` directory will be +present only if the specific P-state selection algorithm matching the interface +in it actually is in use.] + +The following files present in that directory can be used to modify the PID +controller parameters at run time: + +| ``deadband`` +| ``d_gain_pct`` +| ``i_gain_pct`` +| ``p_gain_pct`` +| ``sample_rate_ms`` +| ``setpoint`` + +Note, however, that achieving desirable results this way generally requires +expert-level understanding of the power vs performance tradeoff, so extra care +is recommended when attempting to do that. + + +.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf +.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html +.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf +.. _PID controller: https://en.wikipedia.org/wiki/PID_controller diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt deleted file mode 100644 index 3fdcdfd968ba..000000000000 --- a/Documentation/cpu-freq/intel-pstate.txt +++ /dev/null @@ -1,281 +0,0 @@ -Intel P-State driver --------------------- - -This driver provides an interface to control the P-State selection for the -SandyBridge+ Intel processors. - -The following document explains P-States: -http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf -As stated in the document, P-State doesn’t exactly mean a frequency. However, for -the sake of the relationship with cpufreq, P-State and frequency are used -interchangeably. - -Understanding the cpufreq core governors and policies are important before -discussing more details about the Intel P-State driver. Based on what callbacks -a cpufreq driver provides to the cpufreq core, it can support two types of -drivers: -- with target_index() callback: In this mode, the drivers using cpufreq core -simply provide the minimum and maximum frequency limits and an additional -interface target_index() to set the current frequency. The cpufreq subsystem -has a number of scaling governors ("performance", "powersave", "ondemand", -etc.). Depending on which governor is in use, cpufreq core will call for -transitions to a specific frequency using target_index() callback. -- setpolicy() callback: In this mode, drivers do not provide target_index() -callback, so cpufreq core can't request a transition to a specific frequency. -The driver provides minimum and maximum frequency limits and callbacks to set a -policy. The policy in cpufreq sysfs is referred to as the "scaling governor". -The cpufreq core can request the driver to operate in any of the two policies: -"performance" and "powersave". 
The driver decides which frequency to use based -on the above policy selection considering minimum and maximum frequency limits. - -The Intel P-State driver falls under the latter category, which implements the -setpolicy() callback. This driver decides what P-State to use based on the -requested policy from the cpufreq core. If the processor is capable of -selecting its next P-State internally, then the driver will offload this -responsibility to the processor (aka HWP: Hardware P-States). If not, the -driver implements algorithms to select the next P-State. - -Since these policies are implemented in the driver, they are not same as the -cpufreq scaling governors implementation, even if they have the same name in -the cpufreq sysfs (scaling_governors). For example the "performance" policy is -similar to cpufreq’s "performance" governor, but "powersave" is completely -different than the cpufreq "powersave" governor. The strategy here is similar -to cpufreq "ondemand", where the requested P-State is related to the system load. - -Sysfs Interface - -In addition to the frequency-controlling interfaces provided by the cpufreq -core, the driver provides its own sysfs files to control the P-State selection. -These files have been added to /sys/devices/system/cpu/intel_pstate/. -Any changes made to these files are applicable to all CPUs (even in a -multi-package system, Refer to later section on placing "Per-CPU limits"). - - max_perf_pct: Limits the maximum P-State that will be requested by - the driver. It states it as a percentage of the available performance. The - available (P-State) performance may be reduced by the no_turbo - setting described below. - - min_perf_pct: Limits the minimum P-State that will be requested by - the driver. It states it as a percentage of the max (non-turbo) - performance level. - - no_turbo: Limits the driver to selecting P-State below the turbo - frequency range. - - turbo_pct: Displays the percentage of the total performance that - is supported by hardware that is in the turbo range. This number - is independent of whether turbo has been disabled or not. - - num_pstates: Displays the number of P-States that are supported - by hardware. This number is independent of whether turbo has - been disabled or not. - -For example, if a system has these parameters: - Max 1 core turbo ratio: 0x21 (Max 1 core ratio is the maximum P-State) - Max non turbo ratio: 0x17 - Minimum ratio : 0x08 (Here the ratio is called max efficiency ratio) - -Sysfs will show : - max_perf_pct:100, which corresponds to 1 core ratio - min_perf_pct:24, max_efficiency_ratio / max 1 Core ratio - no_turbo:0, turbo is not disabled - num_pstates:26 = (max 1 Core ratio - Max Efficiency Ratio + 1) - turbo_pct:39 = (max 1 core ratio - max non turbo ratio) / num_pstates - -Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual -Volume 3: System Programming Guide" to understand ratios. - -There is one more sysfs attribute in /sys/devices/system/cpu/intel_pstate/ -that can be used for controlling the operation mode of the driver: - - status: Three settings are possible: - "off" - The driver is not in use at this time. - "active" - The driver works as a P-state governor (default). - "passive" - The driver works as a regular cpufreq one and collaborates - with the generic cpufreq governors (it sets P-states as - requested by those governors). - The current setting is returned by reads from this attribute. 
Writing one - of the above strings to it changes the operation mode as indicated by that - string, if possible. If HW-managed P-states (HWP) are enabled, it is not - possible to change the driver's operation mode and attempts to write to - this attribute will fail. - -cpufreq sysfs for Intel P-State - -Since this driver registers with cpufreq, cpufreq sysfs is also presented. -There are some important differences, which need to be considered. - -scaling_cur_freq: This displays the real frequency which was used during -the last sample period instead of what is requested. Some other cpufreq driver, -like acpi-cpufreq, displays what is requested (Some changes are on the -way to fix this for acpi-cpufreq driver). The same is true for frequencies -displayed at /proc/cpuinfo. - -scaling_governor: This displays current active policy. Since each CPU has a -cpufreq sysfs, it is possible to set a scaling governor to each CPU. But this -is not possible with Intel P-States, as there is one common policy for all -CPUs. Here, the last requested policy will be applicable to all CPUs. It is -suggested that one use the cpupower utility to change policy to all CPUs at the -same time. - -scaling_setspeed: This attribute can never be used with Intel P-State. - -scaling_max_freq/scaling_min_freq: This interface can be used similarly to -the max_perf_pct/min_perf_pct of Intel P-State sysfs. However since frequencies -are converted to nearest possible P-State, this is prone to rounding errors. -This method is not preferred to limit performance. - -affected_cpus: Not used -related_cpus: Not used - -For contemporary Intel processors, the frequency is controlled by the -processor itself and the P-State exposed to software is related to -performance levels. The idea that frequency can be set to a single -frequency is fictional for Intel Core processors. Even if the scaling -driver selects a single P-State, the actual frequency the processor -will run at is selected by the processor itself. - -Per-CPU limits - -The kernel command line option "intel_pstate=per_cpu_perf_limits" forces -the intel_pstate driver to use per-CPU performance limits. When it is set, -the sysfs control interface described above is subject to limitations. -- The following controls are not available for both read and write - /sys/devices/system/cpu/intel_pstate/max_perf_pct - /sys/devices/system/cpu/intel_pstate/min_perf_pct -- The following controls can be used to set performance limits, as far as the -architecture of the processor permits: - /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq - /sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq - /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor -- User can still observe turbo percent and number of P-States from - /sys/devices/system/cpu/intel_pstate/turbo_pct - /sys/devices/system/cpu/intel_pstate/num_pstates -- User can read write system wide turbo status - /sys/devices/system/cpu/no_turbo - -Support of energy performance hints -It is possible to provide hints to the HWP algorithms in the processor -to be more performance centric to more energy centric. When the driver -is using HWP, two additional cpufreq sysfs attributes are presented for -each logical CPU. 
-These attributes are: - - energy_performance_available_preferences - - energy_performance_preference - -To get list of supported hints: -$ cat energy_performance_available_preferences - default performance balance_performance balance_power power - -The current preference can be read or changed via cpufreq sysfs -attribute "energy_performance_preference". Reading from this attribute -will display current effective setting. User can write any of the valid -preference string to this attribute. User can always restore to power-on -default by writing "default". - -Since threads can migrate to different CPUs, this is possible that the -new CPU may have different energy performance preference than the previous -one. To avoid such issues, either threads can be pinned to specific CPUs -or set the same energy performance preference value to all CPUs. - -Tuning Intel P-State driver - -When the performance can be tuned using PID (Proportional Integral -Derivative) controller, debugfs files are provided for adjusting performance. -They are presented under: -/sys/kernel/debug/pstate_snb/ - -The PID tunable parameters are: - deadband - d_gain_pct - i_gain_pct - p_gain_pct - sample_rate_ms - setpoint - -To adjust these parameters, some understanding of driver implementation is -necessary. There are some tweeks described here, but be very careful. Adjusting -them requires expert level understanding of power and performance relationship. -These limits are only useful when the "powersave" policy is active. - --To make the system more responsive to load changes, sample_rate_ms can -be adjusted (current default is 10ms). --To make the system use higher performance, even if the load is lower, setpoint -can be adjusted to a lower number. This will also lead to faster ramp up time -to reach the maximum P-State. -If there are no derivative and integral coefficients, The next P-State will be -equal to: - current P-State - ((setpoint - current cpu load) * p_gain_pct) - -For example, if the current PID parameters are (Which are defaults for the core -processors like SandyBridge): - deadband = 0 - d_gain_pct = 0 - i_gain_pct = 0 - p_gain_pct = 20 - sample_rate_ms = 10 - setpoint = 97 - -If the current P-State = 0x08 and current load = 100, this will result in the -next P-State = 0x08 - ((97 - 100) * 0.2) = 8.6 (rounded to 9). Here the P-State -goes up by only 1. If during next sample interval the current load doesn't -change and still 100, then P-State goes up by one again. This process will -continue as long as the load is more than the setpoint until the maximum P-State -is reached. - -For the same load at setpoint = 60, this will result in the next P-State -= 0x08 - ((60 - 100) * 0.2) = 16 -So by changing the setpoint from 97 to 60, there is an increase of the -next P-State from 9 to 16. So this will make processor execute at higher -P-State for the same CPU load. If the load continues to be more than the -setpoint during next sample intervals, then P-State will go up again till the -maximum P-State is reached. But the ramp up time to reach the maximum P-State -will be much faster when the setpoint is 60 compared to 97. - -Debugging Intel P-State driver - -Event tracing -To debug P-State transition, the Linux event tracing interface can be used. -There are two specific events, which can be enabled (Provided the kernel -configs related to event tracing are enabled). 
- -# cd /sys/kernel/debug/tracing/ -# echo 1 > events/power/pstate_sample/enable -# echo 1 > events/power/cpu_frequency/enable -# cat trace -gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 - scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 - freq=2474476 -cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2 - - -Using ftrace - -If function level tracing is required, the Linux ftrace interface can be used. -For example if we want to check how often a function to set a P-State is -called, we can set ftrace filter to intel_pstate_set_pstate. - -# cd /sys/kernel/debug/tracing/ -# cat available_filter_functions | grep -i pstate -intel_pstate_set_pstate -intel_pstate_cpu_init -... - -# echo intel_pstate_set_pstate > set_ftrace_filter -# echo function > current_tracer -# cat trace | head -15 -# tracer: function -# -# entries-in-buffer/entries-written: 80/80 #P:4 -# -# _-----=> irqs-off -# / _----=> need-resched -# | / _---=> hardirq/softirq -# || / _--=> preempt-depth -# ||| / delay -# TASK-PID CPU# |||| TIMESTAMP FUNCTION -# | | | |||| | | - Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func - gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func - gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func - <idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func diff --git a/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt b/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt index ca02d3e4db91..284e2b14cfbe 100644 --- a/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt +++ b/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt @@ -5,7 +5,7 @@ with HDMI output and the HVS (Hardware Video Scaler) for compositing display planes. Required properties for VC4: -- compatible: Should be "brcm,bcm2835-vc4" +- compatible: Should be "brcm,bcm2835-vc4" or "brcm,cygnus-vc4" Required properties for Pixel Valve: - compatible: Should be one of "brcm,bcm2835-pixelvalve0", @@ -54,11 +54,14 @@ Required properties for VEC: See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt Required properties for V3D: -- compatible: Should be "brcm,bcm2835-v3d" +- compatible: Should be "brcm,bcm2835-v3d" or "brcm,cygnus-v3d" - reg: Physical base address and length of the V3D's registers - interrupts: The interrupt number See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt +Optional properties for V3D: +- clocks: The clock the unit runs on + Required properties for DSI: - compatible: Should be "brcm,bcm2835-dsi0" or "brcm,bcm2835-dsi1" - reg: Physical base address and length of the DSI block's registers diff --git a/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt b/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt new file mode 100644 index 000000000000..8e1476941c0f --- /dev/null +++ b/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt @@ -0,0 +1,36 @@ +* STMicroelectronics STM32 lcd-tft display controller + +- ltdc: lcd-tft display controller host + must be a sub-node of st-display-subsystem + Required properties: + - compatible: "st,stm32-ltdc" + - reg: Physical base address of the IP registers and length of memory mapped region. + - clocks: A list of phandle + clock-specifier pairs, one for each + entry in 'clock-names'. + - clock-names: A list of clock names. For ltdc it should contain: + - "lcd" for the clock feeding the output pixel clock & IP clock. 
+ - resets: reset to be used by the device (defined by use of RCC macro). + Required nodes: + - Video port for RGB output. + +Example: + +/ { + ... + soc { + ... + ltdc: display-controller@40016800 { + compatible = "st,stm32-ltdc"; + reg = <0x40016800 0x200>; + interrupts = <88>, <89>; + resets = <&rcc STM32F4_APB2_RESET(LTDC)>; + clocks = <&rcc 1 CLK_LCD>; + clock-names = "lcd"; + + port { + ltdc_out_rgb: endpoint { + }; + }; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/display/zte,vou.txt b/Documentation/devicetree/bindings/display/zte,vou.txt index 9c356284232b..38476475fd60 100644 --- a/Documentation/devicetree/bindings/display/zte,vou.txt +++ b/Documentation/devicetree/bindings/display/zte,vou.txt @@ -58,6 +58,18 @@ Required properties: integer cells. The first cell is the offset of SYSCTRL register used to control TV Encoder DAC power, and the second cell is the bit mask. +* VGA output device + +Required properties: + - compatible: should be "zte,zx296718-vga" + - reg: Physical base address and length of the VGA device IO region + - interrupts : VGA interrupt number to CPU + - clocks: Phandle with clock-specifier pointing to VGA I2C clock. + - clock-names: Must be "i2c_wclk". + - zte,vga-power-control: the phandle to SYSCTRL block followed by two + integer cells. The first cell is the offset of SYSCTRL register used + to control VGA DAC power, and the second cell is the bit mask. + Example: vou: vou@1440000 { @@ -81,6 +93,15 @@ vou: vou@1440000 { "main_wclk", "aux_wclk"; }; + vga: vga@8000 { + compatible = "zte,zx296718-vga"; + reg = <0x8000 0x1000>; + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&topcrm VGA_I2C_WCLK>; + clock-names = "i2c_wclk"; + zte,vga-power-control = <&sysctrl 0x170 0xe0>; + }; + hdmi: hdmi@c000 { compatible = "zte,zx296718-hdmi"; reg = <0xc000 0x4000>; diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt index 6db22103e2dd..025cf8c9324a 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt @@ -36,7 +36,7 @@ Optional properties: control gpios - threshold: allows setting the "click"-threshold in the range - from 20 to 80. + from 0 to 80. - gain: allows setting the sensitivity in the range from 0 to 31. Note that lower values indicate higher diff --git a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt index 05485699d70e..9630ac0e4b56 100644 --- a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt +++ b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt @@ -16,6 +16,11 @@ Required properties: - reg: Base address of PMIC on Hi6220 SoC. - interrupt-controller: Hi655x has internal IRQs (has own IRQ domain). - pmic-gpios: The GPIO used by PMIC IRQ. 
+- #clock-cells: From common clock binding; shall be set to 0 + +Optional properties: +- clock-output-names: From common clock binding to override the + default output clock name Example: pmic: pmic@f8000000 { @@ -24,4 +29,5 @@ Example: interrupt-controller; #interrupt-cells = <2>; pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; + #clock-cells = <0>; } diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt index e25436861867..9029b45b8a22 100644 --- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt +++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt @@ -18,6 +18,8 @@ Optional properties: "ext_clock" (External clock provided to the card). - post-power-on-delay-ms : Delay in ms after powering the card and de-asserting the reset-gpios (if any) +- power-off-delay-us : Delay in us after asserting the reset-gpios (if any) + during power off of the card. Example: diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt index a1e3693cca16..6f55bdd52f8a 100644 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ b/Documentation/devicetree/bindings/net/fsl-fec.txt @@ -15,6 +15,10 @@ Optional properties: - phy-reset-active-high : If present then the reset sequence using the GPIO specified in the "phy-reset-gpios" property is reversed (H=reset state, L=operation state). +- phy-reset-post-delay : Post-reset delay in milliseconds. If present, then + a delay of phy-reset-post-delay milliseconds will be observed after the + phy-reset-gpios has been toggled. If omitted, no delay is + observed. The delay must be in the range of 1ms to 1000ms; other delays are invalid. - phy-supply : regulator that powers the Ethernet PHY. - phy-handle : phandle to the PHY device connected to this device. - fixed-link : Assume a fixed link. See fixed-link.txt in the same directory. diff --git a/Documentation/gpu/index.rst b/Documentation/gpu/index.rst index c572f092739e..037a39ac1807 100644 --- a/Documentation/gpu/index.rst +++ b/Documentation/gpu/index.rst @@ -12,6 +12,7 @@ Linux GPU Driver Developer's Guide drm-uapi i915 meson + pl111 tinydrm vc4 vga-switcheroo diff --git a/Documentation/gpu/pl111.rst b/Documentation/gpu/pl111.rst new file mode 100644 index 000000000000..9b03736d33dd --- /dev/null +++ b/Documentation/gpu/pl111.rst @@ -0,0 +1,6 @@ +========================================== + drm/pl111 ARM PrimeCell PL111 CLCD Driver +========================================== + +.. kernel-doc:: drivers/gpu/drm/pl111/pl111_drv.c + :doc: ARM PrimeCell PL111 CLCD Driver diff --git a/Documentation/input/devices/edt-ft5x06.rst b/Documentation/input/devices/edt-ft5x06.rst index 2032f0b7a8fa..1ccc94b192b7 100644 --- a/Documentation/input/devices/edt-ft5x06.rst +++ b/Documentation/input/devices/edt-ft5x06.rst @@ -15,7 +15,7 @@ It has been tested with the following devices: The driver allows configuration of the touch screen via a set of sysfs files: /sys/class/input/eventX/device/device/threshold: - allows setting the "click"-threshold in the range from 20 to 80. + allows setting the "click"-threshold in the range from 0 to 80. /sys/class/input/eventX/device/device/gain: allows setting the sensitivity in the range from 0 to 31.
Note that diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst index 5338673c88d9..773d2bfacc6c 100644 --- a/Documentation/sound/hd-audio/models.rst +++ b/Documentation/sound/hd-audio/models.rst @@ -16,6 +16,8 @@ ALC880 6-jack in back, 2-jack in front 6stack-digout 6-jack with a SPDIF out +6stack-automute + 6-jack with headphone jack detection ALC260 ====== @@ -62,6 +64,8 @@ lenovo-dock Enables docking station I/O for some Lenovos hp-gpio-led GPIO LED support on HP laptops +hp-dock-gpio-mic1-led + HP dock with mic LED support dell-headset-multi Headset jack, which can also be used as mic-in dell-headset-dock @@ -72,6 +76,12 @@ alc283-sense-combo Combo jack sensing on ALC283 tpt440-dock Pin configs for Lenovo Thinkpad Dock support +tpt440 + Lenovo Thinkpad T440s setup +tpt460 + Lenovo Thinkpad T460/560 setup +dual-codecs + Lenovo laptops with dual codecs ALC66x/67x/892 ============== @@ -97,6 +107,8 @@ inv-dmic Inverted internal mic workaround dell-headset-multi Headset jack, which can also be used as mic-in +dual-codecs + Lenovo laptops with dual codecs ALC680 ====== @@ -114,6 +126,8 @@ inv-dmic Inverted internal mic workaround no-primary-hp VAIO Z/VGC-LN51JGB workaround (for fixed speaker DAC) +dual-codecs + ALC1220 dual codecs for Gaming mobos ALC861/660 ========== @@ -206,65 +220,47 @@ auto Conexant 5045 ============= -laptop-hpsense - Laptop with HP sense (old model laptop) -laptop-micsense - Laptop with Mic sense (old model fujitsu) -laptop-hpmicsense - Laptop with HP and Mic senses -benq - Benq R55E -laptop-hp530 - HP 530 laptop -test - for testing/debugging purpose, almost all controls can be - adjusted. Appearing only when compiled with $CONFIG_SND_DEBUG=y +cap-mix-amp + Fix max input level on mixer widget +toshiba-p105 + Toshiba P105 quirk +hp-530 + HP 530 quirk Conexant 5047 ============= -laptop - Basic Laptop config -laptop-hp - Laptop config for some HP models (subdevice 30A5) -laptop-eapd - Laptop config with EAPD support -test - for testing/debugging purpose, almost all controls can be - adjusted. 
Appearing only when compiled with $CONFIG_SND_DEBUG=y +cap-mix-amp + Fix max input level on mixer widget Conexant 5051 ============= -laptop - Basic Laptop config (default) -hp - HP Spartan laptop -hp-dv6736 - HP dv6736 -hp-f700 - HP Compaq Presario F700 -ideapad - Lenovo IdeaPad laptop -toshiba - Toshiba Satellite M300 +lenovo-x200 + Lenovo X200 quirk Conexant 5066 ============= -laptop - Basic Laptop config (default) -hp-laptop - HP laptops, e.g. G60 -asus - Asus K52JU, Lenovo G560 -dell-laptop - Dell laptops -dell-vostro - Dell Vostro -olpc-xo-1_5 - OLPC XO 1.5 -ideapad - Lenovo IdeaPad U150 +stereo-dmic + Workaround for inverted stereo digital mic +gpio1 + Enable GPIO1 pin +headphone-mic-pin + Enable headphone mic NID 0x18 without detection +tp410 + Thinkpad T400 & co quirks thinkpad - Lenovo Thinkpad + Thinkpad mute/mic LED quirk +lemote-a1004 + Lemote A1004 quirk +lemote-a1205 + Lemote A1205 quirk +olpc-xo + OLPC XO quirk +mute-led-eapd + Mute LED control via EAPD +hp-dock + HP dock support +mute-led-gpio + Mute LED control via GPIO STAC9200 ======== @@ -444,6 +440,8 @@ dell-eq Dell desktops/laptops alienware Alienware M17x +asus-mobo + Pin configs for ASUS mobo with 5.1/SPDIF out auto BIOS setup (default) @@ -477,6 +475,8 @@ hp-envy-ts-bass Pin fixup for HP Envy TS bass speaker (NID 0x10) hp-bnb13-eq Hardware equalizer setup for HP laptops +hp-envy-ts-bass + HP Envy TS bass support auto BIOS setup (default) @@ -496,10 +496,22 @@ auto Cirrus Logic CS4206/4207 ======================== +mbp53 + MacBook Pro 5,3 mbp55 MacBook Pro 5,5 imac27 IMac 27 Inch +imac27_122 + iMac 12,2 +apple + Generic Apple quirk +mbp101 + MacBookPro 10,1 +mbp81 + MacBookPro 8,1 +mba42 + MacBookAir 4,2 auto BIOS setup (default) @@ -509,6 +521,10 @@ mba6 MacBook Air 6,1 and 6,2 gpio0 Enable GPIO 0 amp +mbp11 + MacBookPro 11,2 +macmini + MacMini 7,1 auto BIOS setup (default) diff --git a/Documentation/sync_file.txt b/Documentation/sync_file.txt index c3d033a06e8d..496fb2c3b3e6 100644 --- a/Documentation/sync_file.txt +++ b/Documentation/sync_file.txt @@ -1,8 +1,8 @@ - Sync File API Guide - ~~~~~~~~~~~~~~~~~~~ +=================== +Sync File API Guide +=================== - Gustavo Padovan - <gustavo at padovan dot org> +:Author: Gustavo Padovan <gustavo at padovan dot org> This document serves as a guide for device driver writers on what the sync_file API is, and how drivers can support it. Sync file is the carrier of @@ -46,16 +46,17 @@ Creating Sync Files When a driver needs to send an out-fence to userspace it creates a sync_file. -Interface: +Interface:: + struct sync_file *sync_file_create(struct dma_fence *fence); The caller passes the out-fence and gets back the sync_file. That is just the first step; next it needs to install an fd on sync_file->file. So it gets an -fd: +fd:: fd = get_unused_fd_flags(O_CLOEXEC); -and installs it on sync_file->file: +and installs it on sync_file->file:: fd_install(fd, sync_file->file); @@ -71,7 +72,8 @@ When userspace needs to send an in-fence to the driver it passes the file descriptor of the Sync File to the kernel. The kernel can then retrieve the fences from it. -Interface: +Interface:: + struct dma_fence *sync_file_get_fence(int fd); @@ -79,5 +81,6 @@ The returned reference is owned by the caller and must be disposed of afterwards using dma_fence_put(). In case of error, a NULL is returned instead. References: - +1.
struct sync_file in include/linux/sync_file.h +2. All interfaces mentioned above defined in include/linux/sync_file.h diff --git a/MAINTAINERS b/MAINTAINERS index 9e984645c4b0..9b716db73616 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4235,6 +4235,12 @@ F: include/drm/drm* F: include/uapi/drm/drm* F: include/linux/vga* +DRM DRIVER FOR ARM PL111 CLCD +M: Eric Anholt <eric@anholt.net> +T: git git://anongit.freedesktop.org/drm/drm-misc +S: Supported +F: drivers/gpu/drm/pl111/ + DRM DRIVER FOR AST SERVER GRAPHICS CHIPS M: Dave Airlie <airlied@redhat.com> S: Odd Fixes @@ -4242,6 +4248,8 @@ F: drivers/gpu/drm/ast/ DRM DRIVERS FOR BRIDGE CHIPS M: Archit Taneja <architt@codeaurora.org> +M: Andrzej Hajda <a.hajda@samsung.com> +R: Laurent Pinchart <Laurent.pinchart@ideasonboard.com> S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/bridge/ @@ -4498,6 +4506,17 @@ S: Maintained F: drivers/gpu/drm/sti F: Documentation/devicetree/bindings/display/st,stih4xx.txt +DRM DRIVERS FOR STM +M: Yannick Fertre <yannick.fertre@st.com> +M: Philippe Cornu <philippe.cornu@st.com> +M: Benjamin Gaignard <benjamin.gaignard@linaro.org> +M: Vincent Abriou <vincent.abriou@st.com> +L: dri-devel@lists.freedesktop.org +T: git git://anongit.freedesktop.org/drm/drm-misc +S: Maintained +F: drivers/gpu/drm/stm +F: Documentation/devicetree/bindings/display/st,stm32-ltdc.txt + DRM DRIVER FOR TDFX VIDEO CARDS S: Orphan / Obsolete F: drivers/gpu/drm/tdfx/ @@ -7143,7 +7162,7 @@ S: Maintained F: drivers/media/platform/rcar_jpu.c JSM Neo PCI based serial card -M: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com> +M: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com> L: linux-serial@vger.kernel.org S: Maintained F: drivers/tty/serial/jsm/ @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Fearless Coyote # *DOCUMENTATION* diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 75bce2d0b1a8..49f6a6242cf9 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -81,6 +81,45 @@ }; }; + reg_sys_5v: regulator@0 { + compatible = "regulator-fixed"; + regulator-name = "SYS_5V"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-boot-on; + regulator-always-on; + }; + + reg_vdd_3v3: regulator@1 { + compatible = "regulator-fixed"; + regulator-name = "VDD_3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + regulator-always-on; + vin-supply = <®_sys_5v>; + }; + + reg_5v_hub: regulator@2 { + compatible = "regulator-fixed"; + regulator-name = "5V_HUB"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-boot-on; + gpio = <&gpio0 7 0>; + regulator-always-on; + vin-supply = <®_sys_5v>; + }; + + wl1835_pwrseq: wl1835-pwrseq { + compatible = "mmc-pwrseq-simple"; + /* WLAN_EN GPIO */ + reset-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>; + clocks = <&pmic>; + clock-names = "ext_clock"; + power-off-delay-us = <10>; + }; + soc { spi0: spi@f7106000 { status = "ok"; @@ -256,11 +295,31 @@ /* GPIO blocks 16 thru 19 do not appear to be routed to pins */ + dwmmc_0: dwmmc0@f723d000 { + cap-mmc-highspeed; + non-removable; + bus-width = <0x8>; + vmmc-supply = <&ldo19>; + }; + + dwmmc_1: dwmmc1@f723e000 { + card-detect-delay = <200>; + cap-sd-highspeed; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + vqmmc-supply = <&ldo7>; + vmmc-supply = 
<&ldo10>; + bus-width = <0x4>; + disable-wp; + cd-gpios = <&gpio1 0 1>; + }; + dwmmc_2: dwmmc2@f723f000 { - ti,non-removable; + bus-width = <0x4>; non-removable; - /* WL_EN */ - vmmc-supply = <&wlan_en_reg>; + vmmc-supply = <®_vdd_3v3>; + mmc-pwrseq = <&wl1835_pwrseq>; #address-cells = <0x1>; #size-cells = <0x0>; @@ -272,18 +331,6 @@ interrupts = <3 IRQ_TYPE_EDGE_RISING>; }; }; - - wlan_en_reg: regulator@1 { - compatible = "regulator-fixed"; - regulator-name = "wlan-en-regulator"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - /* WLAN_EN GPIO */ - gpio = <&gpio0 5 0>; - /* WLAN card specific delay */ - startup-delay-us = <70000>; - enable-active-high; - }; }; leds { @@ -330,6 +377,7 @@ pmic: pmic@f8000000 { compatible = "hisilicon,hi655x-pmic"; reg = <0x0 0xf8000000 0x0 0x1000>; + #clock-cells = <0>; interrupt-controller; #interrupt-cells = <2>; pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi index 1e5129b19280..5013e4b2ea71 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi @@ -725,20 +725,10 @@ status = "disabled"; }; - fixed_5v_hub: regulator@0 { - compatible = "regulator-fixed"; - regulator-name = "fixed_5v_hub"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - regulator-boot-on; - gpio = <&gpio0 7 0>; - regulator-always-on; - }; - usb_phy: usbphy { compatible = "hisilicon,hi6220-usb-phy"; #phy-cells = <0>; - phy-supply = <&fixed_5v_hub>; + phy-supply = <®_5v_hub>; hisilicon,peripheral-syscon = <&sys_ctrl>; }; @@ -766,17 +756,12 @@ dwmmc_0: dwmmc0@f723d000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; - cap-mmc-highspeed; - non-removable; reg = <0x0 0xf723d000 0x0 0x1000>; interrupts = <0x0 0x48 0x4>; clocks = <&sys_ctrl 2>, <&sys_ctrl 1>; clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC0>; reset-names = "reset"; - bus-width = <0x8>; - vmmc-supply = <&ldo19>; pinctrl-names = "default"; pinctrl-0 = <&emmc_pmx_func &emmc_clk_cfg_func &emmc_cfg_func &emmc_rst_cfg_func>; @@ -784,13 +769,7 @@ dwmmc_1: dwmmc1@f723e000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; - card-detect-delay = <200>; hisilicon,peripheral-syscon = <&ao_ctrl>; - cap-sd-highspeed; - sd-uhs-sdr12; - sd-uhs-sdr25; - sd-uhs-sdr50; reg = <0x0 0xf723e000 0x0 0x1000>; interrupts = <0x0 0x49 0x4>; #address-cells = <0x1>; @@ -799,11 +778,6 @@ clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC1>; reset-names = "reset"; - vqmmc-supply = <&ldo7>; - vmmc-supply = <&ldo10>; - bus-width = <0x4>; - disable-wp; - cd-gpios = <&gpio1 0 1>; pinctrl-names = "default", "idle"; pinctrl-0 = <&sd_pmx_func &sd_clk_cfg_func &sd_cfg_func>; pinctrl-1 = <&sd_pmx_idle &sd_clk_cfg_idle &sd_cfg_idle>; @@ -811,15 +785,12 @@ dwmmc_2: dwmmc2@f723f000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; reg = <0x0 0xf723f000 0x0 0x1000>; interrupts = <0x0 0x4a 0x4>; clocks = <&sys_ctrl HI6220_MMC2_CIUCLK>, <&sys_ctrl HI6220_MMC2_CLK>; clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC2>; reset-names = "reset"; - bus-width = <0x4>; - broken-cd; pinctrl-names = "default", "idle"; pinctrl-0 = <&sdio_pmx_func &sdio_clk_cfg_func &sdio_cfg_func>; pinctrl-1 = <&sdio_pmx_idle &sdio_clk_cfg_idle &sdio_cfg_idle>; diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 3e7ce86d5c13..4d877144f377 100644 --- 
a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -46,6 +46,8 @@ #define PPC_FEATURE2_HTM_NOSC 0x01000000 #define PPC_FEATURE2_ARCH_3_00 0x00800000 /* ISA 3.00 */ #define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */ +#define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */ +#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */ /* * IMPORTANT! diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 9b3e88b1a9c8..6f849832a669 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -124,7 +124,8 @@ extern void __restore_cpu_e6500(void); #define COMMON_USER_POWER9 COMMON_USER_POWER8 #define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ PPC_FEATURE2_ARCH_3_00 | \ - PPC_FEATURE2_HAS_IEEE128) + PPC_FEATURE2_HAS_IEEE128 | \ + PPC_FEATURE2_DARN ) #ifdef CONFIG_PPC_BOOK3E_64 #define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 40c4887c27b6..f83056297441 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -161,7 +161,9 @@ static struct ibm_pa_feature { { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL }, { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, +#ifdef CONFIG_PPC_RADIX_MMU { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX }, +#endif { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 96c2b8a40630..0c45cdbac4cf 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -197,7 +197,9 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) (REGION_ID(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); - ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr); + ret = hash_page(ea, + _PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED, + 0x300, dsisr); spin_lock(&spu->register_lock); if (!ret) { diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 067defeea691..78fa9395b8c5 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -714,7 +714,7 @@ static void pnv_npu2_release_context(struct kref *kref) void pnv_npu2_destroy_context(struct npu_context *npu_context, struct pci_dev *gpdev) { - struct pnv_phb *nphb, *phb; + struct pnv_phb *nphb; struct npu *npu; struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); struct device_node *nvlink_dn; @@ -728,13 +728,12 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context, nphb = pci_bus_to_host(npdev->bus)->private_data; npu = &nphb->npu; - phb = pci_bus_to_host(gpdev->bus)->private_data; nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", &nvlink_index))) return; npu_context->npdev[npu->index][nvlink_index] = NULL; - opal_npu_destroy_context(phb->opal_id, npu_context->mm->context.id, + opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, PCI_DEVID(gpdev->bus->number, gpdev->devfn)); kref_put(&npu_context->kref, pnv_npu2_release_context); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 
cd18994a9555..4ccfacc7232a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -360,7 +360,7 @@ config SMP Management" code will be disabled if you say Y here. See also <file:Documentation/x86/i386/IO-APIC.txt>, - <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at + <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at <http://www.tldp.org/docs.html#howto>. If you don't know what to do here, say N. diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5851411e60fb..bf240b920473 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -159,7 +159,7 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER # If '-Os' is enabled, disable it and print a warning. ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE undefine CONFIG_CC_OPTIMIZE_FOR_SIZE - $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.) + $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.) endif endif diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 44163e8c3868..2c860ad4fe06 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -94,7 +94,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o quiet_cmd_check_data_rel = DATAREL $@ define cmd_check_data_rel for obj in $(filter %.o,$^); do \ - readelf -S $$obj | grep -qF .rel.local && { \ + ${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \ echo "error: $$obj has data relocations!" >&2; \ exit 1; \ } || true; \ diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 50bc26949e9e..48ef7bb32c42 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -252,6 +252,23 @@ ENTRY(__switch_to_asm) END(__switch_to_asm) /* + * The unwinder expects the last frame on the stack to always be at the same + * offset from the end of the page, which allows it to validate the stack. + * Calling schedule_tail() directly would break that convention because it's an + * asmlinkage function so its argument has to be pushed on the stack. This + * wrapper creates a proper "end of stack" frame header before the call. + */ +ENTRY(schedule_tail_wrapper) + FRAME_BEGIN + + pushl %eax + call schedule_tail + popl %eax + + FRAME_END + ret +ENDPROC(schedule_tail_wrapper) +/* * A newly forked process directly context switches into this address. * * eax: prev task we switched from @@ -259,24 +276,15 @@ END(__switch_to_asm) * edi: kernel thread arg */ ENTRY(ret_from_fork) - FRAME_BEGIN /* help unwinder find end of stack */ - - /* - * schedule_tail() is asmlinkage so we have to put its 'prev' argument - * on the stack. - */ - pushl %eax - call schedule_tail - popl %eax + call schedule_tail_wrapper testl %ebx, %ebx jnz 1f /* kernel threads are uncommon */ 2: /* When we fork, we trace the syscall return in the child, too.
*/ - leal FRAME_OFFSET(%esp), %eax + movl %esp, %eax call syscall_return_slowpath - FRAME_END jmp restore_all /* kernel thread */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 607d72c4a485..4a4c0834f965 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -36,7 +36,6 @@ #include <asm/smap.h> #include <asm/pgtable_types.h> #include <asm/export.h> -#include <asm/frame.h> #include <linux/err.h> .code64 @@ -406,19 +405,17 @@ END(__switch_to_asm) * r12: kernel thread arg */ ENTRY(ret_from_fork) - FRAME_BEGIN /* help unwinder find end of stack */ movq %rax, %rdi - call schedule_tail /* rdi: 'prev' task parameter */ + call schedule_tail /* rdi: 'prev' task parameter */ - testq %rbx, %rbx /* from kernel_thread? */ - jnz 1f /* kernel threads are uncommon */ + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ 2: - leaq FRAME_OFFSET(%rsp),%rdi /* pt_regs pointer */ + movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS - FRAME_END jmp restore_regs_and_iret 1: diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4fd5195deed0..3f9a3d2a5209 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -266,6 +266,7 @@ static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *s #endif int mce_available(struct cpuinfo_x86 *c); +bool mce_is_memory_error(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c5b8f760473c..32e14d137416 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -409,8 +409,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, memcpy(insnbuf, replacement, a->replacementlen); insnbuf_sz = a->replacementlen; - /* 0xe8 is a relative jump; fix the offset. */ - if (*insnbuf == 0xe8 && a->replacementlen == 5) { + /* + * 0xe8 is a relative jump; fix the offset. + * + * Instruction length is checked before the opcode to avoid + * accessing uninitialized bytes for zero-length replacements. + */ + if (a->replacementlen == 5 && *insnbuf == 0xe8) { *(s32 *)(insnbuf + 1) += replacement - instr; DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx", *(s32 *)(insnbuf + 1), diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5abd4bf73d6e..5cfbaeb6529a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -499,16 +499,14 @@ static int mce_usable_address(struct mce *m) return 1; } -static bool memory_error(struct mce *m) +bool mce_is_memory_error(struct mce *m) { - struct cpuinfo_x86 *c = &boot_cpu_data; - - if (c->x86_vendor == X86_VENDOR_AMD) { + if (m->cpuvendor == X86_VENDOR_AMD) { /* ErrCodeExt[20:16] */ u8 xec = (m->status >> 16) & 0x1f; return (xec == 0x0 || xec == 0x8); - } else if (c->x86_vendor == X86_VENDOR_INTEL) { + } else if (m->cpuvendor == X86_VENDOR_INTEL) { /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes * @@ -529,6 +527,7 @@ static bool memory_error(struct mce *m) return false; } +EXPORT_SYMBOL_GPL(mce_is_memory_error); static bool cec_add_mce(struct mce *m) { @@ -536,7 +535,7 @@ static bool cec_add_mce(struct mce *m) return false; /* We eat only correctable DRAM errors with usable addresses. 
*/ - if (memory_error(m) && + if (mce_is_memory_error(m) && !(m->status & MCI_STATUS_UC) && mce_usable_address(m)) if (!cec_add_elem(m->addr >> PAGE_SHIFT)) @@ -713,7 +712,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); - if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) + if (severity == MCE_DEFERRED_SEVERITY && mce_is_memory_error(&m)) if (m.status & MCI_STATUS_ADDRV) m.severity = severity; diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0651e974dcb3..9bef1bbeba63 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -689,8 +689,12 @@ static inline void *alloc_tramp(unsigned long size) { return module_alloc(size); } -static inline void tramp_free(void *tramp) +static inline void tramp_free(void *tramp, int size) { + int npages = PAGE_ALIGN(size) >> PAGE_SHIFT; + + set_memory_nx((unsigned long)tramp, npages); + set_memory_rw((unsigned long)tramp, npages); module_memfree(tramp); } #else @@ -699,7 +703,7 @@ static inline void *alloc_tramp(unsigned long size) { return NULL; } -static inline void tramp_free(void *tramp) { } +static inline void tramp_free(void *tramp, int size) { } #endif /* Defined as markers to the end of the ftrace default trampolines */ @@ -771,7 +775,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* Copy ftrace_caller onto the trampoline memory */ ret = probe_kernel_read(trampoline, (void *)start_offset, size); if (WARN_ON(ret < 0)) { - tramp_free(trampoline); + tramp_free(trampoline, *tramp_size); return 0; } @@ -797,7 +801,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* Are we pointing to the reference? */ if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0)) { - tramp_free(trampoline); + tramp_free(trampoline, *tramp_size); return 0; } @@ -839,7 +843,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) unsigned long offset; unsigned long ip; unsigned int size; - int ret; + int ret, npages; if (ops->trampoline) { /* @@ -848,11 +852,14 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) */ if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) return; + npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT; + set_memory_rw(ops->trampoline, npages); } else { ops->trampoline = create_trampoline(ops, &size); if (!ops->trampoline) return; ops->trampoline_size = size; + npages = PAGE_ALIGN(size) >> PAGE_SHIFT; } offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); @@ -863,6 +870,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) /* Do a safe modify in case the trampoline is executing */ new = ftrace_call_replace(ip, (unsigned long)func); ret = update_ftrace_func(ip, new); + set_memory_ro(ops->trampoline, npages); /* The update should never fail */ WARN_ON(ret); @@ -939,7 +947,7 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops) if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) return; - tramp_free((void *)ops->trampoline); + tramp_free((void *)ops->trampoline, ops->trampoline_size); ops->trampoline = 0; } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 5b2bbfbb3712..6b877807598b 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -52,6 +52,7 @@ #include <linux/ftrace.h> #include <linux/frame.h> #include <linux/kasan.h> +#include <linux/moduleloader.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> @@ -417,6 +418,14 @@ static void prepare_boost(struct kprobe *p, 
struct insn *insn) } } +/* Recover page to RW mode before releasing it */ +void free_insn_page(void *page) +{ + set_memory_nx((unsigned long)page & PAGE_MASK, 1); + set_memory_rw((unsigned long)page & PAGE_MASK, 1); + module_memfree(page); +} + static int arch_copy_kprobe(struct kprobe *p) { struct insn insn; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0b4d3c686b1e..f81823695014 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -980,8 +980,6 @@ void __init setup_arch(char **cmdline_p) */ x86_configure_nx(); - simple_udelay_calibration(); - parse_early_param(); #ifdef CONFIG_MEMORY_HOTPLUG @@ -1041,6 +1039,8 @@ void __init setup_arch(char **cmdline_p) */ init_hypervisor_platform(); + simple_udelay_calibration(); + x86_init.resources.probe_roms(); /* after parse_early_param, so could debug it */ diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 82c6d7f1fd73..b9389d72b2f7 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -104,6 +104,11 @@ static inline unsigned long *last_frame(struct unwind_state *state) return (unsigned long *)task_pt_regs(state->task) - 2; } +static bool is_last_frame(struct unwind_state *state) +{ + return state->bp == last_frame(state); +} + #ifdef CONFIG_X86_32 #define GCC_REALIGN_WORDS 3 #else @@ -115,16 +120,15 @@ static inline unsigned long *last_aligned_frame(struct unwind_state *state) return last_frame(state) - GCC_REALIGN_WORDS; } -static bool is_last_task_frame(struct unwind_state *state) +static bool is_last_aligned_frame(struct unwind_state *state) { unsigned long *last_bp = last_frame(state); unsigned long *aligned_bp = last_aligned_frame(state); /* - * We have to check for the last task frame at two different locations - * because gcc can occasionally decide to realign the stack pointer and - * change the offset of the stack frame in the prologue of a function - * called by head/entry code. Examples: + * GCC can occasionally decide to realign the stack pointer and change + * the offset of the stack frame in the prologue of a function called + * by head/entry code. Examples: * * <start_secondary>: * push %edi @@ -141,11 +145,38 @@ static bool is_last_task_frame(struct unwind_state *state) * push %rbp * mov %rsp,%rbp * - * Note that after aligning the stack, it pushes a duplicate copy of - * the return address before pushing the frame pointer. + * After aligning the stack, it pushes a duplicate copy of the return + * address before pushing the frame pointer. 
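+ * The check below therefore accepts the frame when bp sits at the + * aligned location and the word above it (the duplicated return + * address) matches the return address of the normal last frame.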
+ */ + return (state->bp == aligned_bp && *(aligned_bp + 1) == *(last_bp + 1)); +} + +static bool is_last_ftrace_frame(struct unwind_state *state) +{ + unsigned long *last_bp = last_frame(state); + unsigned long *last_ftrace_bp = last_bp - 3; + + /* + * When unwinding from an ftrace handler of a function called by entry + * code, the stack layout of the last frame is: + * + * bp + * parent ret addr + * bp + * function ret addr + * parent ret addr + * pt_regs + * ----------------- */ - return (state->bp == last_bp || - (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1))); + return (state->bp == last_ftrace_bp && + *state->bp == *(state->bp + 2) && + *(state->bp + 1) == *(state->bp + 4)); +} + +static bool is_last_task_frame(struct unwind_state *state) +{ + return is_last_frame(state) || is_last_aligned_frame(state) || + is_last_ftrace_frame(state); } /* diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1dcd2be4cce4..c8520b2c62d2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -186,7 +186,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) unsigned int i, level; unsigned long addr; - BUG_ON(irqs_disabled()); + BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); WARN_ON(PAGE_ALIGN(start) != start); on_each_cpu(__cpa_flush_range, NULL, 1); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 9b78685b66e6..83a59a67757a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -65,9 +65,11 @@ static int __init nopat(char *str) } early_param("nopat", nopat); +static bool __read_mostly __pat_initialized = false; + bool pat_enabled(void) { - return !!__pat_enabled; + return __pat_initialized; } EXPORT_SYMBOL_GPL(pat_enabled); @@ -225,13 +227,14 @@ static void pat_bsp_init(u64 pat) } wrmsrl(MSR_IA32_CR_PAT, pat); + __pat_initialized = true; __init_cache_modes(pat); } static void pat_ap_init(u64 pat) { - if (!boot_cpu_has(X86_FEATURE_PAT)) { + if (!this_cpu_has(X86_FEATURE_PAT)) { /* * If this happens we are on a secondary CPU, but switched to * PAT on the boot CPU. We have no way to undo PAT. 
@@ -306,7 +309,7 @@ void pat_init(void) u64 pat; struct cpuinfo_x86 *c = &boot_cpu_data; - if (!pat_enabled()) { + if (!__pat_enabled) { init_cache_modes(); return; } diff --git a/block/blk-mq.c b/block/blk-mq.c index a69ad122ed66..f2224ffd225d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -628,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); -void blk_mq_abort_requeue_list(struct request_queue *q) -{ - unsigned long flags; - LIST_HEAD(rq_list); - - spin_lock_irqsave(&q->requeue_lock, flags); - list_splice_init(&q->requeue_list, &rq_list); - spin_unlock_irqrestore(&q->requeue_lock, flags); - - while (!list_empty(&rq_list)) { - struct request *rq; - - rq = list_first_entry(&rq_list, struct request, queuelist); - list_del_init(&rq->queuelist); - blk_mq_end_request(rq, -EIO); - } -} -EXPORT_SYMBOL(blk_mq_abort_requeue_list); - struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) { if (tag < tags->nr_tags) { diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 504fee940052..712b018e9f54 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -887,10 +887,10 @@ int blk_register_queue(struct gendisk *disk) goto unlock; } - if (q->mq_ops) + if (q->mq_ops) { __blk_mq_register_dev(dev, q); - - blk_mq_debugfs_register(q); + blk_mq_debugfs_register(q); + } kobject_uevent(&q->kobj, KOBJ_ADD); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index b78db2e5fdff..fc13dd0c6e39 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -22,11 +22,11 @@ static int throtl_quantum = 32; #define DFL_THROTL_SLICE_HD (HZ / 10) #define DFL_THROTL_SLICE_SSD (HZ / 50) #define MAX_THROTL_SLICE (HZ) -#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */ -#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */ #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */ -/* default latency target is 0, eg, guarantee IO latency by default */ -#define DFL_LATENCY_TARGET (0) +#define MIN_THROTL_BPS (320 * 1024) +#define MIN_THROTL_IOPS (10) +#define DFL_LATENCY_TARGET (-1L) +#define DFL_IDLE_THRESHOLD (0) #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT) @@ -157,6 +157,7 @@ struct throtl_grp { unsigned long last_check_time; unsigned long latency_target; /* us */ + unsigned long latency_target_conf; /* us */ /* When did we start a new slice */ unsigned long slice_start[2]; unsigned long slice_end[2]; @@ -165,6 +166,7 @@ struct throtl_grp { unsigned long checked_last_finish_time; /* ns / 1024 */ unsigned long avg_idletime; /* ns / 1024 */ unsigned long idletime_threshold; /* us */ + unsigned long idletime_threshold_conf; /* us */ unsigned int bio_cnt; /* total bios */ unsigned int bad_bio_cnt; /* bios exceeding latency threshold */ @@ -201,8 +203,6 @@ struct throtl_data unsigned int limit_index; bool limit_valid[LIMIT_CNT]; - unsigned long dft_idletime_threshold; /* us */ - unsigned long low_upgrade_time; unsigned long low_downgrade_time; @@ -294,8 +294,14 @@ static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw) td = tg->td; ret = tg->bps[rw][td->limit_index]; - if (ret == 0 && td->limit_index == LIMIT_LOW) - return tg->bps[rw][LIMIT_MAX]; + if (ret == 0 && td->limit_index == LIMIT_LOW) { + /* intermediate node or iops isn't 0 */ + if (!list_empty(&blkg->blkcg->css.children) || + tg->iops[rw][td->limit_index]) + return U64_MAX; + else + return MIN_THROTL_BPS; + } if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] && tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) { @@ -315,10 +321,17 @@ static 
unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent) return UINT_MAX; + td = tg->td; ret = tg->iops[rw][td->limit_index]; - if (ret == 0 && tg->td->limit_index == LIMIT_LOW) - return tg->iops[rw][LIMIT_MAX]; + if (ret == 0 && tg->td->limit_index == LIMIT_LOW) { + /* intermediate node or bps isn't 0 */ + if (!list_empty(&blkg->blkcg->css.children) || + tg->bps[rw][td->limit_index]) + return UINT_MAX; + else + return MIN_THROTL_IOPS; + } if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] && tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) { @@ -482,6 +495,9 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) /* LIMIT_LOW will have default value 0 */ tg->latency_target = DFL_LATENCY_TARGET; + tg->latency_target_conf = DFL_LATENCY_TARGET; + tg->idletime_threshold = DFL_IDLE_THRESHOLD; + tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD; return &tg->pd; } @@ -510,8 +526,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd) if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent) sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; tg->td = td; - - tg->idletime_threshold = td->dft_idletime_threshold; } /* @@ -1349,7 +1363,7 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v) return 0; } -static void tg_conf_updated(struct throtl_grp *tg) +static void tg_conf_updated(struct throtl_grp *tg, bool global) { struct throtl_service_queue *sq = &tg->service_queue; struct cgroup_subsys_state *pos_css; @@ -1367,8 +1381,26 @@ static void tg_conf_updated(struct throtl_grp *tg) * restrictions in the whole hierarchy and allows them to bypass * blk-throttle. */ - blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg)) - tg_update_has_rules(blkg_to_tg(blkg)); + blkg_for_each_descendant_pre(blkg, pos_css, + global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) { + struct throtl_grp *this_tg = blkg_to_tg(blkg); + struct throtl_grp *parent_tg; + + tg_update_has_rules(this_tg); + /* ignore root/second level */ + if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent || + !blkg->parent->parent) + continue; + parent_tg = blkg_to_tg(blkg->parent); + /* + * make sure all children have a lower idle time threshold and + * a higher latency target + */ + this_tg->idletime_threshold = min(this_tg->idletime_threshold, + parent_tg->idletime_threshold); + this_tg->latency_target = max(this_tg->latency_target, + parent_tg->latency_target); + } /* * We're already holding queue_lock and know @tg is valid.
Let's @@ -1413,7 +1445,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of, else *(unsigned int *)((void *)tg + of_cft(of)->private) = v; - tg_conf_updated(tg); + tg_conf_updated(tg, false); ret = 0; out_finish: blkg_conf_finish(&ctx); @@ -1497,34 +1529,34 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd, tg->iops_conf[READ][off] == iops_dft && tg->iops_conf[WRITE][off] == iops_dft && (off != LIMIT_LOW || - (tg->idletime_threshold == tg->td->dft_idletime_threshold && - tg->latency_target == DFL_LATENCY_TARGET))) + (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD && + tg->latency_target_conf == DFL_LATENCY_TARGET))) return 0; - if (tg->bps_conf[READ][off] != bps_dft) + if (tg->bps_conf[READ][off] != U64_MAX) snprintf(bufs[0], sizeof(bufs[0]), "%llu", tg->bps_conf[READ][off]); - if (tg->bps_conf[WRITE][off] != bps_dft) + if (tg->bps_conf[WRITE][off] != U64_MAX) snprintf(bufs[1], sizeof(bufs[1]), "%llu", tg->bps_conf[WRITE][off]); - if (tg->iops_conf[READ][off] != iops_dft) + if (tg->iops_conf[READ][off] != UINT_MAX) snprintf(bufs[2], sizeof(bufs[2]), "%u", tg->iops_conf[READ][off]); - if (tg->iops_conf[WRITE][off] != iops_dft) + if (tg->iops_conf[WRITE][off] != UINT_MAX) snprintf(bufs[3], sizeof(bufs[3]), "%u", tg->iops_conf[WRITE][off]); if (off == LIMIT_LOW) { - if (tg->idletime_threshold == ULONG_MAX) + if (tg->idletime_threshold_conf == ULONG_MAX) strcpy(idle_time, " idle=max"); else snprintf(idle_time, sizeof(idle_time), " idle=%lu", - tg->idletime_threshold); + tg->idletime_threshold_conf); - if (tg->latency_target == ULONG_MAX) + if (tg->latency_target_conf == ULONG_MAX) strcpy(latency_time, " latency=max"); else snprintf(latency_time, sizeof(latency_time), - " latency=%lu", tg->latency_target); + " latency=%lu", tg->latency_target_conf); } seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n", @@ -1563,8 +1595,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, v[2] = tg->iops_conf[READ][index]; v[3] = tg->iops_conf[WRITE][index]; - idle_time = tg->idletime_threshold; - latency_time = tg->latency_target; + idle_time = tg->idletime_threshold_conf; + latency_time = tg->latency_target_conf; while (true) { char tok[27]; /* wiops=18446744073709551616 */ char *p; @@ -1623,17 +1655,33 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, tg->iops_conf[READ][LIMIT_MAX]); tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW], tg->iops_conf[WRITE][LIMIT_MAX]); + tg->idletime_threshold_conf = idle_time; + tg->latency_target_conf = latency_time; + + /* force user to configure all settings for low limit */ + if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] || + tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) || + tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD || + tg->latency_target_conf == DFL_LATENCY_TARGET) { + tg->bps[READ][LIMIT_LOW] = 0; + tg->bps[WRITE][LIMIT_LOW] = 0; + tg->iops[READ][LIMIT_LOW] = 0; + tg->iops[WRITE][LIMIT_LOW] = 0; + tg->idletime_threshold = DFL_IDLE_THRESHOLD; + tg->latency_target = DFL_LATENCY_TARGET; + } else if (index == LIMIT_LOW) { + tg->idletime_threshold = tg->idletime_threshold_conf; + tg->latency_target = tg->latency_target_conf; + } - if (index == LIMIT_LOW) { - blk_throtl_update_limit_valid(tg->td); - if (tg->td->limit_valid[LIMIT_LOW]) + blk_throtl_update_limit_valid(tg->td); + if (tg->td->limit_valid[LIMIT_LOW]) { + if (index == LIMIT_LOW) tg->td->limit_index = LIMIT_LOW; - tg->idletime_threshold = (idle_time == ULONG_MAX) ? 
- ULONG_MAX : idle_time; - tg->latency_target = (latency_time == ULONG_MAX) ? - ULONG_MAX : latency_time; - } - tg_conf_updated(tg); + } else + tg->td->limit_index = LIMIT_MAX; + tg_conf_updated(tg, index == LIMIT_LOW && + tg->td->limit_valid[LIMIT_LOW]); ret = 0; out_finish: blkg_conf_finish(&ctx); @@ -1722,17 +1770,25 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg) /* * cgroup is idle if: * - single idle is too long, longer than a fixed value (in case user - * configure a too big threshold) or 4 times of slice + * configure a too big threshold) or 4 times of idletime threshold * - average think time is more than threshold * - IO latency is largely below threshold */ - unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice); - - time = min_t(unsigned long, MAX_IDLE_TIME, time); - return (ktime_get_ns() >> 10) - tg->last_finish_time > time || - tg->avg_idletime > tg->idletime_threshold || - (tg->latency_target && tg->bio_cnt && + unsigned long time; + bool ret; + + time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold); + ret = tg->latency_target == DFL_LATENCY_TARGET || + tg->idletime_threshold == DFL_IDLE_THRESHOLD || + (ktime_get_ns() >> 10) - tg->last_finish_time > time || + tg->avg_idletime > tg->idletime_threshold || + (tg->latency_target && tg->bio_cnt && tg->bad_bio_cnt * 5 < tg->bio_cnt); + throtl_log(&tg->service_queue, + "avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d", + tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt, + tg->bio_cnt, ret, tg->td->scale); + return ret; } static bool throtl_tg_can_upgrade(struct throtl_grp *tg) @@ -1828,6 +1884,7 @@ static void throtl_upgrade_state(struct throtl_data *td) struct cgroup_subsys_state *pos_css; struct blkcg_gq *blkg; + throtl_log(&td->service_queue, "upgrade to max"); td->limit_index = LIMIT_MAX; td->low_upgrade_time = jiffies; td->scale = 0; @@ -1850,6 +1907,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new) { td->scale /= 2; + throtl_log(&td->service_queue, "downgrade, scale %d", td->scale); if (td->scale) { td->low_upgrade_time = jiffies - td->scale * td->throtl_slice; return; @@ -2023,6 +2081,11 @@ static void throtl_update_latency_buckets(struct throtl_data *td) td->avg_buckets[i].valid = true; last_latency = td->avg_buckets[i].latency; } + + for (i = 0; i < LATENCY_BUCKET_SIZE; i++) + throtl_log(&td->service_queue, + "Latency bucket %d: latency=%ld, valid=%d", i, + td->avg_buckets[i].latency, td->avg_buckets[i].valid); } #else static inline void throtl_update_latency_buckets(struct throtl_data *td) @@ -2354,19 +2417,14 @@ void blk_throtl_exit(struct request_queue *q) void blk_throtl_register_queue(struct request_queue *q) { struct throtl_data *td; - struct cgroup_subsys_state *pos_css; - struct blkcg_gq *blkg; td = q->td; BUG_ON(!td); - if (blk_queue_nonrot(q)) { + if (blk_queue_nonrot(q)) td->throtl_slice = DFL_THROTL_SLICE_SSD; - td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD; - } else { + else td->throtl_slice = DFL_THROTL_SLICE_HD; - td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD; - } #ifndef CONFIG_BLK_DEV_THROTTLING_LOW /* if no low limit, use previous default */ td->throtl_slice = DFL_THROTL_SLICE_HD; @@ -2375,18 +2433,6 @@ void blk_throtl_register_queue(struct request_queue *q) td->track_bio_latency = !q->mq_ops && !q->request_fn; if (!td->track_bio_latency) blk_stat_enable_accounting(q); - - /* - * some tg are created before queue is fully initialized, eg, nonrot - * isn't initialized yet - */ - rcu_read_lock(); 
- blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) { - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->idletime_threshold = td->dft_idletime_threshold; - } - rcu_read_unlock(); } #ifdef CONFIG_BLK_DEV_THROTTLING_LOW diff --git a/block/partition-generic.c b/block/partition-generic.c index ff07b9143ca4..c5ec8246e25e 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -320,8 +320,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, if (info) { struct partition_meta_info *pinfo = alloc_part_info(disk); - if (!pinfo) + if (!pinfo) { + err = -ENOMEM; goto out_free_stats; + } memcpy(pinfo, info, sizeof(*info)); p->info = pinfo; } diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 93e7c1b32edd..5610cd537da7 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -300,6 +300,8 @@ static void parse_bsd(struct parsed_partitions *state, continue; bsd_start = le32_to_cpu(p->p_offset); bsd_size = le32_to_cpu(p->p_size); + if (memcmp(flavour, "bsd\0", 4) == 0) + bsd_start += offset; if (offset == bsd_start && size == bsd_size) /* full parent partition, we have it already */ continue; diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 014af741fc6a..4faa0fd53b0c 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -764,6 +764,44 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm) return 0; } +static int skcipher_setkey_unaligned(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + unsigned long alignmask = crypto_skcipher_alignmask(tfm); + struct skcipher_alg *cipher = crypto_skcipher_alg(tfm); + u8 *buffer, *alignbuffer; + unsigned long absize; + int ret; + + absize = keylen + alignmask; + buffer = kmalloc(absize, GFP_ATOMIC); + if (!buffer) + return -ENOMEM; + + alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + memcpy(alignbuffer, key, keylen); + ret = cipher->setkey(tfm, alignbuffer, keylen); + kzfree(buffer); + return ret; +} + +static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct skcipher_alg *cipher = crypto_skcipher_alg(tfm); + unsigned long alignmask = crypto_skcipher_alignmask(tfm); + + if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) { + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + if ((unsigned long)key & alignmask) + return skcipher_setkey_unaligned(tfm, key, keylen); + + return cipher->setkey(tfm, key, keylen); +} + static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm) { struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm); @@ -784,7 +822,7 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm) tfm->__crt_alg->cra_type == &crypto_givcipher_type) return crypto_init_skcipher_ops_ablkcipher(tfm); - skcipher->setkey = alg->setkey; + skcipher->setkey = skcipher_setkey; skcipher->encrypt = alg->encrypt; skcipher->decrypt = alg->decrypt; skcipher->ivsize = alg->ivsize; diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index b7c2a06963d6..25aba9b107dd 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -57,6 +57,7 @@ #define ACPI_BUTTON_LID_INIT_IGNORE 0x00 #define ACPI_BUTTON_LID_INIT_OPEN 0x01 +#define ACPI_BUTTON_LID_INIT_METHOD 0x02 #define _COMPONENT ACPI_BUTTON_COMPONENT ACPI_MODULE_NAME("button"); @@ -376,6 +377,9 @@ static void acpi_lid_initialize_state(struct acpi_device *device) case ACPI_BUTTON_LID_INIT_OPEN: (void)acpi_lid_notify_state(device, 1); break; + case 
ACPI_BUTTON_LID_INIT_METHOD: + (void)acpi_lid_update_state(device); + break; case ACPI_BUTTON_LID_INIT_IGNORE: default: break; @@ -560,6 +564,9 @@ static int param_set_lid_init_state(const char *val, struct kernel_param *kp) if (!strncmp(val, "open", sizeof("open") - 1)) { lid_init_state = ACPI_BUTTON_LID_INIT_OPEN; pr_info("Notify initial lid state as open\n"); + } else if (!strncmp(val, "method", sizeof("method") - 1)) { + lid_init_state = ACPI_BUTTON_LID_INIT_METHOD; + pr_info("Notify initial lid state with _LID return value\n"); } else if (!strncmp(val, "ignore", sizeof("ignore") - 1)) { lid_init_state = ACPI_BUTTON_LID_INIT_IGNORE; pr_info("Do not notify initial lid state\n"); @@ -573,6 +580,8 @@ static int param_get_lid_init_state(char *buffer, struct kernel_param *kp) switch (lid_init_state) { case ACPI_BUTTON_LID_INIT_OPEN: return sprintf(buffer, "open"); + case ACPI_BUTTON_LID_INIT_METHOD: + return sprintf(buffer, "method"); case ACPI_BUTTON_LID_INIT_IGNORE: return sprintf(buffer, "ignore"); default: diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index 3ba1c3472cf9..fd86bec98dea 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -26,7 +26,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, struct nfit_spa *nfit_spa; /* We only care about memory errors */ - if (!(mce->status & MCACOD)) + if (!mce_is_memory_error(mce)) return NOTIFY_DONE; /* diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index f62082fdd670..9c36b27996fc 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -512,13 +512,12 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws) /** * wakeup_source_activate - Mark given wakeup source as active. * @ws: Wakeup source to handle. - * @hard: If set, abort suspends in progress and wake up from suspend-to-idle. * * Update the @ws' statistics and, if @ws has just been activated, notify the PM * core of the event by incrementing the counter of wakeup events being * processed. */ -static void wakeup_source_activate(struct wakeup_source *ws, bool hard) +static void wakeup_source_activate(struct wakeup_source *ws) { unsigned int cec; @@ -526,9 +525,6 @@ static void wakeup_source_activate(struct wakeup_source *ws, bool hard) "unregistered wakeup source\n")) return; - if (hard) - pm_system_wakeup(); - ws->active = true; ws->active_count++; ws->last_time = ktime_get(); @@ -554,7 +550,10 @@ static void wakeup_source_report_event(struct wakeup_source *ws, bool hard) ws->wakeup_count++; if (!ws->active) - wakeup_source_activate(ws, hard); + wakeup_source_activate(ws); + + if (hard) + pm_system_wakeup(); } /** diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 74ed7e9a7f27..2011fec2d6ad 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -71,6 +71,15 @@ config ARM_HIGHBANK_CPUFREQ If in doubt, say N. +config ARM_DB8500_CPUFREQ + tristate "ST-Ericsson DB8500 cpufreq" if COMPILE_TEST && !ARCH_U8500 + default ARCH_U8500 + depends on HAS_IOMEM + depends on !CPU_THERMAL || THERMAL + help + This adds the CPUFreq driver for ST-Ericsson Ux500 (DB8500) SoC + series.
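One subtlety in the param_set_lid_init_state() hunk above: matching with strncmp(val, "...", sizeof("...") - 1) accepts any value that merely starts with a keyword. A standalone user-space sketch of that matching behavior (hypothetical inputs; not the kernel code itself):

#include <stdio.h>
#include <string.h>

/*
 * Mirrors the strncmp()-based prefix matching used by
 * param_set_lid_init_state() above.  Only sizeof(keyword) - 1 bytes
 * are compared, so "methodical" is accepted as "method".
 */
static const char *parse_lid_init_state(const char *val)
{
        if (!strncmp(val, "open", sizeof("open") - 1))
                return "open";
        if (!strncmp(val, "method", sizeof("method") - 1))
                return "method";
        if (!strncmp(val, "ignore", sizeof("ignore") - 1))
                return "ignore";
        return "invalid";
}

int main(void)
{
        printf("%s\n", parse_lid_init_state("method"));     /* method */
        printf("%s\n", parse_lid_init_state("methodical")); /* method */
        printf("%s\n", parse_lid_init_state("closed"));     /* invalid */
        return 0;
}

The prefix match makes the parser tolerant of trailing characters such as a stray newline, which is presumably why only the keyword length is compared.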
+ config ARM_IMX6Q_CPUFREQ tristate "Freescale i.MX6 cpufreq support" depends on ARCH_MXC diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index b7e78f063c4f..ab3a42cd29ef 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -53,7 +53,7 @@ obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o -obj-$(CONFIG_UX500_SOC_DB8500) += dbx500-cpufreq.o +obj-$(CONFIG_ARM_DB8500_CPUFREQ) += dbx500-cpufreq.o obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ) += exynos5440-cpufreq.o obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 512bdbc23bbb..4a038dcf5361 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -558,8 +558,8 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, if (WARN_ON(!dmabuf || !dev)) return ERR_PTR(-EINVAL); - attach = kzalloc(sizeof(struct dma_buf_attachment), GFP_KERNEL); - if (attach == NULL) + attach = kzalloc(sizeof(*attach), GFP_KERNEL); + if (!attach) return ERR_PTR(-ENOMEM); attach->dev = dev; @@ -1122,9 +1122,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) attach_count = 0; list_for_each_entry(attach_obj, &buf_obj->attachments, node) { - seq_puts(s, "\t"); - - seq_printf(s, "%s\n", dev_name(attach_obj->dev)); + seq_printf(s, "\t%s\n", dev_name(attach_obj->dev)); attach_count++; } diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 0918d3f003d6..57da14c15987 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -402,6 +402,11 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) } } + if (!timeout) { + ret = 0; + goto out; + } + cb.base.func = dma_fence_default_wait_cb; cb.task = current; list_add(&cb.base.node, &fence->cb_list); diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index c769dc653b34..82a6e7f6d37f 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -110,7 +110,7 @@ static void sync_print_fence(struct seq_file *s, } } - seq_puts(s, "\n"); + seq_putc(s, '\n'); } static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) @@ -132,9 +132,11 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) static void sync_print_sync_file(struct seq_file *s, struct sync_file *sync_file) { + char buf[128]; int i; - seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name, + seq_printf(s, "[%p] %s: %s\n", sync_file, + sync_file_get_name(sync_file, buf, sizeof(buf)), sync_status_str(dma_fence_get_status(sync_file->fence))); if (dma_fence_is_array(sync_file->fence)) { @@ -161,7 +163,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused) sync_timeline_list); sync_print_obj(s, obj); - seq_puts(s, "\n"); + seq_putc(s, '\n'); } spin_unlock_irqrestore(&sync_timeline_list_lock, flags); @@ -173,7 +175,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused) container_of(pos, struct sync_file, sync_file_list); sync_print_sync_file(s, sync_file); - seq_puts(s, "\n"); + seq_putc(s, '\n'); } spin_unlock_irqrestore(&sync_file_list_lock, flags); return 0; diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 2321035f6204..545e2c5c4815 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -41,8 +41,6 @@ static struct sync_file 
*sync_file_alloc(void) if (IS_ERR(sync_file->file)) goto err; - kref_init(&sync_file->kref); - init_waitqueue_head(&sync_file->wq); INIT_LIST_HEAD(&sync_file->cb.node); @@ -82,11 +80,6 @@ struct sync_file *sync_file_create(struct dma_fence *fence) sync_file->fence = dma_fence_get(fence); - snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d", - fence->ops->get_driver_name(fence), - fence->ops->get_timeline_name(fence), fence->context, - fence->seqno); - return sync_file; } EXPORT_SYMBOL(sync_file_create); @@ -131,6 +124,36 @@ struct dma_fence *sync_file_get_fence(int fd) } EXPORT_SYMBOL(sync_file_get_fence); +/** + * sync_file_get_name - get the name of the sync_file + * @sync_file: sync_file to get the name from + * @buf: destination buffer to copy sync_file name into + * @len: available size of destination buffer. + * + * Each sync_file may have a name assigned either by the user (when merging + * sync_files together) or created from the fence it contains. In the latter + * case construction of the name is deferred until use, and so requires + * sync_file_get_name(). + * + * Returns: a string representing the name. + */ +char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len) +{ + if (sync_file->user_name[0]) { + strlcpy(buf, sync_file->user_name, len); + } else { + struct dma_fence *fence = sync_file->fence; + + snprintf(buf, len, "%s-%s%llu-%d", + fence->ops->get_driver_name(fence), + fence->ops->get_timeline_name(fence), + fence->context, + fence->seqno); + } + + return buf; +} + static int sync_file_set_fence(struct sync_file *sync_file, struct dma_fence **fences, int num_fences) { @@ -268,7 +291,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, goto err; } - strlcpy(sync_file->name, name, sizeof(sync_file->name)); + strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file; err: @@ -277,22 +300,15 @@ err: } -static void sync_file_free(struct kref *kref) +static int sync_file_release(struct inode *inode, struct file *file) { - struct sync_file *sync_file = container_of(kref, struct sync_file, - kref); + struct sync_file *sync_file = file->private_data; if (test_bit(POLL_ENABLED, &sync_file->fence->flags)) dma_fence_remove_callback(sync_file->fence, &sync_file->cb); dma_fence_put(sync_file->fence); kfree(sync_file); -} - -static int sync_file_release(struct inode *inode, struct file *file) -{ - struct sync_file *sync_file = file->private_data; - kref_put(&sync_file->kref, sync_file_free); return 0; } @@ -422,7 +438,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, } no_fences: - strlcpy(info.name, sync_file->name, sizeof(info.name)); + sync_file_get_name(sync_file, info.name, sizeof(info.name)); info.status = dma_fence_is_signaled(sync_file->fence); info.num_fences = num_fences; diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index ab3a951a17e6..ef1fafdad400 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -53,6 +53,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry, if (sscanf(name, "dump-type%u-%u-%d-%lu-%c", &record->type, &part, &cnt, &time, &data_type) == 5) { record->id = generic_id(time, part, cnt); + record->part = part; record->count = cnt; record->time.tv_sec = time; record->time.tv_nsec = 0; @@ -64,6 +65,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry, } else if (sscanf(name, "dump-type%u-%u-%d-%lu", &record->type, &part, &cnt, &time) == 4) { record->id =
generic_id(time, part, cnt); + record->part = part; record->count = cnt; record->time.tv_sec = time; record->time.tv_nsec = 0; @@ -77,6 +79,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry, * multiple logs, remains. */ record->id = generic_id(time, part, 0); + record->part = part; record->count = 0; record->time.tv_sec = time; record->time.tv_nsec = 0; @@ -241,9 +244,15 @@ static int efi_pstore_write(struct pstore_record *record) efi_guid_t vendor = LINUX_EFI_CRASH_GUID; int i, ret = 0; + record->time.tv_sec = get_seconds(); + record->time.tv_nsec = 0; + + record->id = generic_id(record->time.tv_sec, record->part, + record->count); + snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lu-%c", record->type, record->part, record->count, - get_seconds(), record->compressed ? 'C' : 'D'); + record->time.tv_sec, record->compressed ? 'C' : 'D'); for (i = 0; i < DUMP_NAME_LEN; i++) efi_name[i] = name[i]; @@ -255,7 +264,6 @@ static int efi_pstore_write(struct pstore_record *record) if (record->reason == KMSG_DUMP_OOPS) efivar_run_worker(); - record->id = record->part; return ret; }; @@ -287,7 +295,7 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data) * holding multiple logs, remains. */ snprintf(name_old, sizeof(name_old), "dump-type%u-%u-%lu", - ed->record->type, (unsigned int)ed->record->id, + ed->record->type, ed->record->part, ed->record->time.tv_sec); for (i = 0; i < DUMP_NAME_LEN; i++) @@ -320,10 +328,7 @@ static int efi_pstore_erase(struct pstore_record *record) char name[DUMP_NAME_LEN]; efi_char16_t efi_name[DUMP_NAME_LEN]; int found, i; - unsigned int part; - do_div(record->id, 1000); - part = do_div(record->id, 100); snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lu", record->type, record->part, record->count, record->time.tv_sec); diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 78d7fc0ebb57..83cb2a88c204 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -246,6 +246,8 @@ source "drivers/gpu/drm/fsl-dcu/Kconfig" source "drivers/gpu/drm/tegra/Kconfig" +source "drivers/gpu/drm/stm/Kconfig" + source "drivers/gpu/drm/panel/Kconfig" source "drivers/gpu/drm/bridge/Kconfig" @@ -274,6 +276,8 @@ source "drivers/gpu/drm/meson/Kconfig" source "drivers/gpu/drm/tinydrm/Kconfig" +source "drivers/gpu/drm/pl111/Kconfig" + # Keep legacy drivers last menuconfig DRM_LEGACY diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 59f0f9b696eb..c156fecfb362 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -82,6 +82,7 @@ obj-$(CONFIG_DRM_BOCHS) += bochs/ obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/ obj-$(CONFIG_DRM_MSM) += msm/ obj-$(CONFIG_DRM_TEGRA) += tegra/ +obj-$(CONFIG_DRM_STM) += stm/ obj-$(CONFIG_DRM_STI) += sti/ obj-$(CONFIG_DRM_IMX) += imx/ obj-$(CONFIG_DRM_MEDIATEK) += mediatek/ @@ -96,3 +97,4 @@ obj-y += hisilicon/ obj-$(CONFIG_DRM_ZTE) += zte/ obj-$(CONFIG_DRM_MXSFB) += mxsfb/ obj-$(CONFIG_DRM_TINYDRM) += tinydrm/ +obj-$(CONFIG_DRM_PL111) += pl111/ diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 660786aba7d2..20bde726419e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -4,7 +4,7 @@ FULL_AMD_PATH=$(src)/.. 
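One detail worth spelling out in the efi-pstore hunks above: the write path now fills in record->time and derives record->id from the timestamp, part and count up front, and the erase path consequently drops the do_div() sequence that used to decode the part number back out of the id. generic_id() itself is not visible in this diff; the removed decode (divide by 1000 to strip the count, then take the remainder modulo 100 for the part) implies a decimal packing along the following lines, so treat this as a reconstruction rather than the verbatim helper:

/* Assumed shape of generic_id(), inferred from the removed decode in
 * efi_pstore_erase(): id / 1000 dropped the count and (id / 1000) % 100
 * recovered the part, which gives the packing below. */
static u64 generic_id(unsigned long timestamp, unsigned int part, int count)
{
	return ((u64)timestamp * 100 + part) * 1000 + count;
}

With record->part and record->count now carried in struct pstore_record itself, nothing needs to invert this packing anymore, which is why the erase path can use the fields directly.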
-ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \ +ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_PATH)/include \ -I$(FULL_AMD_PATH)/amdgpu \ -I$(FULL_AMD_PATH)/scheduler \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 833c3c16501a..77ff68f9932b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -36,11 +36,11 @@ #include <linux/hashtable.h> #include <linux/dma-fence.h> -#include <ttm/ttm_bo_api.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> -#include <ttm/ttm_module.h> -#include <ttm/ttm_execbuf_util.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_module.h> +#include <drm/ttm/ttm_execbuf_util.h> #include <drm/drmP.h> #include <drm/drm_gem.h> @@ -1912,10 +1912,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe); int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe); void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe); -int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, - int *max_error, - struct timeval *vblank_time, - unsigned flags); long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index a6649874e6ce..9f0247cdda5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -96,7 +96,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, int r; unsigned long total_size = 0; - array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry)); + array = kvmalloc_array(num_entries, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL); if (!array) return -ENOMEM; memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); @@ -148,7 +148,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, for (i = 0; i < list->num_entries; ++i) amdgpu_bo_unref(&list->array[i].robj); - drm_free_large(list->array); + kvfree(list->array); list->gds_obj = gds_obj; list->gws_obj = gws_obj; @@ -163,7 +163,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, error_free: while (i--) amdgpu_bo_unref(&array[i].robj); - drm_free_large(array); + kvfree(array); return r; } @@ -224,7 +224,7 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list) amdgpu_bo_unref(&list->array[i].robj); mutex_destroy(&list->lock); - drm_free_large(list->array); + kvfree(list->array); kfree(list); } @@ -244,8 +244,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, int r; - info = drm_malloc_ab(args->in.bo_number, - sizeof(struct drm_amdgpu_bo_list_entry)); + info = kvmalloc_array(args->in.bo_number, + sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL); if (!info) return -ENOMEM; @@ -311,11 +311,11 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, memset(args, 0, sizeof(*args)); args->out.list_handle = handle; - drm_free_large(info); + kvfree(info); return 0; error_free: - drm_free_large(info); + kvfree(info); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4e6b9501ab0a..5b3e0f63a115 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -194,7 +194,7 @@ int amdgpu_cs_parser_init(struct 
amdgpu_cs_parser *p, void *data) size = p->chunks[i].length_dw; cdata = (void __user *)(uintptr_t)user_chunk.chunk_data; - p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); + p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); if (p->chunks[i].kdata == NULL) { ret = -ENOMEM; i--; @@ -247,7 +247,7 @@ free_all_kdata: i = p->nchunks - 1; free_partial_kdata: for (; i >= 0; i--) - drm_free_large(p->chunks[i].kdata); + kvfree(p->chunks[i].kdata); kfree(p->chunks); p->chunks = NULL; p->nchunks = 0; @@ -505,7 +505,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, return r; if (binding_userptr) { - drm_free_large(lobj->user_pages); + kvfree(lobj->user_pages); lobj->user_pages = NULL; } } @@ -571,7 +571,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, release_pages(e->user_pages, e->robj->tbo.ttm->num_pages, false); - drm_free_large(e->user_pages); + kvfree(e->user_pages); e->user_pages = NULL; } @@ -601,8 +601,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, list_for_each_entry(e, &need_pages, tv.head) { struct ttm_tt *ttm = e->robj->tbo.ttm; - e->user_pages = drm_calloc_large(ttm->num_pages, - sizeof(struct page*)); + e->user_pages = kvmalloc_array(ttm->num_pages, + sizeof(struct page*), + GFP_KERNEL | __GFP_ZERO); if (!e->user_pages) { r = -ENOMEM; DRM_ERROR("calloc failure in %s\n", __func__); @@ -612,7 +613,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); if (r) { DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); - drm_free_large(e->user_pages); + kvfree(e->user_pages); e->user_pages = NULL; goto error_free_pages; } @@ -708,7 +709,7 @@ error_free_pages: release_pages(e->user_pages, e->robj->tbo.ttm->num_pages, false); - drm_free_large(e->user_pages); + kvfree(e->user_pages); } } @@ -761,7 +762,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo amdgpu_bo_list_put(parser->bo_list); for (i = 0; i < parser->nchunks; i++) - drm_free_large(parser->chunks[i].kdata); + kvfree(parser->chunks[i].kdata); kfree(parser->chunks); if (parser->job) amdgpu_job_free(parser->job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 38e9b0d3659a..1cb52fd19060 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f2d705e6a75a..31eddd85eb40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -39,7 +39,7 @@ #include <linux/module.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> -#include "drm_crtc_helper.h" +#include <drm/drm_crtc_helper.h> #include "amdgpu.h" #include "amdgpu_irq.h" @@ -715,6 +715,16 @@ static const struct file_operations amdgpu_driver_kms_fops = { #endif }; +static bool +amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) +{ + return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos, + stime, etime, mode); +} + static struct drm_driver kms_driver = { .driver_features = DRIVER_USE_AGP | @@ -729,8 +739,8 @@ static struct drm_driver kms_driver = { 
.get_vblank_counter = amdgpu_get_vblank_counter_kms, .enable_vblank = amdgpu_enable_vblank_kms, .disable_vblank = amdgpu_disable_vblank_kms, - .get_vblank_timestamp = amdgpu_get_vblank_timestamp_kms, - .get_scanout_position = amdgpu_get_crtc_scanoutpos, + .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, + .get_scanout_position = amdgpu_get_crtc_scanout_position, #if defined(CONFIG_DEBUG_FS) .debugfs_init = amdgpu_debugfs_init, #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 96c341670782..dca4be970d13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -945,47 +945,6 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe) amdgpu_irq_put(adev, &adev->crtc_irq, idx); } -/** - * amdgpu_get_vblank_timestamp_kms - get vblank timestamp - * - * @dev: drm dev pointer - * @crtc: crtc to get the timestamp for - * @max_error: max error - * @vblank_time: time value - * @flags: flags passed to the driver - * - * Gets the timestamp on the requested crtc based on the - * scanout position. (all asics). - * Returns postive status flags on success, negative error on failure. - */ -int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, - int *max_error, - struct timeval *vblank_time, - unsigned flags) -{ - struct drm_crtc *crtc; - struct amdgpu_device *adev = dev->dev_private; - - if (pipe >= dev->num_crtcs) { - DRM_ERROR("Invalid crtc %u\n", pipe); - return -EINVAL; - } - - /* Get associated drm_crtc: */ - crtc = &adev->mode_info.crtcs[pipe]->base; - if (!crtc) { - /* This can occur on driver load if some component fails to - * initialize completely and driver is unloaded */ - DRM_ERROR("Uninitialized crtc %d\n", pipe); - return -EINVAL; - } - - /* Helper routine in DRM core does all the work: */ - return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, - vblank_time, flags, - &crtc->hwmode); -} - const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index dbd10618ec20..43a9d3aec6c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -534,6 +534,9 @@ struct amdgpu_framebuffer { ((em) == ATOM_ENCODER_MODE_DP_MST)) /* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ +#define DRM_SCANOUTPOS_VALID (1 << 0) +#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1) +#define DRM_SCANOUTPOS_ACCURATE (1 << 2) #define USE_REAL_VBLANKSTART (1 << 30) #define GET_DISTANCE_TO_VBLANKSTART (1 << 31) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index ac5e92e5d59d..596e3957bdd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -24,7 +24,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5db0230e45c6..b5fa003c1341 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -29,11 +29,11 @@ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> * Dave Airlie */ -#include <ttm/ttm_bo_api.h> -#include 
<ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> -#include <ttm/ttm_module.h> -#include <ttm/ttm_page_alloc.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_module.h> +#include <drm/ttm/ttm_page_alloc.h> #include <drm/drmP.h> #include <drm/amdgpu_drm.h> #include <linux/seq_file.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8ecf82c5fe74..83c172a6e938 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -279,8 +279,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); - parent->entries = drm_calloc_large(num_entries, - sizeof(struct amdgpu_vm_pt)); + parent->entries = kvmalloc_array(num_entries, + sizeof(struct amdgpu_vm_pt), + GFP_KERNEL | __GFP_ZERO); if (!parent->entries) return -ENOMEM; memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); @@ -2219,7 +2220,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) for (i = 0; i <= level->last_entry_used; i++) amdgpu_vm_free_levels(&level->entries[i]); - drm_free_large(level->entries); + kvfree(level->entries); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index ec93714e4524..cb508a211b2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -22,7 +22,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_ucode.h" diff --git a/drivers/gpu/drm/amd/amdgpu/ci_smc.c b/drivers/gpu/drm/amd/amdgpu/ci_smc.c index 7eb9069db8e3..b8ba51e045b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_smc.c @@ -23,7 +23,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "cikd.h" #include "ppsmc.h" diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 9d33e5641419..6b2034533f68 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -24,7 +24,7 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index c57c3f18af01..b8918432c572 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "cikd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index a5f294ebff5c..0c1209cdd1cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "vid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 0cdeb6a2e4a0..3c62c45f43a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 773654a19749..c8ed0facddcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 1f3552967ba3..3f3a25493327 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 3c558c170e5e..3e90c19b9c7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index f1b479b6ac98..90bb08309a53 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_i2c.h" diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index ee2f2139e2eb..f7414cabd4ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "amdgpu_gfx.h" diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 758d636a6f52..404d12785853 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "vi.h" diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0c16b7563b73..125b11950071 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "soc15.h" diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index d860939152df..9776ad3d2d71 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "gmc_v6_0.h" #include "amdgpu_ucode.h" diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 2750e5c23813..fca8e77182c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "cikd.h" #include "cik.h" diff --git 
a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index f56b4089ee9f..e9c127037b39 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "gmc_v8_0.h" #include "amdgpu_ucode.h" diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index cb622add99a7..7a0ea27ac429 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "vid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 79a52ad2c80d..3bbf2ccfca89 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -21,7 +21,7 @@ * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "cikd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/kv_smc.c b/drivers/gpu/drm/amd/amdgpu/kv_smc.c index e6b7b42acfe1..b82e33c01571 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_smc.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "cikd.h" #include "kv_dpm.h" diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 60a6407ba267..eef89abc0cee 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -24,7 +24,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index c0b1aabf282f..2431639baf47 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -24,7 +24,7 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 7c1c5d127281..a7ad8390981c 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -21,7 +21,7 @@ * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_dpm.h" diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index e66084211c74..ce25e03a077d 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "sid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/si_smc.c b/drivers/gpu/drm/amd/amdgpu/si_smc.c index 0726bc3b6f90..4a2fd8b61940 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/si_smc.c @@ -23,7 +23,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "sid.h" #include "ppsmc.h" diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 6b55d451ae7f..e945f8b07487 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -23,7 +23,7 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "amdgpu_ih.h" diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 3a5097ac2bb4..923df2c0e535 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "vid.h" diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 071f56e439bb..3b9740fb2c41 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -20,7 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" #include "soc15.h" diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index b1132f5e84fc..3a187619286f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -21,7 +21,7 @@ * */ #include <linux/slab.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 7fc9b0f444cb..b400d5664252 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -2,7 +2,7 @@ # Makefile for Heterogenous System Architecture support for AMD GPU devices # -ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ \ +ccflags-y := -Idrivers/gpu/drm/amd/include/ \ -Idrivers/gpu/drm/amd/include/asic_reg amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ diff --git a/drivers/gpu/drm/amd/powerplay/Makefile b/drivers/gpu/drm/amd/powerplay/Makefile index 043e6ebab575..4e132b936e3d 100644 --- a/drivers/gpu/drm/amd/powerplay/Makefile +++ b/drivers/gpu/drm/amd/powerplay/Makefile @@ -1,5 +1,5 @@ -subdir-ccflags-y += -Iinclude/drm \ +subdir-ccflags-y += \ -I$(FULL_AMD_PATH)/powerplay/inc/ \ -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_PATH)/include \ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index ff4ae3de6bb6..963a9e017a28 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -22,10 +22,10 @@ */ #include "pp_debug.h" -#include "linux/delay.h" -#include <linux/types.h> +#include <linux/delay.h> #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/types.h> #include <drm/amdgpu_drm.h> #include "cgs_common.h" #include "power_state.h" diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c 
index f5e8fda964f7..f6b4dd96c0ec 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c @@ -21,8 +21,8 @@ * */ +#include <linux/delay.h> #include <linux/errno.h> -#include "linux/delay.h" #include "hwmgr.h" #include "amd_acpi.h" #include "pp_acpi.h" diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 102eb6d029fa..1f01020ce3a9 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -21,11 +21,11 @@ * */ #include "pp_debug.h" +#include <linux/delay.h> +#include <linux/fb.h> #include <linux/module.h> #include <linux/slab.h> -#include <linux/fb.h> #include <asm/div64.h> -#include "linux/delay.h" #include "pp_acpi.h" #include "ppatomctrl.h" #include "atombios.h" diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 2614af2f553f..ab17350e853d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -20,10 +20,11 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + +#include <linux/delay.h> +#include <linux/fb.h> #include <linux/module.h> #include <linux/slab.h> -#include <linux/fb.h> -#include "linux/delay.h" #include "hwmgr.h" #include "amd_powerplay.h" diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c index 1f6744a443d4..39c7091866e8 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c @@ -20,11 +20,13 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <linux/types.h> + +#include <linux/delay.h> +#include <linux/gfp.h> #include <linux/kernel.h> #include <linux/slab.h> -#include <linux/gfp.h> -#include "linux/delay.h" +#include <linux/types.h> + #include "cgs_common.h" #include "smu/smu_8_0_d.h" #include "smu/smu_8_0_sh_mask.h" diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c index c0d75766bbc8..2e954a44bac1 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c @@ -20,15 +20,16 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include <linux/types.h> + +#include <linux/delay.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/types.h> #include <drm/amdgpu_drm.h> #include "pp_instance.h" #include "smumgr.h" #include "cgs_common.h" -#include "linux/delay.h" MODULE_FIRMWARE("amdgpu/topaz_smc.bin"); MODULE_FIRMWARE("amdgpu/topaz_k_smc.bin"); diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h index 040311ffcaec..2e2033140efc 100644 --- a/drivers/gpu/drm/arm/malidp_drv.h +++ b/drivers/gpu/drm/arm/malidp_drv.h @@ -65,6 +65,6 @@ void malidp_de_planes_destroy(struct drm_device *drm); int malidp_crtc_init(struct drm_device *drm); /* often used combination of rotational bits */ -#define MALIDP_ROTATED_MASK (DRM_ROTATE_90 | DRM_ROTATE_270) +#define MALIDP_ROTATED_MASK (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270) #endif /* __MALIDP_DRV_H__ */ diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 814fda23cead..063a8d2b0be3 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -80,7 +80,7 @@ static void malidp_plane_reset(struct drm_plane *plane) state = kzalloc(sizeof(*state), GFP_KERNEL); if (state) { state->base.plane = plane; - state->base.rotation = DRM_ROTATE_0; + state->base.rotation = DRM_MODE_ROTATE_0; plane->state = &state->base; } } @@ -221,7 +221,7 @@ static int malidp_de_plane_check(struct drm_plane *plane, return ret; /* packed RGB888 / BGR888 can't be rotated or flipped */ - if (state->rotation != DRM_ROTATE_0 && + if (state->rotation != DRM_MODE_ROTATE_0 && (fb->format->format == DRM_FORMAT_RGB888 || fb->format->format == DRM_FORMAT_BGR888)) return -EINVAL; @@ -315,12 +315,12 @@ static void malidp_de_plane_update(struct drm_plane *plane, val &= ~LAYER_ROT_MASK; /* setup the rotation and axis flip bits */ - if (plane->state->rotation & DRM_ROTATE_MASK) - val |= ilog2(plane->state->rotation & DRM_ROTATE_MASK) << + if (plane->state->rotation & DRM_MODE_ROTATE_MASK) + val |= ilog2(plane->state->rotation & DRM_MODE_ROTATE_MASK) << LAYER_ROT_OFFSET; - if (plane->state->rotation & DRM_REFLECT_X) + if (plane->state->rotation & DRM_MODE_REFLECT_X) val |= LAYER_H_FLIP; - if (plane->state->rotation & DRM_REFLECT_Y) + if (plane->state->rotation & DRM_MODE_REFLECT_Y) val |= LAYER_V_FLIP; /* @@ -370,8 +370,8 @@ int malidp_de_planes_init(struct drm_device *drm) struct malidp_plane *plane = NULL; enum drm_plane_type plane_type; unsigned long crtcs = 1 << drm->mode_config.num_crtc; - unsigned long flags = DRM_ROTATE_0 | DRM_ROTATE_90 | DRM_ROTATE_180 | - DRM_ROTATE_270 | DRM_REFLECT_X | DRM_REFLECT_Y; + unsigned long flags = DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 | + DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y; u32 *formats; int ret, i, j, n; @@ -420,7 +420,7 @@ int malidp_de_planes_init(struct drm_device *drm) continue; } - drm_plane_create_rotation_property(&plane->base, DRM_ROTATE_0, flags); + drm_plane_create_rotation_property(&plane->base, DRM_MODE_ROTATE_0, flags); malidp_hw_write(malidp->dev, MALIDP_ALPHA_LUT, plane->layer->base + MALIDP_LAYER_COMPOSE); } diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index 424e465ff407..e9a29df4b443 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -125,7 +125,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, src_x, src_y, src_w, src_h); ret = 
drm_plane_helper_check_update(plane, crtc, fb, &src, &dest, &clip, - DRM_ROTATE_0, + DRM_MODE_ROTATE_0, 0, INT_MAX, true, false, &visible); if (ret) return ret; diff --git a/drivers/gpu/drm/ast/Makefile b/drivers/gpu/drm/ast/Makefile index 171aa0622b66..617fdd39519c 100644 --- a/drivers/gpu/drm/ast/Makefile +++ b/drivers/gpu/drm/ast/Makefile @@ -2,8 +2,6 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -ccflags-y := -Iinclude/drm - ast-y := ast_drv.o ast_main.o ast_mode.o ast_fb.o ast_ttm.o ast_post.o ast_dp501.o obj-$(CONFIG_DRM_AST) := ast.o diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c index e879496b8a42..58084985e6cf 100644 --- a/drivers/gpu/drm/ast/ast_ttm.c +++ b/drivers/gpu/drm/ast/ast_ttm.c @@ -26,8 +26,9 @@ * Authors: Dave Airlie <airlied@redhat.com> */ #include <drm/drmP.h> +#include <drm/ttm/ttm_page_alloc.h> + #include "ast_drv.h" -#include <ttm/ttm_page_alloc.h> static inline struct ast_private * ast_bdev(struct ttm_bo_device *bd) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index 29cc10d053eb..1124200bb280 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -678,8 +678,8 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, if (!state->bpp[i]) return -EINVAL; - switch (state->base.rotation & DRM_ROTATE_MASK) { - case DRM_ROTATE_90: + switch (state->base.rotation & DRM_MODE_ROTATE_MASK) { + case DRM_MODE_ROTATE_90: offset = ((y_offset + state->src_y + patched_src_w - 1) / ydiv) * fb->pitches[i]; offset += ((x_offset + state->src_x) / xdiv) * @@ -688,7 +688,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, fb->pitches[i]; state->pstride[i] = -fb->pitches[i] - state->bpp[i]; break; - case DRM_ROTATE_180: + case DRM_MODE_ROTATE_180: offset = ((y_offset + state->src_y + patched_src_h - 1) / ydiv) * fb->pitches[i]; offset += ((x_offset + state->src_x + patched_src_w - 1) / @@ -697,7 +697,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, state->bpp[i]) - fb->pitches[i]; state->pstride[i] = -2 * state->bpp[i]; break; - case DRM_ROTATE_270: + case DRM_MODE_ROTATE_270: offset = ((y_offset + state->src_y) / ydiv) * fb->pitches[i]; offset += ((x_offset + state->src_x + patched_src_h - 1) / @@ -707,7 +707,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, (2 * state->bpp[i]); state->pstride[i] = fb->pitches[i] - state->bpp[i]; break; - case DRM_ROTATE_0: + case DRM_MODE_ROTATE_0: default: offset = ((y_offset + state->src_y) / ydiv) * fb->pitches[i]; @@ -864,11 +864,11 @@ static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane, int ret; ret = drm_plane_create_rotation_property(&plane->base, - DRM_ROTATE_0, - DRM_ROTATE_0 | - DRM_ROTATE_90 | - DRM_ROTATE_180 | - DRM_ROTATE_270); + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | + DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | + DRM_MODE_ROTATE_270); if (ret) return ret; } diff --git a/drivers/gpu/drm/bochs/Makefile b/drivers/gpu/drm/bochs/Makefile index 844a55614920..98ef60a19e8f 100644 --- a/drivers/gpu/drm/bochs/Makefile +++ b/drivers/gpu/drm/bochs/Makefile @@ -1,4 +1,3 @@ -ccflags-y := -Iinclude/drm bochs-drm-y := bochs_drv.o bochs_mm.o bochs_kms.o bochs_fbdev.o bochs_hw.o obj-$(CONFIG_DRM_BOCHS) += bochs-drm.o diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h index 
f626bab7f5e3..76c490c3cdbc 100644 --- a/drivers/gpu/drm/bochs/bochs.h +++ b/drivers/gpu/drm/bochs/bochs.h @@ -9,8 +9,8 @@ #include <drm/drm_gem.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_page_alloc.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_page_alloc.h> /* ---------------------------------------------------------------------- */ diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile index 3fe2226ee2f2..defcf1e7ca1c 100644 --- a/drivers/gpu/drm/bridge/Makefile +++ b/drivers/gpu/drm/bridge/Makefile @@ -1,5 +1,3 @@ -ccflags-y := -Iinclude/drm - obj-$(CONFIG_DRM_ANALOGIX_ANX78XX) += analogix-anx78xx.o obj-$(CONFIG_DRM_DUMB_VGA_DAC) += dumb-vga-dac.o obj-$(CONFIG_DRM_LVDS_ENCODER) += lvds-encoder.o diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c index 351704390d02..4f64e717e01b 100644 --- a/drivers/gpu/drm/bridge/nxp-ptn3460.c +++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c @@ -20,15 +20,13 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/of_gpio.h> - +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_edid.h> #include <drm/drm_of.h> #include <drm/drm_panel.h> - -#include "drm_crtc.h" -#include "drm_crtc_helper.h" -#include "drm_atomic_helper.h" -#include "drm_edid.h" -#include "drmP.h" +#include <drm/drmP.h> #define PTN3460_EDID_ADDR 0x0 #define PTN3460_EDID_EMULATION_ADDR 0x84 diff --git a/drivers/gpu/drm/bridge/parade-ps8622.c b/drivers/gpu/drm/bridge/parade-ps8622.c index 1dcec3b97e67..6f22f9fec9bf 100644 --- a/drivers/gpu/drm/bridge/parade-ps8622.c +++ b/drivers/gpu/drm/bridge/parade-ps8622.c @@ -24,14 +24,12 @@ #include <linux/of_device.h> #include <linux/pm.h> #include <linux/regulator/consumer.h> - +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> #include <drm/drm_of.h> #include <drm/drm_panel.h> - -#include "drmP.h" -#include "drm_crtc.h" -#include "drm_crtc_helper.h" -#include "drm_atomic_helper.h" +#include <drm/drmP.h> /* Brightness scale on the Parade chip */ #define PS8622_MAX_BRIGHTNESS 0xff diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 9126d0306ab5..9b87067c022c 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -160,7 +160,7 @@ static int sii902x_get_modes(struct drm_connector *connector) time_before(jiffies, timeout)); if (!(status & SII902X_SYS_CTRL_DDC_BUS_GRTD)) { - dev_err(&sii902x->i2c->dev, "failed to acquire the i2c bus"); + dev_err(&sii902x->i2c->dev, "failed to acquire the i2c bus\n"); return -ETIMEDOUT; } @@ -202,7 +202,7 @@ static int sii902x_get_modes(struct drm_connector *connector) if (status & (SII902X_SYS_CTRL_DDC_BUS_REQ | SII902X_SYS_CTRL_DDC_BUS_GRTD)) { - dev_err(&sii902x->i2c->dev, "failed to release the i2c bus"); + dev_err(&sii902x->i2c->dev, "failed to release the i2c bus\n"); return -ETIMEDOUT; } @@ -298,7 +298,7 @@ static int sii902x_bridge_attach(struct drm_bridge *bridge) if (!drm_core_check_feature(drm, DRIVER_ATOMIC)) { dev_err(&sii902x->i2c->dev, - "sii902x driver is only compatible with DRM devices supporting atomic updates"); + "sii902x driver is only compatible with DRM devices supporting atomic updates\n"); return -ENOTSUPP; } diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 4e1f54a675d8..8737de8c1c52 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ 
b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -173,6 +173,8 @@ struct dw_hdmi { unsigned int reg_shift; struct regmap *regm; + void (*enable_audio)(struct dw_hdmi *hdmi); + void (*disable_audio)(struct dw_hdmi *hdmi); }; #define HDMI_IH_PHY_STAT0_RX_SENSE \ @@ -542,13 +544,41 @@ void dw_hdmi_set_sample_rate(struct dw_hdmi *hdmi, unsigned int rate) } EXPORT_SYMBOL_GPL(dw_hdmi_set_sample_rate); +static void hdmi_enable_audio_clk(struct dw_hdmi *hdmi, bool enable) +{ + hdmi_modb(hdmi, enable ? 0 : HDMI_MC_CLKDIS_AUDCLK_DISABLE, + HDMI_MC_CLKDIS_AUDCLK_DISABLE, HDMI_MC_CLKDIS); +} + +static void dw_hdmi_ahb_audio_enable(struct dw_hdmi *hdmi) +{ + hdmi_set_cts_n(hdmi, hdmi->audio_cts, hdmi->audio_n); +} + +static void dw_hdmi_ahb_audio_disable(struct dw_hdmi *hdmi) +{ + hdmi_set_cts_n(hdmi, hdmi->audio_cts, 0); +} + +static void dw_hdmi_i2s_audio_enable(struct dw_hdmi *hdmi) +{ + hdmi_set_cts_n(hdmi, hdmi->audio_cts, hdmi->audio_n); + hdmi_enable_audio_clk(hdmi, true); +} + +static void dw_hdmi_i2s_audio_disable(struct dw_hdmi *hdmi) +{ + hdmi_enable_audio_clk(hdmi, false); +} + void dw_hdmi_audio_enable(struct dw_hdmi *hdmi) { unsigned long flags; spin_lock_irqsave(&hdmi->audio_lock, flags); hdmi->audio_enable = true; - hdmi_set_cts_n(hdmi, hdmi->audio_cts, hdmi->audio_n); + if (hdmi->enable_audio) + hdmi->enable_audio(hdmi); spin_unlock_irqrestore(&hdmi->audio_lock, flags); } EXPORT_SYMBOL_GPL(dw_hdmi_audio_enable); @@ -559,7 +589,8 @@ void dw_hdmi_audio_disable(struct dw_hdmi *hdmi) spin_lock_irqsave(&hdmi->audio_lock, flags); hdmi->audio_enable = false; - hdmi_set_cts_n(hdmi, hdmi->audio_cts, 0); + if (hdmi->disable_audio) + hdmi->disable_audio(hdmi); spin_unlock_irqrestore(&hdmi->audio_lock, flags); } EXPORT_SYMBOL_GPL(dw_hdmi_audio_disable); @@ -1573,11 +1604,6 @@ static void dw_hdmi_enable_video_path(struct dw_hdmi *hdmi) HDMI_MC_FLOWCTRL); } -static void hdmi_enable_audio_clk(struct dw_hdmi *hdmi) -{ - hdmi_modb(hdmi, 0, HDMI_MC_CLKDIS_AUDCLK_DISABLE, HDMI_MC_CLKDIS); -} - /* Workaround to clear the overflow condition */ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi) { @@ -1691,7 +1717,7 @@ static int dw_hdmi_setup(struct dw_hdmi *hdmi, struct drm_display_mode *mode) /* HDMI Initialization Step E - Configure audio */ hdmi_clk_regenerator_update_pixel_clock(hdmi); - hdmi_enable_audio_clk(hdmi); + hdmi_enable_audio_clk(hdmi, true); } /* not for DVI mode */ @@ -2403,6 +2429,8 @@ __dw_hdmi_probe(struct platform_device *pdev, audio.irq = irq; audio.hdmi = hdmi; audio.eld = hdmi->connector.eld; + hdmi->enable_audio = dw_hdmi_ahb_audio_enable; + hdmi->disable_audio = dw_hdmi_ahb_audio_disable; pdevinfo.name = "dw-hdmi-ahb-audio"; pdevinfo.data = &audio; @@ -2415,6 +2443,8 @@ __dw_hdmi_probe(struct platform_device *pdev, audio.hdmi = hdmi; audio.write = hdmi_writeb; audio.read = hdmi_readb; + hdmi->enable_audio = dw_hdmi_i2s_audio_enable; + hdmi->disable_audio = dw_hdmi_i2s_audio_disable; pdevinfo.name = "dw-hdmi-i2s-audio"; pdevinfo.data = &audio; diff --git a/drivers/gpu/drm/cirrus/Makefile b/drivers/gpu/drm/cirrus/Makefile index 69ffe7006d55..919c0a336c97 100644 --- a/drivers/gpu/drm/cirrus/Makefile +++ b/drivers/gpu/drm/cirrus/Makefile @@ -1,4 +1,3 @@ -ccflags-y := -Iinclude/drm cirrus-y := cirrus_main.o cirrus_mode.o \ cirrus_drv.o cirrus_fbdev.o cirrus_ttm.o diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c index 93dbcd38355d..1ff1838c0d44 100644 --- a/drivers/gpu/drm/cirrus/cirrus_ttm.c +++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c @@ -26,8 
+26,9 @@ * Authors: Dave Airlie <airlied@redhat.com> */ #include <drm/drmP.h> +#include <drm/ttm/ttm_page_alloc.h> + #include "cirrus_drv.h" -#include <ttm/ttm_page_alloc.h> static inline struct cirrus_device * cirrus_bdev(struct ttm_bo_device *bd) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index f32506a7c1d6..e1637011e18a 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -57,6 +57,7 @@ void drm_atomic_state_default_release(struct drm_atomic_state *state) kfree(state->connectors); kfree(state->crtcs); kfree(state->planes); + kfree(state->private_objs); } EXPORT_SYMBOL(drm_atomic_state_default_release); @@ -184,6 +185,17 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->planes[i].ptr = NULL; state->planes[i].state = NULL; } + + for (i = 0; i < state->num_private_objs; i++) { + void *obj_state = state->private_objs[i].obj_state; + + state->private_objs[i].funcs->destroy_state(obj_state); + state->private_objs[i].obj = NULL; + state->private_objs[i].obj_state = NULL; + state->private_objs[i].funcs = NULL; + } + state->num_private_objs = 0; + } EXPORT_SYMBOL(drm_atomic_state_default_clear); @@ -425,7 +437,7 @@ drm_atomic_replace_property_blob(struct drm_property_blob **blob, } static int -drm_atomic_replace_property_blob_from_id(struct drm_crtc *crtc, +drm_atomic_replace_property_blob_from_id(struct drm_device *dev, struct drm_property_blob **blob, uint64_t blob_id, ssize_t expected_size, @@ -434,7 +446,7 @@ drm_atomic_replace_property_blob_from_id(struct drm_crtc *crtc, struct drm_property_blob *new_blob = NULL; if (blob_id != 0) { - new_blob = drm_property_lookup_blob(crtc->dev, blob_id); + new_blob = drm_property_lookup_blob(dev, blob_id); if (new_blob == NULL) return -EINVAL; @@ -483,7 +495,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc, drm_property_blob_put(mode); return ret; } else if (property == config->degamma_lut_property) { - ret = drm_atomic_replace_property_blob_from_id(crtc, + ret = drm_atomic_replace_property_blob_from_id(dev, &state->degamma_lut, val, -1, @@ -491,7 +503,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc, state->color_mgmt_changed |= replaced; return ret; } else if (property == config->ctm_property) { - ret = drm_atomic_replace_property_blob_from_id(crtc, + ret = drm_atomic_replace_property_blob_from_id(dev, &state->ctm, val, sizeof(struct drm_color_ctm), @@ -499,7 +511,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc, state->color_mgmt_changed |= replaced; return ret; } else if (property == config->gamma_lut_property) { - ret = drm_atomic_replace_property_blob_from_id(crtc, + ret = drm_atomic_replace_property_blob_from_id(dev, &state->gamma_lut, val, -1, @@ -769,7 +781,7 @@ int drm_atomic_plane_set_property(struct drm_plane *plane, } else if (property == config->prop_src_h) { state->src_h = val; } else if (property == plane->rotation_property) { - if (!is_power_of_2(val & DRM_ROTATE_MASK)) + if (!is_power_of_2(val & DRM_MODE_ROTATE_MASK)) return -EINVAL; state->rotation = val; } else if (property == plane->zpos_property) { @@ -978,6 +990,59 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, } /** + * drm_atomic_get_private_obj_state - get private object state + * @state: global atomic state + * @obj: private object to get the state for + * @funcs: pointer to the struct of function pointers that identify the object + * type + * + * This function returns the private object state for the given private object, + * allocating the 
state if needed. It does not grab any locks as the caller is + expected to take care of any required locking. + * + * RETURNS: + * + * Either the allocated state or the error code encoded into a pointer. + */ +void * +drm_atomic_get_private_obj_state(struct drm_atomic_state *state, void *obj, + const struct drm_private_state_funcs *funcs) +{ + int index, num_objs, i; + size_t size; + struct __drm_private_objs_state *arr; + + for (i = 0; i < state->num_private_objs; i++) + if (obj == state->private_objs[i].obj && + state->private_objs[i].obj_state) + return state->private_objs[i].obj_state; + + num_objs = state->num_private_objs + 1; + size = sizeof(*state->private_objs) * num_objs; + arr = krealloc(state->private_objs, size, GFP_KERNEL); + if (!arr) + return ERR_PTR(-ENOMEM); + + state->private_objs = arr; + index = state->num_private_objs; + memset(&state->private_objs[index], 0, sizeof(*state->private_objs)); + + state->private_objs[index].obj_state = funcs->duplicate_state(state, obj); + if (!state->private_objs[index].obj_state) + return ERR_PTR(-ENOMEM); + + state->private_objs[index].obj = obj; + state->private_objs[index].funcs = funcs; + state->num_private_objs = num_objs; + + DRM_DEBUG_ATOMIC("Added new private object state %p to %p\n", + state->private_objs[index].obj_state, state); + + return state->private_objs[index].obj_state; +} +EXPORT_SYMBOL(drm_atomic_get_private_obj_state); + +/** * drm_atomic_get_connector_state - get connector state * @state: global atomic state object * @connector: connector to get state object for @@ -1123,6 +1188,10 @@ int drm_atomic_connector_set_property(struct drm_connector *connector, */ if (state->link_status != DRM_LINK_STATUS_GOOD) state->link_status = val; + } else if (property == config->aspect_ratio_property) { + state->picture_aspect_ratio = val; + } else if (property == connector->scaling_mode_property) { + state->scaling_mode = val; } else if (connector->funcs->atomic_set_property) { return connector->funcs->atomic_set_property(connector, state, property, val); @@ -1199,6 +1268,10 @@ drm_atomic_connector_get_property(struct drm_connector *connector, *val = state->tv.hue; } else if (property == config->link_status_property) { *val = state->link_status; + } else if (property == config->aspect_ratio_property) { + *val = state->picture_aspect_ratio; + } else if (property == connector->scaling_mode_property) { + *val = state->scaling_mode; } else if (connector->funcs->atomic_get_property) { return connector->funcs->atomic_get_property(connector, state, property, val); @@ -1618,7 +1691,7 @@ int drm_atomic_commit(struct drm_atomic_state *state) if (ret) return ret; - DRM_DEBUG_ATOMIC("commiting %p\n", state); + DRM_DEBUG_ATOMIC("committing %p\n", state); return config->funcs->atomic_commit(state->dev, state, false); } @@ -1647,7 +1720,7 @@ int drm_atomic_nonblocking_commit(struct drm_atomic_state *state) if (ret) return ret; - DRM_DEBUG_ATOMIC("commiting %p nonblocking\n", state); + DRM_DEBUG_ATOMIC("committing %p nonblocking\n", state); return config->funcs->atomic_commit(state->dev, state, true); } diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 8be9719284b0..636e561486a8 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1070,8 +1070,8 @@ EXPORT_SYMBOL(drm_atomic_helper_commit_modeset_enables); * * Note that @pre_swap is needed since the point where we block for fences moves * around depending upon whether an atomic commit is blocking or - * non-blocking.
For async commit all waiting needs to happen after - * drm_atomic_helper_swap_state() is called, but for synchronous commits we want + * non-blocking. For non-blocking commit all waiting needs to happen after + * drm_atomic_helper_swap_state() is called, but for blocking commits we want * to wait **before** we do anything that can't be easily rolled back. That is * before we call drm_atomic_helper_swap_state(). * @@ -2032,6 +2032,8 @@ void drm_atomic_helper_swap_state(struct drm_atomic_state *state, struct drm_plane *plane; struct drm_plane_state *old_plane_state, *new_plane_state; struct drm_crtc_commit *commit; + void *obj, *obj_state; + const struct drm_private_state_funcs *funcs; if (stall) { for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { @@ -2092,6 +2094,9 @@ void drm_atomic_helper_swap_state(struct drm_atomic_state *state, state->planes[i].state = old_plane_state; plane->state = new_plane_state; } + + __for_each_private_obj(state, obj, obj_state, i, funcs) + funcs->swap_state(obj, &state->private_objs[i].obj_state); } EXPORT_SYMBOL(drm_atomic_helper_swap_state); @@ -3220,7 +3225,7 @@ void drm_atomic_helper_plane_reset(struct drm_plane *plane) if (plane->state) { plane->state->plane = plane; - plane->state->rotation = DRM_ROTATE_0; + plane->state->rotation = DRM_MODE_ROTATE_0; } } EXPORT_SYMBOL(drm_atomic_helper_plane_reset); @@ -3517,7 +3522,8 @@ EXPORT_SYMBOL(drm_atomic_helper_connector_destroy_state); * * Implements support for legacy gamma correction table for drivers * that support color management through the DEGAMMA_LUT/GAMMA_LUT - * properties. + * properties. See drm_crtc_enable_color_mgmt() and the containing chapter for + * how the atomic color management and gamma tables work. */ int drm_atomic_helper_legacy_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index a0d0d6843288..db6aeec50b82 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -119,17 +119,17 @@ * drm_property_create_bitmask()) called "rotation" and has the following * bitmask enumaration values: * - * DRM_ROTATE_0: + * DRM_MODE_ROTATE_0: * "rotate-0" - * DRM_ROTATE_90: + * DRM_MODE_ROTATE_90: * "rotate-90" - * DRM_ROTATE_180: + * DRM_MODE_ROTATE_180: * "rotate-180" - * DRM_ROTATE_270: + * DRM_MODE_ROTATE_270: * "rotate-270" - * DRM_REFLECT_X: + * DRM_MODE_REFLECT_X: * "reflect-x" - * DRM_REFELCT_Y: + * DRM_MODE_REFLECT_Y: * "reflect-y" * * Rotation is the specified amount in degrees in counter clockwise direction, @@ -142,17 +142,17 @@ int drm_plane_create_rotation_property(struct drm_plane *plane, unsigned int supported_rotations) { static const struct drm_prop_enum_list props[] = { - { __builtin_ffs(DRM_ROTATE_0) - 1, "rotate-0" }, - { __builtin_ffs(DRM_ROTATE_90) - 1, "rotate-90" }, - { __builtin_ffs(DRM_ROTATE_180) - 1, "rotate-180" }, - { __builtin_ffs(DRM_ROTATE_270) - 1, "rotate-270" }, - { __builtin_ffs(DRM_REFLECT_X) - 1, "reflect-x" }, - { __builtin_ffs(DRM_REFLECT_Y) - 1, "reflect-y" }, + { __builtin_ffs(DRM_MODE_ROTATE_0) - 1, "rotate-0" }, + { __builtin_ffs(DRM_MODE_ROTATE_90) - 1, "rotate-90" }, + { __builtin_ffs(DRM_MODE_ROTATE_180) - 1, "rotate-180" }, + { __builtin_ffs(DRM_MODE_ROTATE_270) - 1, "rotate-270" }, + { __builtin_ffs(DRM_MODE_REFLECT_X) - 1, "reflect-x" }, + { __builtin_ffs(DRM_MODE_REFLECT_Y) - 1, "reflect-y" }, }; struct drm_property *prop; - WARN_ON((supported_rotations & DRM_ROTATE_MASK) == 0); - WARN_ON(!is_power_of_2(rotation & DRM_ROTATE_MASK)); + 
WARN_ON((supported_rotations & DRM_MODE_ROTATE_MASK) == 0); + WARN_ON(!is_power_of_2(rotation & DRM_MODE_ROTATE_MASK)); WARN_ON(rotation & ~supported_rotations); prop = drm_property_create_bitmask(plane->dev, 0, "rotation", @@ -178,14 +178,14 @@ EXPORT_SYMBOL(drm_plane_create_rotation_property); * @supported_rotations: Supported rotations * * Attempt to simplify the rotation to a form that is supported. - * Eg. if the hardware supports everything except DRM_REFLECT_X + * Eg. if the hardware supports everything except DRM_MODE_REFLECT_X * one could call this function like this: * - * drm_rotation_simplify(rotation, DRM_ROTATE_0 | - * DRM_ROTATE_90 | DRM_ROTATE_180 | - * DRM_ROTATE_270 | DRM_REFLECT_Y); + * drm_rotation_simplify(rotation, DRM_MODE_ROTATE_0 | + * DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 | + * DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_Y); * - * to eliminate the DRM_ROTATE_X flag. Depending on what kind of + * to eliminate the DRM_MODE_ROTATE_X flag. Depending on what kind of * transforms the hardware supports, this function may not * be able to produce a supported transform, so the caller should * check the result afterwards. @@ -194,9 +194,10 @@ unsigned int drm_rotation_simplify(unsigned int rotation, unsigned int supported_rotations) { if (rotation & ~supported_rotations) { - rotation ^= DRM_REFLECT_X | DRM_REFLECT_Y; - rotation = (rotation & DRM_REFLECT_MASK) | - BIT((ffs(rotation & DRM_ROTATE_MASK) + 1) % 4); + rotation ^= DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y; + rotation = (rotation & DRM_MODE_REFLECT_MASK) | + BIT((ffs(rotation & DRM_MODE_ROTATE_MASK) + 1) + % 4); } return rotation; diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 533f3a3e6877..3eda500fc005 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -43,7 +43,8 @@ * * Setting this to NULL (blob property value set to 0) means a * linear/pass-thru gamma table should be used. This is generally the - * driver boot-up state too. + * driver boot-up state too. Drivers can access this blob through + * &drm_crtc_state.degamma_lut. * * “DEGAMMA_LUT_SIZE”: * Unsinged range property to give the size of the lookup table to be set @@ -60,7 +61,8 @@ * * Setting this to NULL (blob property value set to 0) means a * unit/pass-thru matrix should be used. This is generally the driver - * boot-up state too. + * boot-up state too. Drivers can access the blob for the color conversion + * matrix through &drm_crtc_state.ctm. * * “GAMMA_LUT”: * Blob property to set the gamma lookup table (LUT) mapping pixel data @@ -72,7 +74,8 @@ * * Setting this to NULL (blob property value set to 0) means a * linear/pass-thru gamma table should be used. This is generally the - * driver boot-up state too. + * driver boot-up state too. Drivers can access this blob through + * &drm_crtc_state.gamma_lut. * * “GAMMA_LUT_SIZE”: * Unsigned range property to give the size of the lookup table to be set diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 9f847615ac74..5cd61aff7857 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -941,6 +941,10 @@ EXPORT_SYMBOL(drm_mode_create_tv_properties); * * Called by a driver the first time it's needed, must be attached to desired * connectors. + * + * Atomic drivers should use drm_connector_attach_scaling_mode_property() + * instead to correctly assign &drm_connector_state.picture_aspect_ratio + * in the atomic state. 
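As the kerneldoc above stresses, drm_rotation_simplify() may still fail to produce a supported transform, so callers must re-check the result. A sketch of a plane ->atomic_check fragment for hypothetical hardware that supports everything except DRM_MODE_REFLECT_X:

    static int my_plane_check_rotation(struct drm_plane_state *state)
    {
            unsigned int rotation;

            rotation = drm_rotation_simplify(state->rotation,
                                             DRM_MODE_ROTATE_0 |
                                             DRM_MODE_ROTATE_90 |
                                             DRM_MODE_ROTATE_180 |
                                             DRM_MODE_ROTATE_270 |
                                             DRM_MODE_REFLECT_Y);

            /* The simplified form may still be unsupported; reject it then. */
            if (rotation & ~(DRM_MODE_ROTATE_MASK | DRM_MODE_REFLECT_Y))
                    return -EINVAL;

            state->rotation = rotation;
            return 0;
    }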
*/ int drm_mode_create_scaling_mode_property(struct drm_device *dev) { @@ -961,6 +965,66 @@ int drm_mode_create_scaling_mode_property(struct drm_device *dev) EXPORT_SYMBOL(drm_mode_create_scaling_mode_property); /** + * drm_connector_attach_scaling_mode_property - attach atomic scaling mode property + * @connector: connector to attach scaling mode property on. + * @scaling_mode_mask: or'ed mask of BIT(%DRM_MODE_SCALE_\*). + * + * This is used to add support for scaling mode to atomic drivers. + * The scaling mode will be set to &drm_connector_state.picture_aspect_ratio + * and can be used from &drm_connector_helper_funcs->atomic_check for validation. + * + * This is the atomic version of drm_mode_create_scaling_mode_property(). + * + * Returns: + * Zero on success, negative errno on failure. + */ +int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, + u32 scaling_mode_mask) +{ + struct drm_device *dev = connector->dev; + struct drm_property *scaling_mode_property; + int i, j = 0; + const unsigned valid_scaling_mode_mask = + (1U << ARRAY_SIZE(drm_scaling_mode_enum_list)) - 1; + + if (WARN_ON(hweight32(scaling_mode_mask) < 2 || + scaling_mode_mask & ~valid_scaling_mode_mask)) + return -EINVAL; + + scaling_mode_property = + drm_property_create(dev, DRM_MODE_PROP_ENUM, "scaling mode", + hweight32(scaling_mode_mask)); + + if (!scaling_mode_property) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(drm_scaling_mode_enum_list); i++) { + int ret; + + if (!(BIT(i) & scaling_mode_mask)) + continue; + + ret = drm_property_add_enum(scaling_mode_property, j++, + drm_scaling_mode_enum_list[i].type, + drm_scaling_mode_enum_list[i].name); + + if (ret) { + drm_property_destroy(dev, scaling_mode_property); + + return ret; + } + } + + drm_object_attach_property(&connector->base, + scaling_mode_property, 0); + + connector->scaling_mode_property = scaling_mode_property; + + return 0; +} +EXPORT_SYMBOL(drm_connector_attach_scaling_mode_property); + +/** * drm_mode_create_aspect_ratio_property - create aspect ratio property * @dev: DRM device * diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index d3fc7e4e85b7..222eb1a8549b 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -737,16 +737,16 @@ static void drm_dp_mst_put_payload_id(struct drm_dp_mst_topology_mgr *mgr, static bool check_txmsg_state(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_sideband_msg_tx *txmsg) { - bool ret; + unsigned int state; /* * All updates to txmsg->state are protected by mgr->qlock, and the two * cases we check here are terminal states. For those the barriers * provided by the wake_up/wait_event pair are enough. 
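A sketch of how an atomic driver would use the new helper at connector init time; DRM_MODE_SCALE_ASPECT and DRM_MODE_SCALE_FULLSCREEN are the uapi enum values, and at least two modes must be set in the mask or the WARN_ON above fires:

    static int my_connector_init(struct drm_connector *connector)
    {
            return drm_connector_attach_scaling_mode_property(connector,
                            BIT(DRM_MODE_SCALE_ASPECT) |
                            BIT(DRM_MODE_SCALE_FULLSCREEN));
    }

The value chosen by userspace then lands in &drm_connector_state.scaling_mode, as the drm_atomic_connector_set_property() hunk earlier in this series shows, and can be validated from the connector's atomic_check.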
*/ - ret = (txmsg->state == DRM_DP_SIDEBAND_TX_RX || - txmsg->state == DRM_DP_SIDEBAND_TX_TIMEOUT); - return ret; + state = READ_ONCE(txmsg->state); + return (state == DRM_DP_SIDEBAND_TX_RX || + state == DRM_DP_SIDEBAND_TX_TIMEOUT); } static int drm_dp_mst_wait_tx_reply(struct drm_dp_mst_branch *mstb, @@ -855,7 +855,7 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref) mutex_unlock(&mstb->mgr->qlock); if (wake_tx) - wake_up(&mstb->mgr->tx_waitq); + wake_up_all(&mstb->mgr->tx_waitq); kref_put(kref, drm_dp_free_mst_branch_device); } @@ -1510,7 +1510,7 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr) if (txmsg->seqno != -1) txmsg->dst->tx_slots[txmsg->seqno] = NULL; txmsg->state = DRM_DP_SIDEBAND_TX_TIMEOUT; - wake_up(&mgr->tx_waitq); + wake_up_all(&mgr->tx_waitq); } } @@ -2258,7 +2258,7 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr) mstb->tx_slots[slot] = NULL; mutex_unlock(&mgr->qlock); - wake_up(&mgr->tx_waitq); + wake_up_all(&mgr->tx_waitq); } return ret; } @@ -2498,6 +2498,81 @@ static int drm_dp_init_vcpi(struct drm_dp_mst_topology_mgr *mgr, } /** + * drm_dp_atomic_find_vcpi_slots() - Find and add vcpi slots to the state + * @state: global atomic state + * @mgr: MST topology manager for the port + * @port: port to find vcpi slots for + * @pbn: bandwidth required for the mode in PBN + * + * RETURNS: + * Total slots in the atomic state assigned for this port or error + */ +int drm_dp_atomic_find_vcpi_slots(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port, int pbn) +{ + struct drm_dp_mst_topology_state *topology_state; + int req_slots; + + topology_state = drm_atomic_get_mst_topology_state(state, mgr); + if (topology_state == NULL) + return -ENOMEM; + + port = drm_dp_get_validated_port_ref(mgr, port); + if (port == NULL) + return -EINVAL; + req_slots = DIV_ROUND_UP(pbn, mgr->pbn_div); + DRM_DEBUG_KMS("vcpi slots req=%d, avail=%d\n", + req_slots, topology_state->avail_slots); + + if (req_slots > topology_state->avail_slots) { + drm_dp_put_port(port); + return -ENOSPC; + } + + topology_state->avail_slots -= req_slots; + DRM_DEBUG_KMS("vcpi slots avail=%d", topology_state->avail_slots); + + drm_dp_put_port(port); + return req_slots; +} +EXPORT_SYMBOL(drm_dp_atomic_find_vcpi_slots); + +/** + * drm_dp_atomic_release_vcpi_slots() - Release allocated vcpi slots + * @state: global atomic state + * @mgr: MST topology manager for the port + * @slots: number of vcpi slots to release + * + * RETURNS: + * 0 if @slots were added back to &drm_dp_mst_topology_state->avail_slots or + * negative error code + */ +int drm_dp_atomic_release_vcpi_slots(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr, + int slots) +{ + struct drm_dp_mst_topology_state *topology_state; + + topology_state = drm_atomic_get_mst_topology_state(state, mgr); + if (topology_state == NULL) + return -ENOMEM; + + /* We cannot rely on port->vcpi.num_slots to update + * topology_state->avail_slots as the port may not exist if the parent + * branch device was unplugged. This should be fixed by tracking + * per-port slot allocation in drm_dp_mst_topology_state instead of + * depending on the caller to tell us how many slots to release. 
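A sketch of the intended usage from an MST encoder's ->atomic_check; the my_connector wrapper and its mgr/port/vcpi_slots members are hypothetical, and drm_dp_calc_pbn_mode() is assumed for the bandwidth computation:

    static int my_mst_encoder_atomic_check(struct drm_encoder *encoder,
                                           struct drm_crtc_state *crtc_state,
                                           struct drm_connector_state *conn_state)
    {
            struct my_connector *conn = to_my_connector(conn_state->connector);
            int pbn, slots;

            /* 24bpp bandwidth for the adjusted mode, in PBN units. */
            pbn = drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.crtc_clock, 24);

            slots = drm_dp_atomic_find_vcpi_slots(crtc_state->state, conn->mgr,
                                                  conn->port, pbn);
            if (slots < 0)
                    return slots;

            conn->vcpi_slots = slots;       /* remembered for the release path */
            return 0;
    }

On the disable path the same count is handed back via drm_dp_atomic_release_vcpi_slots(); as the comment above concedes, the core cannot yet derive the number from the port itself, so the caller has to remember it.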
+ */ + topology_state->avail_slots += slots; + DRM_DEBUG_KMS("vcpi slots released=%d, avail=%d\n", + slots, topology_state->avail_slots); + + return 0; +} +EXPORT_SYMBOL(drm_dp_atomic_release_vcpi_slots); + +/** * drm_dp_mst_allocate_vcpi() - Allocate a virtual channel * @mgr: manager for this port * @port: port to allocate a virtual channel for. @@ -2936,6 +3011,69 @@ static void drm_dp_destroy_connector_work(struct work_struct *work) (*mgr->cbs->hotplug)(mgr); } +void *drm_dp_mst_duplicate_state(struct drm_atomic_state *state, void *obj) +{ + struct drm_dp_mst_topology_mgr *mgr = obj; + struct drm_dp_mst_topology_state *new_mst_state; + + if (WARN_ON(!mgr->state)) + return NULL; + + new_mst_state = kmemdup(mgr->state, sizeof(*new_mst_state), GFP_KERNEL); + if (new_mst_state) + new_mst_state->state = state; + return new_mst_state; +} + +void drm_dp_mst_swap_state(void *obj, void **obj_state_ptr) +{ + struct drm_dp_mst_topology_mgr *mgr = obj; + struct drm_dp_mst_topology_state **topology_state_ptr; + + topology_state_ptr = (struct drm_dp_mst_topology_state **)obj_state_ptr; + + mgr->state->state = (*topology_state_ptr)->state; + swap(*topology_state_ptr, mgr->state); + mgr->state->state = NULL; +} + +void drm_dp_mst_destroy_state(void *obj_state) +{ + kfree(obj_state); +} + +static const struct drm_private_state_funcs mst_state_funcs = { + .duplicate_state = drm_dp_mst_duplicate_state, + .swap_state = drm_dp_mst_swap_state, + .destroy_state = drm_dp_mst_destroy_state, +}; + +/** + * drm_atomic_get_mst_topology_state: get MST topology state + * + * @state: global atomic state + * @mgr: MST topology manager, also the private object in this case + * + * This function wraps drm_atomic_get_priv_obj_state() passing in the MST atomic + * state vtable so that the private object state returned is that of a MST + * topology object. Also, drm_atomic_get_private_obj_state() expects the caller + * to care of the locking, so warn if don't hold the connection_mutex. + * + * RETURNS: + * + * The MST topology state or error pointer. + */ +struct drm_dp_mst_topology_state *drm_atomic_get_mst_topology_state(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr) +{ + struct drm_device *dev = mgr->dev; + + WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); + return drm_atomic_get_private_obj_state(state, mgr, + &mst_state_funcs); +} +EXPORT_SYMBOL(drm_atomic_get_mst_topology_state); + /** * drm_dp_mst_topology_mgr_init - initialise a topology manager * @mgr: manager struct to initialise @@ -2980,6 +3118,15 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, if (test_calc_pbn_mode() < 0) DRM_ERROR("MST PBN self-test failed\n"); + mgr->state = kzalloc(sizeof(*mgr->state), GFP_KERNEL); + if (mgr->state == NULL) + return -ENOMEM; + mgr->state->mgr = mgr; + + /* max. 
time slots - one slot for MTP header */ + mgr->state->avail_slots = 63; + mgr->funcs = &mst_state_funcs; + return 0; } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_init); @@ -3000,6 +3147,9 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr) mutex_unlock(&mgr->payload_lock); mgr->dev = NULL; mgr->aux = NULL; + kfree(mgr->state); + mgr->state = NULL; + mgr->funcs = NULL; } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_destroy); diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c index 50abd1faf38f..53f9bdf470d7 100644 --- a/drivers/gpu/drm/drm_fb_cma_helper.c +++ b/drivers/gpu/drm/drm_fb_cma_helper.c @@ -189,7 +189,7 @@ struct drm_framebuffer *drm_fb_cma_create_with_funcs(struct drm_device *dev, obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[i]); if (!obj) { dev_err(dev->dev, "Failed to lookup GEM object\n"); - ret = -ENXIO; + ret = -ENOENT; goto err_gem_object_put; } @@ -260,6 +260,33 @@ struct drm_gem_cma_object *drm_fb_cma_get_gem_obj(struct drm_framebuffer *fb, EXPORT_SYMBOL_GPL(drm_fb_cma_get_gem_obj); /** + * drm_fb_cma_get_gem_addr() - Get physical address for framebuffer + * @fb: The framebuffer + * @state: Which state of drm plane + * @plane: Which plane + * Return the CMA GEM address for given framebuffer. + * + * This function will usually be called from the PLANE callback functions. + */ +dma_addr_t drm_fb_cma_get_gem_addr(struct drm_framebuffer *fb, + struct drm_plane_state *state, + unsigned int plane) +{ + struct drm_fb_cma *fb_cma = to_fb_cma(fb); + dma_addr_t paddr; + + if (plane >= 4) + return 0; + + paddr = fb_cma->obj[plane]->paddr + fb->offsets[plane]; + paddr += fb->format->cpp[plane] * (state->src_x >> 16); + paddr += fb->pitches[plane] * (state->src_y >> 16); + + return paddr; +} +EXPORT_SYMBOL_GPL(drm_fb_cma_get_gem_addr); + +/** * drm_fb_cma_prepare_fb() - Prepare CMA framebuffer * @plane: Which plane * @state: Plane state attach fence to diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 1f178b878e42..574af01d3ce9 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -378,7 +378,7 @@ retry: goto fail; } - plane_state->rotation = DRM_ROTATE_0; + plane_state->rotation = DRM_MODE_ROTATE_0; plane->old_fb = plane->fb; plane_mask |= 1 << drm_plane_index(plane); @@ -431,7 +431,7 @@ static int restore_fbdev_mode_legacy(struct drm_fb_helper *fb_helper) if (plane->rotation_property) drm_mode_plane_set_obj_prop(plane, plane->rotation_property, - DRM_ROTATE_0); + DRM_MODE_ROTATE_0); } for (i = 0; i < fb_helper->crtc_count; i++) { diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 3783b659cd38..caad93dab54b 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -351,9 +351,8 @@ void drm_lastclose(struct drm_device * dev) * * This function must be used by drivers as their &file_operations.release * method. It frees any resources associated with the open file, and calls the - * &drm_driver.preclose and &drm_driver.lastclose driver callbacks. If this is - * the last open file for the DRM device also proceeds to call the - * &drm_driver.lastclose driver callback. + * &drm_driver.postclose driver callback. If this is the last open file for the + * DRM device also proceeds to call the &drm_driver.lastclose driver callback. 
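A sketch of the typical call site for the new drm_fb_cma_get_gem_addr() helper, from a CMA driver's plane ->atomic_update; my_write_dma_addr() stands in for the driver's hypothetical scanout-address register write:

    static void my_plane_atomic_update(struct drm_plane *plane,
                                       struct drm_plane_state *old_state)
    {
            struct drm_plane_state *state = plane->state;
            dma_addr_t paddr;

            /* The helper already folds fb->offsets[] and the 16.16
             * fixed-point src_x/src_y offsets into the address. */
            paddr = drm_fb_cma_get_gem_addr(state->fb, state, 0);
            my_write_dma_addr(plane, paddr);
    }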
* * RETURNS: * @@ -373,7 +372,8 @@ int drm_release(struct inode *inode, struct file *filp) list_del(&file_priv->lhead); mutex_unlock(&dev->filelist_mutex); - if (dev->driver->preclose) + if (drm_core_check_feature(dev, DRIVER_LEGACY) && + dev->driver->preclose) dev->driver->preclose(dev, file_priv); /* ======================================================== diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index b1e28c944637..8dc11064253d 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -521,7 +521,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) npages = obj->size >> PAGE_SHIFT; - pages = drm_malloc_ab(npages, sizeof(struct page *)); + pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (pages == NULL) return ERR_PTR(-ENOMEM); @@ -546,7 +546,7 @@ fail: while (i--) put_page(pages[i]); - drm_free_large(pages); + kvfree(pages); return ERR_CAST(p); } EXPORT_SYMBOL(drm_gem_get_pages); @@ -582,7 +582,7 @@ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages, put_page(pages[i]); } - drm_free_large(pages); + kvfree(pages); } EXPORT_SYMBOL(drm_gem_put_pages); diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 8c866cac62dd..c7debaad67f8 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -54,7 +54,7 @@ static bool drm_get_last_vbltimestamp(struct drm_device *dev, unsigned int pipe, - struct timeval *tvblank, unsigned flags); + struct timeval *tvblank, bool in_vblank_irq); static unsigned int drm_timestamp_precision = 20; /* Default to 20 usecs. */ @@ -138,7 +138,7 @@ static void drm_reset_vblank_timestamp(struct drm_device *dev, unsigned int pipe */ do { cur_vblank = __get_vblank_counter(dev, pipe); - rc = drm_get_last_vbltimestamp(dev, pipe, &t_vblank, 0); + rc = drm_get_last_vbltimestamp(dev, pipe, &t_vblank, false); } while (cur_vblank != __get_vblank_counter(dev, pipe) && --count > 0); /* @@ -171,7 +171,7 @@ static void drm_reset_vblank_timestamp(struct drm_device *dev, unsigned int pipe * device vblank fields. */ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe, - unsigned long flags) + bool in_vblank_irq) { struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; u32 cur_vblank, diff; @@ -194,7 +194,7 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe, */ do { cur_vblank = __get_vblank_counter(dev, pipe); - rc = drm_get_last_vbltimestamp(dev, pipe, &t_vblank, flags); + rc = drm_get_last_vbltimestamp(dev, pipe, &t_vblank, in_vblank_irq); } while (cur_vblank != __get_vblank_counter(dev, pipe) && --count > 0); if (dev->max_vblank_count != 0) { @@ -214,13 +214,13 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe, */ diff = DIV_ROUND_CLOSEST_ULL(diff_ns, framedur_ns); - if (diff == 0 && flags & DRM_CALLED_FROM_VBLIRQ) + if (diff == 0 && in_vblank_irq) DRM_DEBUG_VBL("crtc %u: Redundant vblirq ignored." " diff_ns = %lld, framedur_ns = %d)\n", pipe, (long long) diff_ns, framedur_ns); } else { /* some kind of default for drivers w/o accurate vbl timestamping */ - diff = (flags & DRM_CALLED_FROM_VBLIRQ) != 0; + diff = in_vblank_irq ? 1 : 0; } /* @@ -253,7 +253,7 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe, * Otherwise reinitialize delayed at next vblank interrupt and assign 0 * for now, to mark the vblanktimestamp as invalid. 
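The drm_gem.c hunks above are representative of the drm_malloc_ab()/drm_free_large() removal repeated across the drivers later in this series; the mapping onto the generic kvmalloc API is mechanical (sketch):

    static struct page **my_alloc_page_array(unsigned long npages)
    {
            /* was: drm_malloc_ab(npages, sizeof(struct page *)) */
            return kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
    }

    static void my_free_page_array(struct page **pages)
    {
            /* was: drm_free_large(pages) */
            kvfree(pages);
    }

kvmalloc_array() falls back from kmalloc to vmalloc for large allocations, which is exactly the behaviour drm_malloc_ab() used to open-code.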
*/ - if (!rc && (flags & DRM_CALLED_FROM_VBLIRQ) == 0) + if (!rc && in_vblank_irq) t_vblank = (struct timeval) {0, 0}; store_vblank(dev, pipe, diff, &t_vblank, cur_vblank); @@ -291,7 +291,7 @@ u32 drm_accurate_vblank_count(struct drm_crtc *crtc) spin_lock_irqsave(&dev->vblank_time_lock, flags); - drm_update_vblank_count(dev, pipe, 0); + drm_update_vblank_count(dev, pipe, false); vblank = drm_vblank_count(dev, pipe); spin_unlock_irqrestore(&dev->vblank_time_lock, flags); @@ -349,7 +349,7 @@ static void vblank_disable_and_save(struct drm_device *dev, unsigned int pipe) * this time. This makes the count account for the entire time * between drm_crtc_vblank_on() and drm_crtc_vblank_off(). */ - drm_update_vblank_count(dev, pipe, 0); + drm_update_vblank_count(dev, pipe, false); spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); } @@ -684,6 +684,7 @@ void drm_calc_timestamping_constants(struct drm_crtc *crtc, vblank->linedur_ns = linedur_ns; vblank->framedur_ns = framedur_ns; + vblank->hwmode = *mode; DRM_DEBUG("crtc %u: hwmode: htotal %d, vtotal %d, vdisplay %d\n", crtc->base.id, mode->crtc_htotal, @@ -700,10 +701,10 @@ EXPORT_SYMBOL(drm_calc_timestamping_constants); * @max_error: Desired maximum allowable error in timestamps (nanosecs) * On return contains true maximum error of timestamp * @vblank_time: Pointer to struct timeval which should receive the timestamp - * @flags: Flags to pass to driver: - * 0 = Default, - * DRM_CALLED_FROM_VBLIRQ = If function is called from vbl IRQ handler - * @mode: mode which defines the scanout timings + * @in_vblank_irq: + * True when called from drm_crtc_handle_vblank(). Some drivers + * need to apply some workarounds for gpu-specific vblank irq quirks + * if flag is set. * * Implements calculation of exact vblank timestamps from given drm_display_mode * timings and current video scanout position of a CRTC. This can be called from @@ -723,52 +724,62 @@ EXPORT_SYMBOL(drm_calc_timestamping_constants); * returns as no operation if a doublescan or interlaced video mode is * active. Higher level code is expected to handle this. * - * Returns: - * Negative value on error, failure or if not supported in current - * video mode: - * - * -EINVAL Invalid CRTC. - * -EAGAIN Temporary unavailable, e.g., called before initial modeset. - * -ENOTSUPP Function not supported in current display mode. - * -EIO Failed, e.g., due to failed scanout position query. + * This function can be used to implement the &drm_driver.get_vblank_timestamp + * directly, if the driver implements the &drm_driver.get_scanout_position hook. * - * Returns or'ed positive status flags on success: + * Note that atomic drivers must call drm_calc_timestamping_constants() before + * enabling a CRTC. The atomic helpers already take care of that in + * drm_atomic_helper_update_legacy_modeset_state(). * - * DRM_VBLANKTIME_SCANOUTPOS_METHOD - Signal this method used for timestamping. - * DRM_VBLANKTIME_INVBL - Timestamp taken while scanout was in vblank interval. + * Returns: * + * Returns true on success, and false on failure, i.e. when no accurate + * timestamp could be acquired. 
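With the bool conversion, the helper's signature now matches &drm_driver.get_vblank_timestamp exactly, so a driver that implements &drm_driver.get_scanout_position can wire it up directly, as the kerneldoc above suggests (sketch; my_get_scanout_position is hypothetical):

    static struct drm_driver my_driver = {
            .get_scanout_position   = my_get_scanout_position,
            .get_vblank_timestamp   = drm_calc_vbltimestamp_from_scanoutpos,
            /* remaining fields omitted from this sketch */
    };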
*/ -int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, - unsigned int pipe, - int *max_error, - struct timeval *vblank_time, - unsigned flags, - const struct drm_display_mode *mode) +bool drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, + unsigned int pipe, + int *max_error, + struct timeval *vblank_time, + bool in_vblank_irq) { struct timeval tv_etime; ktime_t stime, etime; - unsigned int vbl_status; - int ret = DRM_VBLANKTIME_SCANOUTPOS_METHOD; + bool vbl_status; + struct drm_crtc *crtc; + const struct drm_display_mode *mode; + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; int vpos, hpos, i; int delta_ns, duration_ns; - if (pipe >= dev->num_crtcs) { + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return false; + + crtc = drm_crtc_from_index(dev, pipe); + + if (pipe >= dev->num_crtcs || !crtc) { DRM_ERROR("Invalid crtc %u\n", pipe); - return -EINVAL; + return false; } /* Scanout position query not supported? Should not happen. */ if (!dev->driver->get_scanout_position) { DRM_ERROR("Called from driver w/o get_scanout_position()!?\n"); - return -EIO; + return false; } + if (drm_drv_uses_atomic_modeset(dev)) + mode = &vblank->hwmode; + else + mode = &crtc->hwmode; + /* If mode timing undefined, just return as no-op: * Happens during initial modesetting of a crtc. */ if (mode->crtc_clock == 0) { DRM_DEBUG("crtc %u: Noop due to uninitialized mode.\n", pipe); - return -EAGAIN; + WARN_ON_ONCE(drm_drv_uses_atomic_modeset(dev)); + + return false; } /* Get current scanout position with system timestamp. @@ -783,16 +794,17 @@ int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, * Get vertical and horizontal scanout position vpos, hpos, * and bounding timestamps stime, etime, pre/post query. */ - vbl_status = dev->driver->get_scanout_position(dev, pipe, flags, + vbl_status = dev->driver->get_scanout_position(dev, pipe, + in_vblank_irq, &vpos, &hpos, &stime, &etime, mode); /* Return as no-op if scanout query unsupported or failed. */ - if (!(vbl_status & DRM_SCANOUTPOS_VALID)) { - DRM_DEBUG("crtc %u : scanoutpos query failed [0x%x].\n", - pipe, vbl_status); - return -EIO; + if (!vbl_status) { + DRM_DEBUG("crtc %u : scanoutpos query failed.\n", + pipe); + return false; } /* Compute uncertainty in timestamp of scanout position query. */ @@ -830,13 +842,13 @@ int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, etime = ktime_sub_ns(etime, delta_ns); *vblank_time = ktime_to_timeval(etime); - DRM_DEBUG_VBL("crtc %u : v 0x%x p(%d,%d)@ %ld.%ld -> %ld.%ld [e %d us, %d rep]\n", - pipe, vbl_status, hpos, vpos, + DRM_DEBUG_VBL("crtc %u : v p(%d,%d)@ %ld.%ld -> %ld.%ld [e %d us, %d rep]\n", + pipe, hpos, vpos, (long)tv_etime.tv_sec, (long)tv_etime.tv_usec, (long)vblank_time->tv_sec, (long)vblank_time->tv_usec, duration_ns/1000, i); - return ret; + return true; } EXPORT_SYMBOL(drm_calc_vbltimestamp_from_scanoutpos); @@ -854,9 +866,10 @@ static struct timeval get_drm_timestamp(void) * @dev: DRM device * @pipe: index of CRTC whose vblank timestamp to retrieve * @tvblank: Pointer to target struct timeval which should receive the timestamp - * @flags: Flags to pass to driver: - * 0 = Default, - * DRM_CALLED_FROM_VBLIRQ = If function is called from vbl IRQ handler + * @in_vblank_irq: + * True when called from drm_crtc_handle_vblank(). Some drivers + * need to apply some workarounds for gpu-specific vblank irq quirks + * if flag is set. * * Fetches the system timestamp corresponding to the time of the most recent * vblank interval on specified CRTC. 
May call into kms-driver to @@ -870,27 +883,25 @@ static struct timeval get_drm_timestamp(void) */ static bool drm_get_last_vbltimestamp(struct drm_device *dev, unsigned int pipe, - struct timeval *tvblank, unsigned flags) + struct timeval *tvblank, bool in_vblank_irq) { - int ret; + bool ret = false; /* Define requested maximum error on timestamps (nanoseconds). */ int max_error = (int) drm_timestamp_precision * 1000; /* Query driver if possible and precision timestamping enabled. */ - if (dev->driver->get_vblank_timestamp && (max_error > 0)) { + if (dev->driver->get_vblank_timestamp && (max_error > 0)) ret = dev->driver->get_vblank_timestamp(dev, pipe, &max_error, - tvblank, flags); - if (ret > 0) - return true; - } + tvblank, in_vblank_irq); /* GPU high precision timestamp query unsupported or failed. * Return current monotonic/gettimeofday timestamp as best estimate. */ - *tvblank = get_drm_timestamp(); + if (!ret) + *tvblank = get_drm_timestamp(); - return false; + return ret; } /** @@ -1329,6 +1340,10 @@ void drm_crtc_vblank_off(struct drm_crtc *crtc) send_vblank_event(dev, e, seq, &now); } spin_unlock_irqrestore(&dev->event_lock, irqflags); + + /* Will be reset by the modeset helpers when re-enabling the crtc by + * calling drm_calc_timestamping_constants(). */ + vblank->hwmode.crtc_clock = 0; } EXPORT_SYMBOL(drm_crtc_vblank_off); @@ -1760,7 +1775,7 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe) return false; } - drm_update_vblank_count(dev, pipe, DRM_CALLED_FROM_VBLIRQ); + drm_update_vblank_count(dev, pipe, true); spin_unlock(&dev->vblank_time_lock); diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index b84a295230fc..06aee1741e96 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -336,7 +336,7 @@ int drm_primary_helper_update(struct drm_plane *plane, struct drm_crtc *crtc, ret = drm_plane_helper_check_update(plane, crtc, fb, &src, &dest, &clip, - DRM_ROTATE_0, + DRM_MODE_ROTATE_0, DRM_PLANE_HELPER_NO_SCALING, DRM_PLANE_HELPER_NO_SCALING, false, false, &visible); @@ -381,6 +381,7 @@ EXPORT_SYMBOL(drm_primary_helper_update); /** * drm_primary_helper_disable() - Helper for primary plane disable * @plane: plane to disable + * @ctx: lock acquire context, not used here * * Provides a default plane disable handler for primary planes. 
This is handler * is called in response to a userspace SetPlane operation on the plane with a @@ -510,12 +511,10 @@ int drm_plane_helper_commit(struct drm_plane *plane, if (plane_funcs->cleanup_fb) plane_funcs->cleanup_fb(plane, plane_state); out: - if (plane_state) { - if (plane->funcs->atomic_destroy_state) - plane->funcs->atomic_destroy_state(plane, plane_state); - else - drm_atomic_helper_plane_destroy_state(plane, plane_state); - } + if (plane->funcs->atomic_destroy_state) + plane->funcs->atomic_destroy_state(plane, plane_state); + else + drm_atomic_helper_plane_destroy_state(plane, plane_state); return ret; } diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 954eb848b5e2..22408badc617 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -595,15 +595,18 @@ out_unlock: EXPORT_SYMBOL(drm_gem_prime_handle_to_fd); /** - * drm_gem_prime_import - helper library implementation of the import callback + * drm_gem_prime_import_dev - core implementation of the import callback * @dev: drm_device to import into * @dma_buf: dma-buf object to import + * @attach_dev: struct device to dma_buf attach * - * This is the implementation of the gem_prime_import functions for GEM drivers - * using the PRIME helpers. + * This is the core of drm_gem_prime_import. It's designed to be called by + * drivers who want to use a different device structure than dev->dev for + * attaching via dma_buf. */ -struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf) +struct drm_gem_object *drm_gem_prime_import_dev(struct drm_device *dev, + struct dma_buf *dma_buf, + struct device *attach_dev) { struct dma_buf_attachment *attach; struct sg_table *sgt; @@ -625,7 +628,7 @@ struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev, if (!dev->driver->gem_prime_import_sg_table) return ERR_PTR(-EINVAL); - attach = dma_buf_attach(dma_buf, dev->dev); + attach = dma_buf_attach(dma_buf, attach_dev); if (IS_ERR(attach)) return ERR_CAST(attach); @@ -655,6 +658,21 @@ fail_detach: return ERR_PTR(ret); } +EXPORT_SYMBOL(drm_gem_prime_import_dev); + +/** + * drm_gem_prime_import - helper library implementation of the import callback + * @dev: drm_device to import into + * @dma_buf: dma-buf object to import + * + * This is the implementation of the gem_prime_import functions for GEM drivers + * using the PRIME helpers. 
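A sketch of the intended caller of the new drm_gem_prime_import_dev(): a driver whose DMA-capable struct device differs from &drm_device.dev routes the dma-buf attachment through it; the my_device wrapper and its dma_dev member are hypothetical:

    static struct drm_gem_object *my_gem_prime_import(struct drm_device *dev,
                                                      struct dma_buf *dma_buf)
    {
            struct my_device *mdev = to_my_device(dev);

            /* Attach via the device that actually does DMA, not dev->dev. */
            return drm_gem_prime_import_dev(dev, dma_buf, mdev->dma_dev);
    }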
+ */ +struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + return drm_gem_prime_import_dev(dev, dma_buf, dev->dev); +} EXPORT_SYMBOL(drm_gem_prime_import); /** diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c index bc5575960ebc..9817c1445ba9 100644 --- a/drivers/gpu/drm/drm_rect.c +++ b/drivers/gpu/drm/drm_rect.c @@ -310,38 +310,38 @@ void drm_rect_rotate(struct drm_rect *r, { struct drm_rect tmp; - if (rotation & (DRM_REFLECT_X | DRM_REFLECT_Y)) { + if (rotation & (DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y)) { tmp = *r; - if (rotation & DRM_REFLECT_X) { + if (rotation & DRM_MODE_REFLECT_X) { r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; } - if (rotation & DRM_REFLECT_Y) { + if (rotation & DRM_MODE_REFLECT_Y) { r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; } } - switch (rotation & DRM_ROTATE_MASK) { - case DRM_ROTATE_0: + switch (rotation & DRM_MODE_ROTATE_MASK) { + case DRM_MODE_ROTATE_0: break; - case DRM_ROTATE_90: + case DRM_MODE_ROTATE_90: tmp = *r; r->x1 = tmp.y1; r->x2 = tmp.y2; r->y1 = width - tmp.x2; r->y2 = width - tmp.x1; break; - case DRM_ROTATE_180: + case DRM_MODE_ROTATE_180: tmp = *r; r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; break; - case DRM_ROTATE_270: + case DRM_MODE_ROTATE_270: tmp = *r; r->x1 = height - tmp.y2; r->x2 = height - tmp.y1; @@ -373,8 +373,8 @@ EXPORT_SYMBOL(drm_rect_rotate); * them when doing a rotatation and its inverse. * That is, if you do :: * - * drm_rotate(&r, width, height, rotation); - * drm_rotate_inv(&r, width, height, rotation); + * DRM_MODE_PROP_ROTATE(&r, width, height, rotation); + * DRM_MODE_ROTATE_inv(&r, width, height, rotation); * * you will always get back the original rectangle. 
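Spelled out with the helpers as actually defined in this file, drm_rect_rotate() and drm_rect_rotate_inv(), the round-trip property reads (sketch):

    struct drm_rect r = { .x1 = 10, .y1 = 20, .x2 = 110, .y2 = 220 };
    unsigned int rotation = DRM_MODE_ROTATE_90 | DRM_MODE_REFLECT_X;

    /* Map into rotated-plane coordinates and back again. */
    drm_rect_rotate(&r, 1920, 1080, rotation);
    drm_rect_rotate_inv(&r, 1920, 1080, rotation);
    /* r is back to (10,20)-(110,220) here, for any rotation value. */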
*/ @@ -384,24 +384,24 @@ void drm_rect_rotate_inv(struct drm_rect *r, { struct drm_rect tmp; - switch (rotation & DRM_ROTATE_MASK) { - case DRM_ROTATE_0: + switch (rotation & DRM_MODE_ROTATE_MASK) { + case DRM_MODE_ROTATE_0: break; - case DRM_ROTATE_90: + case DRM_MODE_ROTATE_90: tmp = *r; r->x1 = width - tmp.y2; r->x2 = width - tmp.y1; r->y1 = tmp.x1; r->y2 = tmp.x2; break; - case DRM_ROTATE_180: + case DRM_MODE_ROTATE_180: tmp = *r; r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; break; - case DRM_ROTATE_270: + case DRM_MODE_ROTATE_270: tmp = *r; r->x1 = tmp.y1; r->x2 = tmp.y2; @@ -412,15 +412,15 @@ void drm_rect_rotate_inv(struct drm_rect *r, break; } - if (rotation & (DRM_REFLECT_X | DRM_REFLECT_Y)) { + if (rotation & (DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y)) { tmp = *r; - if (rotation & DRM_REFLECT_X) { + if (rotation & DRM_MODE_REFLECT_X) { r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; } - if (rotation & DRM_REFLECT_Y) { + if (rotation & DRM_MODE_REFLECT_Y) { r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index fd56f92f3469..d6fb724fc3cc 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -748,7 +748,7 @@ static struct page **etnaviv_gem_userptr_do_get_pages( uintptr_t ptr; unsigned int flags = 0; - pvec = drm_malloc_ab(npages, sizeof(struct page *)); + pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (!pvec) return ERR_PTR(-ENOMEM); @@ -772,7 +772,7 @@ static struct page **etnaviv_gem_userptr_do_get_pages( if (ret < 0) { release_pages(pvec, pinned, 0); - drm_free_large(pvec); + kvfree(pvec); return ERR_PTR(ret); } @@ -823,7 +823,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) mm = get_task_mm(etnaviv_obj->userptr.task); pinned = 0; if (mm == current->mm) { - pvec = drm_malloc_ab(npages, sizeof(struct page *)); + pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (!pvec) { mmput(mm); return -ENOMEM; @@ -832,7 +832,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) pinned = __get_user_pages_fast(etnaviv_obj->userptr.ptr, npages, !etnaviv_obj->userptr.ro, pvec); if (pinned < 0) { - drm_free_large(pvec); + kvfree(pvec); mmput(mm); return pinned; } @@ -845,7 +845,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) } release_pages(pvec, pinned, 0); - drm_free_large(pvec); + kvfree(pvec); work = kmalloc(sizeof(*work), GFP_KERNEL); if (!work) { @@ -879,7 +879,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj) int npages = etnaviv_obj->base.size >> PAGE_SHIFT; release_pages(etnaviv_obj->pages, npages, 0); - drm_free_large(etnaviv_obj->pages); + kvfree(etnaviv_obj->pages); } put_task_struct(etnaviv_obj->userptr.task); } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index 62b47972a52e..367bf952f61a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -87,7 +87,7 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj) * ours, just free the array we allocated: */ if (etnaviv_obj->pages) - drm_free_large(etnaviv_obj->pages); + kvfree(etnaviv_obj->pages); drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt); } @@ -128,7 +128,7 @@ struct drm_gem_object 
*etnaviv_gem_prime_import_sg_table(struct drm_device *dev, npages = size / PAGE_SIZE; etnaviv_obj->sgt = sgt; - etnaviv_obj->pages = drm_malloc_ab(npages, sizeof(struct page *)); + etnaviv_obj->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (!etnaviv_obj->pages) { ret = -ENOMEM; goto fail; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index de80ee1b71df..ee7069e93eda 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -345,9 +345,9 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, * Copy the command submission and bo array to kernel space in * one go, and do this outside of any locks. */ - bos = drm_malloc_ab(args->nr_bos, sizeof(*bos)); - relocs = drm_malloc_ab(args->nr_relocs, sizeof(*relocs)); - stream = drm_malloc_ab(1, args->stream_size); + bos = kvmalloc_array(args->nr_bos, sizeof(*bos), GFP_KERNEL); + relocs = kvmalloc_array(args->nr_relocs, sizeof(*relocs), GFP_KERNEL); + stream = kvmalloc_array(1, args->stream_size, GFP_KERNEL); cmdbuf = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, ALIGN(args->stream_size, 8) + 8, args->nr_bos); @@ -489,11 +489,11 @@ err_submit_cmds: if (cmdbuf) etnaviv_cmdbuf_free(cmdbuf); if (stream) - drm_free_large(stream); + kvfree(stream); if (bos) - drm_free_large(bos); + kvfree(bos); if (relocs) - drm_free_large(relocs); + kvfree(relocs); return ret; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 55a1579d11b3..c23479be4850 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -59,7 +59,8 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) nr_pages = exynos_gem->size >> PAGE_SHIFT; - exynos_gem->pages = drm_calloc_large(nr_pages, sizeof(struct page *)); + exynos_gem->pages = kvmalloc_array(nr_pages, sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); if (!exynos_gem->pages) { DRM_ERROR("failed to allocate pages.\n"); return -ENOMEM; @@ -101,7 +102,7 @@ err_dma_free: dma_free_attrs(to_dma_dev(dev), exynos_gem->size, exynos_gem->cookie, exynos_gem->dma_addr, exynos_gem->dma_attrs); err_free: - drm_free_large(exynos_gem->pages); + kvfree(exynos_gem->pages); return ret; } @@ -122,7 +123,7 @@ static void exynos_drm_free_buf(struct exynos_drm_gem *exynos_gem) (dma_addr_t)exynos_gem->dma_addr, exynos_gem->dma_attrs); - drm_free_large(exynos_gem->pages); + kvfree(exynos_gem->pages); } static int exynos_drm_gem_handle_create(struct drm_gem_object *obj, @@ -559,7 +560,7 @@ exynos_drm_gem_prime_import_sg_table(struct drm_device *dev, exynos_gem->dma_addr = sg_dma_address(sgt->sgl); npages = exynos_gem->size >> PAGE_SHIFT; - exynos_gem->pages = drm_malloc_ab(npages, sizeof(struct page *)); + exynos_gem->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (!exynos_gem->pages) { ret = -ENOMEM; goto err; @@ -588,7 +589,7 @@ exynos_drm_gem_prime_import_sg_table(struct drm_device *dev, return &exynos_gem->base; err_free_large: - drm_free_large(exynos_gem->pages); + kvfree(exynos_gem->pages); err: drm_gem_object_release(&exynos_gem->base); kfree(exynos_gem); diff --git a/drivers/gpu/drm/gma500/Makefile b/drivers/gpu/drm/gma500/Makefile index 190e55f2f891..c1c8dc18aa53 100644 --- a/drivers/gpu/drm/gma500/Makefile +++ b/drivers/gpu/drm/gma500/Makefile @@ -1,7 +1,6 @@ # # KMS driver for the GMA500 # -ccflags-y += -I$(srctree)/include/drm gma500_gfx-y += \ accel_2d.o \ diff --git 
a/drivers/gpu/drm/gma500/mdfld_tpo_vid.c b/drivers/gpu/drm/gma500/mdfld_tpo_vid.c index d8d4170725b2..a9420bf9a419 100644 --- a/drivers/gpu/drm/gma500/mdfld_tpo_vid.c +++ b/drivers/gpu/drm/gma500/mdfld_tpo_vid.c @@ -30,55 +30,20 @@ static struct drm_display_mode *tpo_vid_get_config_mode(struct drm_device *dev) { struct drm_display_mode *mode; - struct drm_psb_private *dev_priv = dev->dev_private; - struct oaktrail_timing_info *ti = &dev_priv->gct_data.DTD; - bool use_gct = false; mode = kzalloc(sizeof(*mode), GFP_KERNEL); if (!mode) return NULL; - if (use_gct) { - mode->hdisplay = (ti->hactive_hi << 8) | ti->hactive_lo; - mode->vdisplay = (ti->vactive_hi << 8) | ti->vactive_lo; - mode->hsync_start = mode->hdisplay + - ((ti->hsync_offset_hi << 8) | - ti->hsync_offset_lo); - mode->hsync_end = mode->hsync_start + - ((ti->hsync_pulse_width_hi << 8) | - ti->hsync_pulse_width_lo); - mode->htotal = mode->hdisplay + ((ti->hblank_hi << 8) | - ti->hblank_lo); - mode->vsync_start = - mode->vdisplay + ((ti->vsync_offset_hi << 8) | - ti->vsync_offset_lo); - mode->vsync_end = - mode->vsync_start + ((ti->vsync_pulse_width_hi << 8) | - ti->vsync_pulse_width_lo); - mode->vtotal = mode->vdisplay + - ((ti->vblank_hi << 8) | ti->vblank_lo); - mode->clock = ti->pixel_clock * 10; - - dev_dbg(dev->dev, "hdisplay is %d\n", mode->hdisplay); - dev_dbg(dev->dev, "vdisplay is %d\n", mode->vdisplay); - dev_dbg(dev->dev, "HSS is %d\n", mode->hsync_start); - dev_dbg(dev->dev, "HSE is %d\n", mode->hsync_end); - dev_dbg(dev->dev, "htotal is %d\n", mode->htotal); - dev_dbg(dev->dev, "VSS is %d\n", mode->vsync_start); - dev_dbg(dev->dev, "VSE is %d\n", mode->vsync_end); - dev_dbg(dev->dev, "vtotal is %d\n", mode->vtotal); - dev_dbg(dev->dev, "clock is %d\n", mode->clock); - } else { - mode->hdisplay = 864; - mode->vdisplay = 480; - mode->hsync_start = 873; - mode->hsync_end = 876; - mode->htotal = 887; - mode->vsync_start = 487; - mode->vsync_end = 490; - mode->vtotal = 499; - mode->clock = 33264; - } + mode->hdisplay = 864; + mode->vdisplay = 480; + mode->hsync_start = 873; + mode->hsync_end = 876; + mode->htotal = 887; + mode->vsync_start = 487; + mode->vsync_end = 490; + mode->vtotal = 499; + mode->clock = 33264; drm_mode_set_name(mode); drm_mode_set_crtcinfo(mode, 0); diff --git a/drivers/gpu/drm/hisilicon/hibmc/Makefile b/drivers/gpu/drm/hisilicon/hibmc/Makefile index f2e04c035673..3df726696372 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Makefile +++ b/drivers/gpu/drm/hisilicon/hibmc/Makefile @@ -1,4 +1,3 @@ -ccflags-y := -Iinclude/drm hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_drm_fbdev.o hibmc_ttm.o obj-$(CONFIG_DRM_HISI_HIBMC) += hibmc-drm.o diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c index 20732b62d4c9..ac457c779caa 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c @@ -17,7 +17,7 @@ */ #include <drm/drm_atomic_helper.h> -#include <ttm/ttm_page_alloc.h> +#include <drm/ttm/ttm_page_alloc.h> #include "hibmc_drm_drv.h" diff --git a/drivers/gpu/drm/i2c/Makefile b/drivers/gpu/drm/i2c/Makefile index 43aa33baebed..a77acfc1852e 100644 --- a/drivers/gpu/drm/i2c/Makefile +++ b/drivers/gpu/drm/i2c/Makefile @@ -1,5 +1,3 @@ -ccflags-y := -Iinclude/drm - ch7006-y := ch7006_drv.o ch7006_mode.o obj-$(CONFIG_DRM_I2C_CH7006) += ch7006.o diff --git a/drivers/gpu/drm/i810/Makefile b/drivers/gpu/drm/i810/Makefile index 43844ecafcc5..639f8596c978 100644 --- a/drivers/gpu/drm/i810/Makefile +++ 
b/drivers/gpu/drm/i810/Makefile @@ -2,7 +2,6 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -ccflags-y := -Iinclude/drm i810-y := i810_drv.o i810_dma.o obj-$(CONFIG_DRM_I810) += i810.o diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index b00edd3b8800..78c5c049a347 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -61,6 +61,18 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS If in doubt, say "N". +config DRM_I915_SW_FENCE_CHECK_DAG + bool "Enable additional driver debugging for detecting dependency cycles" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. + + Recommended for driver developers only. + + If in doubt, say "N". + config DRM_I915_SELFTEST bool "Enable selftests upon driver load" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 2cf04504e494..16dccf550412 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -16,6 +16,7 @@ i915-y := i915_drv.o \ i915_params.o \ i915_pci.o \ i915_suspend.o \ + i915_syncmap.o \ i915_sw_fence.o \ i915_sysfs.o \ intel_csr.o \ @@ -57,6 +58,7 @@ i915-y += i915_cmd_parser.o \ # general-purpose microcontroller (GuC) support i915-y += intel_uc.o \ + intel_guc_ct.o \ intel_guc_log.o \ intel_guc_loader.o \ intel_huc.o \ diff --git a/drivers/gpu/drm/i915/dvo_ch7017.c b/drivers/gpu/drm/i915/dvo_ch7017.c index b3c7c199200c..80b3e16cf48c 100644 --- a/drivers/gpu/drm/i915/dvo_ch7017.c +++ b/drivers/gpu/drm/i915/dvo_ch7017.c @@ -280,10 +280,10 @@ static void ch7017_mode_set(struct intel_dvo_device *dvo, (0 << CH7017_PHASE_DETECTOR_SHIFT); } else { outputs_enable = CH7017_LVDS_CHANNEL_A | CH7017_CHARGE_PUMP_HIGH; - lvds_pll_feedback_div = CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED | + lvds_pll_feedback_div = + CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED | (2 << CH7017_LVDS_PLL_FEED_BACK_DIVIDER_SHIFT) | (3 << CH7017_LVDS_PLL_FEED_FORWARD_DIVIDER_SHIFT); - lvds_pll_feedback_div = 35; lvds_control_2 = (3 << CH7017_LOOP_FILTER_SHIFT) | (0 << CH7017_PHASE_DETECTOR_SHIFT); if (1) { /* XXX: dual channel panel detection. Assume yes for now. */ diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index bada32b33237..6ae286cb5804 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; @@ -181,6 +180,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) struct intel_engine_cs *engine = dev_priv->engine[ring_id]; struct drm_i915_gem_request *rq; struct intel_vgpu *vgpu = workload->vgpu; + struct intel_ring *ring; int ret; gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", @@ -199,8 +199,9 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) * shadow_ctx pages invalid. So gvt need to pin itself. After update * the guest context, gvt can unpin the shadow_ctx safely. 
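The hunk above shows the new ->context_pin() contract: it returns the pinned struct intel_ring or an ERR_PTR-encoded error, so callers unpack it with the usual IS_ERR()/PTR_ERR() pair instead of checking an int (sketch):

    static int my_pin_context(struct intel_engine_cs *engine,
                              struct i915_gem_context *ctx)
    {
            struct intel_ring *ring;

            ring = engine->context_pin(engine, ctx);
            if (IS_ERR(ring))
                    return PTR_ERR(ring);

            /* Pinned; pair with engine->context_unpin(engine, ctx). */
            return 0;
    }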
*/ - ret = engine->context_pin(engine, shadow_ctx); - if (ret) { + ring = engine->context_pin(engine, shadow_ctx); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); gvt_vgpu_err("fail to pin shadow context\n"); workload->status = ret; mutex_unlock(&dev_priv->drm.struct_mutex); @@ -330,8 +331,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 7af100f84410..f0cb22cc0dd6 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1166,8 +1166,8 @@ static bool check_cmd(const struct intel_engine_cs *engine, find_reg(engine, is_master, reg_addr); if (!reg) { - DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n", - reg_addr, *cmd, engine->exec_id); + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", + reg_addr, *cmd, engine->name); return false; } @@ -1222,11 +1222,11 @@ static bool check_cmd(const struct intel_engine_cs *engine, desc->bits[i].mask; if (dword != desc->bits[i].expected) { - DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n", + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n", *cmd, desc->bits[i].mask, desc->bits[i].expected, - dword, engine->exec_id); + dword, engine->name); return false; } } @@ -1284,7 +1284,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, if (*cmd == MI_BATCH_BUFFER_END) { if (needs_clflush_after) { - void *ptr = ptr_mask_bits(shadow_batch_obj->mm.mapping); + void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d689e511744e..7e0816ccdc21 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -229,7 +229,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) int ret; total = READ_ONCE(dev_priv->mm.object_count); - objects = drm_malloc_ab(total, sizeof(*objects)); + objects = kvmalloc_array(total, sizeof(*objects), GFP_KERNEL); if (!objects) return -ENOMEM; @@ -274,7 +274,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) mutex_unlock(&dev->struct_mutex); out: - drm_free_large(objects); + kvfree(objects); return ret; } @@ -2482,8 +2482,6 @@ static void i915_guc_client_info(struct seq_file *m, client->wq_size, client->wq_offset, client->wq_tail); seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space); - seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); - seq_printf(m, "\tLast submission result: %d\n", client->retcode); for_each_engine(engine, dev_priv, id) { u64 submissions = client->submissions[id]; @@ -2494,42 +2492,34 @@ static void i915_guc_client_info(struct seq_file *m, seq_printf(m, "\tTotal: %llu\n", tot); } -static int i915_guc_info(struct seq_file *m, void *data) +static bool check_guc_submission(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_guc *guc = &dev_priv->guc; - struct intel_engine_cs *engine; - enum intel_engine_id id; - u64 total; if 
(!guc->execbuf_client) { seq_printf(m, "GuC submission %s\n", HAS_GUC_SCHED(dev_priv) ? "disabled" : "not supported"); - return 0; + return false; } + return true; +} + +static int i915_guc_info(struct seq_file *m, void *data) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + const struct intel_guc *guc = &dev_priv->guc; + + if (!check_guc_submission(m)) + return 0; + seq_printf(m, "Doorbell map:\n"); seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline); - seq_printf(m, "GuC total action count: %llu\n", guc->action_count); - seq_printf(m, "GuC action failure count: %u\n", guc->action_fail); - seq_printf(m, "GuC last action command: 0x%x\n", guc->action_cmd); - seq_printf(m, "GuC last action status: 0x%x\n", guc->action_status); - seq_printf(m, "GuC last action error code: %d\n", guc->action_err); - - total = 0; - seq_printf(m, "\nGuC submissions:\n"); - for_each_engine(engine, dev_priv, id) { - u64 submissions = guc->submissions[id]; - total += submissions; - seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n", - engine->name, submissions, guc->last_seqno[id]); - } - seq_printf(m, "\t%s: %llu\n", "Total", total); - seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client); i915_guc_client_info(m, dev_priv, guc->execbuf_client); @@ -2540,36 +2530,99 @@ static int i915_guc_info(struct seq_file *m, void *data) return 0; } -static int i915_guc_log_dump(struct seq_file *m, void *data) +static int i915_guc_stage_pool(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_i915_gem_object *obj; - int i = 0, pg; + const struct intel_guc *guc = &dev_priv->guc; + struct guc_stage_desc *desc = guc->stage_desc_pool_vaddr; + struct i915_guc_client *client = guc->execbuf_client; + unsigned int tmp; + int index; - if (!dev_priv->guc.log.vma) + if (!check_guc_submission(m)) return 0; - obj = dev_priv->guc.log.vma->obj; - for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) { - u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg)); + for (index = 0; index < GUC_MAX_STAGE_DESCRIPTORS; index++, desc++) { + struct intel_engine_cs *engine; + + if (!(desc->attribute & GUC_STAGE_DESC_ATTR_ACTIVE)) + continue; + + seq_printf(m, "GuC stage descriptor %u:\n", index); + seq_printf(m, "\tIndex: %u\n", desc->stage_id); + seq_printf(m, "\tAttribute: 0x%x\n", desc->attribute); + seq_printf(m, "\tPriority: %d\n", desc->priority); + seq_printf(m, "\tDoorbell id: %d\n", desc->db_id); + seq_printf(m, "\tEngines used: 0x%x\n", + desc->engines_used); + seq_printf(m, "\tDoorbell trigger phy: 0x%llx, cpu: 0x%llx, uK: 0x%x\n", + desc->db_trigger_phy, + desc->db_trigger_cpu, + desc->db_trigger_uk); + seq_printf(m, "\tProcess descriptor: 0x%x\n", + desc->process_desc); + seq_printf(m, "\tWorkqueue address: 0x%x, size: 0x%x\n", + desc->wq_addr, desc->wq_size); + seq_putc(m, '\n'); + + for_each_engine_masked(engine, dev_priv, client->engines, tmp) { + u32 guc_engine_id = engine->guc_id; + struct guc_execlist_context *lrc = + &desc->lrc[guc_engine_id]; + + seq_printf(m, "\t%s LRC:\n", engine->name); + seq_printf(m, "\t\tContext desc: 0x%x\n", + lrc->context_desc); + seq_printf(m, "\t\tContext id: 0x%x\n", lrc->context_id); + seq_printf(m, "\t\tLRCA: 0x%x\n", lrc->ring_lrca); + seq_printf(m, "\t\tRing begin: 0x%x\n", lrc->ring_begin); + seq_printf(m, "\t\tRing end: 0x%x\n", lrc->ring_end); + seq_putc(m, '\n'); + } + } + + return 0; +} + +static int 
i915_guc_log_dump(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_i915_private *dev_priv = node_to_i915(node); + bool dump_load_err = !!node->info_ent->data; + struct drm_i915_gem_object *obj = NULL; + u32 *log; + int i = 0; + + if (dump_load_err) + obj = dev_priv->guc.load_err_log; + else if (dev_priv->guc.log.vma) + obj = dev_priv->guc.log.vma->obj; - for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4) - seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", - *(log + i), *(log + i + 1), - *(log + i + 2), *(log + i + 3)); + if (!obj) + return 0; - kunmap_atomic(log); + log = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(log)) { + DRM_DEBUG("Failed to pin object\n"); + seq_puts(m, "(log data unaccessible)\n"); + return PTR_ERR(log); } + for (i = 0; i < obj->base.size / sizeof(u32); i += 4) + seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", + *(log + i), *(log + i + 1), + *(log + i + 2), *(log + i + 3)); + seq_putc(m, '\n'); + i915_gem_object_unpin_map(obj); + return 0; } static int i915_guc_log_control_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; if (!dev_priv->guc.log.vma) return -EINVAL; @@ -2581,14 +2634,13 @@ static int i915_guc_log_control_get(void *data, u64 *val) static int i915_guc_log_control_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; int ret; if (!dev_priv->guc.log.vma) return -EINVAL; - ret = mutex_lock_interruptible(&dev->struct_mutex); + ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); if (ret) return ret; @@ -2596,7 +2648,7 @@ static int i915_guc_log_control_set(void *data, u64 val) ret = i915_guc_log_control(dev_priv, val); intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev_priv->drm.struct_mutex); return ret; } @@ -2855,7 +2907,8 @@ static int i915_dmc_info(struct seq_file *m, void *unused) seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version), CSR_VERSION_MINOR(csr->version)); - if (IS_SKYLAKE(dev_priv) && csr->version >= CSR_VERSION(1, 6)) { + if (IS_KABYLAKE(dev_priv) || + (IS_SKYLAKE(dev_priv) && csr->version >= CSR_VERSION(1, 6))) { seq_printf(m, "DC3 -> DC5 count: %d\n", I915_READ(SKL_CSR_DC3_DC5_COUNT)); seq_printf(m, "DC5 -> DC6 count: %d\n", @@ -3043,36 +3096,6 @@ static void intel_connector_info(struct seq_file *m, intel_seq_print_mode(m, 2, mode); } -static bool cursor_active(struct drm_i915_private *dev_priv, int pipe) -{ - u32 state; - - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; - else - state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; - - return state; -} - -static bool cursor_position(struct drm_i915_private *dev_priv, - int pipe, int *x, int *y) -{ - u32 pos; - - pos = I915_READ(CURPOS(pipe)); - - *x = (pos >> CURSOR_X_SHIFT) & CURSOR_POS_MASK; - if (pos & (CURSOR_POS_SIGN << CURSOR_X_SHIFT)) - *x = -*x; - - *y = (pos >> CURSOR_Y_SHIFT) & CURSOR_POS_MASK; - if (pos & (CURSOR_POS_SIGN << CURSOR_Y_SHIFT)) - *y = -*y; - - return cursor_active(dev_priv, pipe); -} - static const char *plane_type(enum drm_plane_type type) { switch (type) { @@ -3095,17 +3118,17 @@ static const char *plane_rotation(unsigned int rotation) { static char buf[48]; /* - * According to doc only one DRM_ROTATE_ is allowed but this + * According to doc only one DRM_MODE_ROTATE_ is allowed but this * will print 
them all to visualize if the values are misused */ snprintf(buf, sizeof(buf), "%s%s%s%s%s%s(0x%08x)", - (rotation & DRM_ROTATE_0) ? "0 " : "", - (rotation & DRM_ROTATE_90) ? "90 " : "", - (rotation & DRM_ROTATE_180) ? "180 " : "", - (rotation & DRM_ROTATE_270) ? "270 " : "", - (rotation & DRM_REFLECT_X) ? "FLIPX " : "", - (rotation & DRM_REFLECT_Y) ? "FLIPY " : "", + (rotation & DRM_MODE_ROTATE_0) ? "0 " : "", + (rotation & DRM_MODE_ROTATE_90) ? "90 " : "", + (rotation & DRM_MODE_ROTATE_180) ? "180 " : "", + (rotation & DRM_MODE_ROTATE_270) ? "270 " : "", + (rotation & DRM_MODE_REFLECT_X) ? "FLIPX " : "", + (rotation & DRM_MODE_REFLECT_Y) ? "FLIPY " : "", rotation); return buf; @@ -3194,9 +3217,7 @@ static int i915_display_info(struct seq_file *m, void *unused) seq_printf(m, "CRTC info\n"); seq_printf(m, "---------\n"); for_each_intel_crtc(dev, crtc) { - bool active; struct intel_crtc_state *pipe_config; - int x, y; drm_modeset_lock(&crtc->base.mutex, NULL); pipe_config = to_intel_crtc_state(crtc->base.state); @@ -3208,14 +3229,18 @@ static int i915_display_info(struct seq_file *m, void *unused) yesno(pipe_config->dither), pipe_config->pipe_bpp); if (pipe_config->base.active) { + struct intel_plane *cursor = + to_intel_plane(crtc->base.cursor); + intel_crtc_info(m, crtc); - active = cursor_position(dev_priv, crtc->pipe, &x, &y); - seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x, active? %s\n", - yesno(crtc->cursor_base), - x, y, crtc->base.cursor->state->crtc_w, - crtc->base.cursor->state->crtc_h, - crtc->cursor_addr, yesno(active)); + seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x\n", + yesno(cursor->base.state->visible), + cursor->base.state->crtc_x, + cursor->base.state->crtc_y, + cursor->base.state->crtc_w, + cursor->base.state->crtc_h, + cursor->cursor.base); intel_scaler_info(m, crtc); intel_plane_info(m, crtc); } @@ -3316,7 +3341,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) if (i915.enable_execlists) { u32 ptr, read, write; - struct rb_node *rb; + unsigned int idx; seq_printf(m, "\tExeclist status: 0x%08x %08x\n", I915_READ(RING_EXECLIST_STATUS_LO(engine)), @@ -3334,8 +3359,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) if (read > write) write += GEN8_CSB_ENTRIES; while (read < write) { - unsigned int idx = ++read % GEN8_CSB_ENTRIES; - + idx = ++read % GEN8_CSB_ENTRIES; seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n", idx, I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), @@ -3343,28 +3367,30 @@ static int i915_engine_info(struct seq_file *m, void *unused) } rcu_read_lock(); - rq = READ_ONCE(engine->execlist_port[0].request); - if (rq) { - seq_printf(m, "\t\tELSP[0] count=%d, ", - engine->execlist_port[0].count); - print_request(m, rq, "rq: "); - } else { - seq_printf(m, "\t\tELSP[0] idle\n"); - } - rq = READ_ONCE(engine->execlist_port[1].request); - if (rq) { - seq_printf(m, "\t\tELSP[1] count=%d, ", - engine->execlist_port[1].count); - print_request(m, rq, "rq: "); - } else { - seq_printf(m, "\t\tELSP[1] idle\n"); + for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) { + unsigned int count; + + rq = port_unpack(&engine->execlist_port[idx], + &count); + if (rq) { + seq_printf(m, "\t\tELSP[%d] count=%d, ", + idx, count); + print_request(m, rq, "rq: "); + } else { + seq_printf(m, "\t\tELSP[%d] idle\n", + idx); + } } rcu_read_unlock(); spin_lock_irq(&engine->timeline->lock); - for (rb = engine->execlist_first; rb; rb = rb_next(rb)) { - rq = rb_entry(rb, 
typeof(*rq), priotree.node); - print_request(m, rq, "\t\tQ "); + for (rb = engine->execlist_first; rb; rb = rb_next(rb)){ + struct i915_priolist *p = + rb_entry(rb, typeof(*p), node); + + list_for_each_entry(rq, &p->requests, + priotree.link) + print_request(m, rq, "\t\tQ "); } spin_unlock_irq(&engine->timeline->lock); } else if (INTEL_GEN(dev_priv) > 6) { @@ -3704,16 +3730,10 @@ static ssize_t i915_displayport_test_active_write(struct file *file, if (len == 0) return 0; - input_buffer = kmalloc(len + 1, GFP_KERNEL); - if (!input_buffer) - return -ENOMEM; + input_buffer = memdup_user_nul(ubuf, len); + if (IS_ERR(input_buffer)) + return PTR_ERR(input_buffer); - if (copy_from_user(input_buffer, ubuf, len)) { - status = -EFAULT; - goto out; - } - - input_buffer[len] = '\0'; DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len); drm_connector_list_iter_begin(dev, &conn_iter); @@ -3739,7 +3759,6 @@ static ssize_t i915_displayport_test_active_write(struct file *file, } } drm_connector_list_iter_end(&conn_iter); -out: kfree(input_buffer); if (status < 0) return status; @@ -3900,6 +3919,8 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) num_levels = 3; else if (IS_VALLEYVIEW(dev_priv)) num_levels = 1; + else if (IS_G4X(dev_priv)) + num_levels = 3; else num_levels = ilk_wm_max_level(dev_priv) + 1; @@ -3912,8 +3933,10 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) * - WM1+ latency values in 0.5us units * - latencies are in us on gen9/vlv/chv */ - if (INTEL_GEN(dev_priv) >= 9 || IS_VALLEYVIEW(dev_priv) || - IS_CHERRYVIEW(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9 || + IS_VALLEYVIEW(dev_priv) || + IS_CHERRYVIEW(dev_priv) || + IS_G4X(dev_priv)) latency *= 10; else if (level > 0) latency *= 5; @@ -3974,7 +3997,7 @@ static int pri_wm_latency_open(struct inode *inode, struct file *file) { struct drm_i915_private *dev_priv = inode->i_private; - if (INTEL_GEN(dev_priv) < 5) + if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) return -ENODEV; return single_open(file, pri_wm_latency_show, dev_priv); @@ -4016,6 +4039,8 @@ static ssize_t wm_latency_write(struct file *file, const char __user *ubuf, num_levels = 3; else if (IS_VALLEYVIEW(dev_priv)) num_levels = 1; + else if (IS_G4X(dev_priv)) + num_levels = 3; else num_levels = ilk_wm_max_level(dev_priv) + 1; @@ -4776,6 +4801,8 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_guc_info", i915_guc_info, 0}, {"i915_guc_load_status", i915_guc_load_status_info, 0}, {"i915_guc_log_dump", i915_guc_log_dump, 0}, + {"i915_guc_load_err_log_dump", i915_guc_log_dump, 0, (void *)1}, + {"i915_guc_stage_pool", i915_guc_stage_pool, 0}, {"i915_huc_load_status", i915_huc_load_status_info, 0}, {"i915_frequency_info", i915_frequency_info, 0}, {"i915_hangcheck_info", i915_hangcheck_info, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3036d4835b0f..7b8c72776f46 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: case I915_PARAM_HAS_EXEC_ASYNC: case I915_PARAM_HAS_EXEC_FENCE: + case I915_PARAM_HAS_EXEC_CAPTURE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -834,10 +835,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_uc_init_early(dev_priv); i915_memcpy_init_early(dev_priv); - ret 
= intel_engines_init_early(dev_priv); - if (ret) - return ret; - ret = i915_workqueues_init(dev_priv); if (ret < 0) goto err_engines; @@ -855,7 +852,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_init_audio_hooks(dev_priv); ret = i915_gem_load_init(dev_priv); if (ret < 0) - goto err_workqueues; + goto err_irq; intel_display_crc_init(dev_priv); @@ -867,7 +864,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, return 0; -err_workqueues: +err_irq: + intel_irq_fini(dev_priv); i915_workqueues_cleanup(dev_priv); err_engines: i915_engines_cleanup(dev_priv); @@ -882,6 +880,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) { i915_perf_fini(dev_priv); i915_gem_load_cleanup(dev_priv); + intel_irq_fini(dev_priv); i915_workqueues_cleanup(dev_priv); i915_engines_cleanup(dev_priv); } @@ -947,14 +946,21 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) ret = i915_mmio_setup(dev_priv); if (ret < 0) - goto put_bridge; + goto err_bridge; intel_uncore_init(dev_priv); + + ret = intel_engines_init_mmio(dev_priv); + if (ret) + goto err_uncore; + i915_gem_init_mmio(dev_priv); return 0; -put_bridge: +err_uncore: + intel_uncore_fini(dev_priv); +err_bridge: pci_dev_put(dev_priv->bridge_dev); return ret; @@ -1213,9 +1219,8 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) struct drm_i915_private *dev_priv; int ret; - /* Enable nuclear pageflip on ILK+, except vlv/chv */ - if (!i915.nuclear_pageflip && - (match_info->gen < 5 || match_info->has_gmch_display)) + /* Enable nuclear pageflip on ILK+ */ + if (!i915.nuclear_pageflip && match_info->gen < 5) driver.driver_features &= ~DRIVER_ATOMIC; ret = -ENOMEM; @@ -1272,10 +1277,6 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) dev_priv->ipc_enabled = false; - /* Everything is in place, we can now relax! 
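The reordered error paths in the hunks above (err_irq/err_engines in i915_driver_init_early(), err_uncore/err_bridge in i915_driver_init_mmio()) follow the kernel's usual onion-unwind idiom: each acquisition gets a label, and a failure at step N jumps to the label that releases steps N-1..1 in reverse order. A minimal sketch of the pattern, with hypothetical init_a/init_b/init_c helpers:

    struct example;

    int init_a(struct example *ex);    /* hypothetical helpers */
    void fini_a(struct example *ex);
    int init_b(struct example *ex);
    void fini_b(struct example *ex);
    int init_c(struct example *ex);

    static int example_init(struct example *ex)
    {
            int ret;

            ret = init_a(ex);
            if (ret)
                    return ret;             /* nothing to unwind yet */

            ret = init_b(ex);
            if (ret)
                    goto err_a;

            ret = init_c(ex);
            if (ret)
                    goto err_b;

            return 0;

    err_b:
            fini_b(ex);                     /* unwind in reverse order... */
    err_a:
            fini_a(ex);                     /* ...falling through each label */
            return ret;
    }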
*/ - DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", - driver.name, driver.major, driver.minor, driver.patchlevel, - driver.date, pci_name(pdev), dev_priv->drm.primary->index); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) DRM_INFO("DRM_I915_DEBUG enabled\n"); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c9b0949f6c1a..35e161b5b90e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -55,6 +55,7 @@ #include "i915_reg.h" #include "i915_utils.h" +#include "intel_uncore.h" #include "intel_bios.h" #include "intel_dpll_mgr.h" #include "intel_uc.h" @@ -79,8 +80,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170403" -#define DRIVER_TIMESTAMP 1491198738 +#define DRIVER_DATE "20170529" +#define DRIVER_TIMESTAMP 1496041258 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -114,6 +115,13 @@ typedef struct { fp; \ }) +static inline bool is_fixed16_zero(uint_fixed_16_16_t val) +{ + if (val.val == 0) + return true; + return false; +} + static inline uint_fixed_16_16_t u32_to_fixed_16_16(uint32_t val) { uint_fixed_16_16_t fp; @@ -152,8 +160,39 @@ static inline uint_fixed_16_16_t max_fixed_16_16(uint_fixed_16_16_t max1, return max; } -static inline uint_fixed_16_16_t fixed_16_16_div_round_up(uint32_t val, - uint32_t d) +static inline uint32_t div_round_up_fixed16(uint_fixed_16_16_t val, + uint_fixed_16_16_t d) +{ + return DIV_ROUND_UP(val.val, d.val); +} + +static inline uint32_t mul_round_up_u32_fixed16(uint32_t val, + uint_fixed_16_16_t mul) +{ + uint64_t intermediate_val; + uint32_t result; + + intermediate_val = (uint64_t) val * mul.val; + intermediate_val = DIV_ROUND_UP_ULL(intermediate_val, 1 << 16); + WARN_ON(intermediate_val >> 32); + result = clamp_t(uint32_t, intermediate_val, 0, ~0); + return result; +} + +static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val, + uint_fixed_16_16_t mul) +{ + uint64_t intermediate_val; + uint_fixed_16_16_t fp; + + intermediate_val = (uint64_t) val.val * mul.val; + intermediate_val = intermediate_val >> 16; + WARN_ON(intermediate_val >> 32); + fp.val = clamp_t(uint32_t, intermediate_val, 0, ~0); + return fp; +} + +static inline uint_fixed_16_16_t fixed_16_16_div(uint32_t val, uint32_t d) { uint_fixed_16_16_t fp, res; @@ -162,8 +201,7 @@ static inline uint_fixed_16_16_t fixed_16_16_div_round_up(uint32_t val, return res; } -static inline uint_fixed_16_16_t fixed_16_16_div_round_up_u64(uint32_t val, - uint32_t d) +static inline uint_fixed_16_16_t fixed_16_16_div_u64(uint32_t val, uint32_t d) { uint_fixed_16_16_t res; uint64_t interm_val; @@ -176,6 +214,17 @@ static inline uint_fixed_16_16_t fixed_16_16_div_round_up_u64(uint32_t val, return res; } +static inline uint32_t div_round_up_u32_fixed16(uint32_t val, + uint_fixed_16_16_t d) +{ + uint64_t interm_val; + + interm_val = (uint64_t)val << 16; + interm_val = DIV_ROUND_UP_ULL(interm_val, d.val); + WARN_ON(interm_val >> 32); + return clamp_t(uint32_t, interm_val, 0, ~0); +} + static inline uint_fixed_16_16_t mul_u32_fixed_16_16(uint32_t val, uint_fixed_16_16_t mul) { @@ -676,116 +725,6 @@ struct drm_i915_display_funcs { void (*load_luts)(struct drm_crtc_state *crtc_state); }; -enum forcewake_domain_id { - FW_DOMAIN_ID_RENDER = 0, - FW_DOMAIN_ID_BLITTER, - FW_DOMAIN_ID_MEDIA, - - FW_DOMAIN_ID_COUNT -}; - -enum forcewake_domains { - 
FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), - FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), - FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), - FORCEWAKE_ALL = (FORCEWAKE_RENDER | - FORCEWAKE_BLITTER | - FORCEWAKE_MEDIA) -}; - -#define FW_REG_READ (1) -#define FW_REG_WRITE (2) - -enum decoupled_power_domain { - GEN9_DECOUPLED_PD_BLITTER = 0, - GEN9_DECOUPLED_PD_RENDER, - GEN9_DECOUPLED_PD_MEDIA, - GEN9_DECOUPLED_PD_ALL -}; - -enum decoupled_ops { - GEN9_DECOUPLED_OP_WRITE = 0, - GEN9_DECOUPLED_OP_READ -}; - -enum forcewake_domains -intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, - i915_reg_t reg, unsigned int op); - -struct intel_uncore_funcs { - void (*force_wake_get)(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); - void (*force_wake_put)(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); - - uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, - i915_reg_t r, bool trace); - uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, - i915_reg_t r, bool trace); - uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, - i915_reg_t r, bool trace); - uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, - i915_reg_t r, bool trace); - - void (*mmio_writeb)(struct drm_i915_private *dev_priv, - i915_reg_t r, uint8_t val, bool trace); - void (*mmio_writew)(struct drm_i915_private *dev_priv, - i915_reg_t r, uint16_t val, bool trace); - void (*mmio_writel)(struct drm_i915_private *dev_priv, - i915_reg_t r, uint32_t val, bool trace); -}; - -struct intel_forcewake_range { - u32 start; - u32 end; - - enum forcewake_domains domains; -}; - -struct intel_uncore { - spinlock_t lock; /** lock is also taken in irq contexts. */ - - const struct intel_forcewake_range *fw_domains_table; - unsigned int fw_domains_table_entries; - - struct notifier_block pmic_bus_access_nb; - struct intel_uncore_funcs funcs; - - unsigned fifo_count; - - enum forcewake_domains fw_domains; - enum forcewake_domains fw_domains_active; - - u32 fw_set; - u32 fw_clear; - u32 fw_reset; - - struct intel_uncore_forcewake_domain { - enum forcewake_domain_id id; - enum forcewake_domains mask; - unsigned wake_count; - struct hrtimer timer; - i915_reg_t reg_set; - i915_reg_t reg_ack; - } fw_domain[FW_DOMAIN_ID_COUNT]; - - int unclaimed_mmio_check; -}; - -#define __mask_next_bit(mask) ({ \ - int __idx = ffs(mask) - 1; \ - mask &= ~BIT(__idx); \ - __idx; \ -}) - -/* Iterate over initialised fw domains */ -#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \ - for (tmp__ = (mask__); \ - tmp__ ? 
(domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;) - -#define for_each_fw_domain(domain__, dev_priv__, tmp__) \ - for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__) - #define CSR_VERSION(major, minor) ((major) << 16 | (minor)) #define CSR_VERSION_MAJOR(version) ((version) >> 16) #define CSR_VERSION_MINOR(version) ((version) & 0xffff) @@ -821,8 +760,8 @@ struct intel_csr { func(has_gmbus_irq); \ func(has_gmch_display); \ func(has_guc); \ + func(has_guc_ct); \ func(has_hotplug); \ - func(has_hw_contexts); \ func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ @@ -1025,6 +964,9 @@ struct i915_gpu_state { u32 *pages[0]; } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; + struct drm_i915_error_object **user_bo; + long user_bo_count; + struct drm_i915_error_object *wa_ctx; struct drm_i915_error_request { @@ -1511,11 +1453,7 @@ struct i915_gem_mm { /** LRU list of objects with fence regs on them. */ struct list_head fence_list; - /** - * Are we in a non-interruptible section of code like - * modesetting? - */ - bool interruptible; + u64 unordered_timeline; /* the indicator for dispatch video commands on two BSD rings */ atomic_t bsd_engine_dispatch_index; @@ -1566,7 +1504,7 @@ struct i915_gpu_error { * * This is a counter which gets incremented when reset is triggered, * - * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set + * Before the reset commences, the I915_RESET_BACKOFF bit is set * meaning that any waiters holding onto the struct_mutex should * relinquish the lock immediately in order for the reset to start. * @@ -1763,13 +1701,15 @@ struct ilk_wm_values { enum intel_ddb_partitioning partitioning; }; -struct vlv_pipe_wm { +struct g4x_pipe_wm { uint16_t plane[I915_MAX_PLANES]; + uint16_t fbc; }; -struct vlv_sr_wm { +struct g4x_sr_wm { uint16_t plane; uint16_t cursor; + uint16_t fbc; }; struct vlv_wm_ddl_values { @@ -1777,13 +1717,22 @@ struct vlv_wm_ddl_values { }; struct vlv_wm_values { - struct vlv_pipe_wm pipe[3]; - struct vlv_sr_wm sr; + struct g4x_pipe_wm pipe[3]; + struct g4x_sr_wm sr; struct vlv_wm_ddl_values ddl[3]; uint8_t level; bool cxsr; }; +struct g4x_wm_values { + struct g4x_pipe_wm pipe[2]; + struct g4x_sr_wm sr; + struct g4x_sr_wm hpll; + bool cxsr; + bool hpll_en; + bool fbc_en; +}; + struct skl_ddb_entry { uint16_t start, end; /* in number of blocks, 'end' is exclusive */ }; @@ -2100,7 +2049,7 @@ struct i915_oa_ops { size_t *offset); /** - * @oa_buffer_is_empty: Check if OA buffer empty (false positives OK) + * @oa_buffer_check: Check for OA buffer data + update tail * * This is either called via fops or the poll check hrtimer (atomic * ctx) without any locks taken. @@ -2113,7 +2062,7 @@ struct i915_oa_ops { * here, which will be handled gracefully - likely resulting in an * %EAGAIN error for userspace. 
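The for_each_fw_domain_masked() iterator being moved out of i915_drv.h above is built on __mask_next_bit(), which pops the lowest set bit off a mask and yields its index. The same trick can be shown in self-contained C with POSIX ffs():

    #include <stdio.h>
    #include <strings.h>                    /* ffs() */

    #define BIT(n) (1U << (n))

    /* Pop the lowest set bit from *mask and return its index. */
    static int mask_next_bit(unsigned int *mask)
    {
            int idx = ffs(*mask) - 1;

            *mask &= ~BIT(idx);
            return idx;
    }

    int main(void)
    {
            unsigned int domains = BIT(0) | BIT(2); /* e.g. RENDER | MEDIA */
            unsigned int tmp = domains;             /* iterate on a scratch copy */

            while (tmp)                             /* prints 0, then 2 */
                    printf("domain %d\n", mask_next_bit(&tmp));
            return 0;
    }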
*/ - bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv); + bool (*oa_buffer_check)(struct drm_i915_private *dev_priv); }; struct intel_cdclk_state { @@ -2127,6 +2076,7 @@ struct drm_i915_private { struct kmem_cache *vmas; struct kmem_cache *requests; struct kmem_cache *dependencies; + struct kmem_cache *priorities; const struct intel_device_info info; @@ -2362,7 +2312,6 @@ struct drm_i915_private { */ struct mutex av_mutex; - uint32_t hw_context_size; struct list_head context_list; u32 fdi_rx_config; @@ -2413,6 +2362,7 @@ struct drm_i915_private { struct ilk_wm_values hw; struct skl_wm_values skl_hw; struct vlv_wm_values vlv; + struct g4x_wm_values g4x; }; uint8_t max_level; @@ -2454,11 +2404,14 @@ struct drm_i915_private { wait_queue_head_t poll_wq; bool pollin; + /** + * For rate limiting any notifications of spurious + * invalid OA reports + */ + struct ratelimit_state spurious_report_rs; + bool periodic; int period_exponent; - int timestamp_frequency; - - int tail_margin; int metrics_set; @@ -2472,6 +2425,70 @@ struct drm_i915_private { u8 *vaddr; int format; int format_size; + + /** + * Locks reads and writes to all head/tail state + * + * Consider: the head and tail pointer state + * needs to be read consistently from a hrtimer + * callback (atomic context) and read() fop + * (user context) with tail pointer updates + * happening in atomic context and head updates + * in user context and the (unlikely) + * possibility of read() errors needing to + * reset all head/tail state. + * + * Note: Contention or performance aren't + * currently a significant concern here + * considering the relatively low frequency of + * hrtimer callbacks (5ms period) and that + * reads typically only happen in response to a + * hrtimer event and likely complete before the + * next callback. + * + * Note: This lock is not held *while* reading + * and copying data to userspace so the value + * of head observed in htrimer callbacks won't + * represent any partial consumption of data. + */ + spinlock_t ptr_lock; + + /** + * One 'aging' tail pointer and one 'aged' + * tail pointer ready to used for reading. + * + * Initial values of 0xffffffff are invalid + * and imply that an update is required + * (and should be ignored by an attempted + * read) + */ + struct { + u32 offset; + } tails[2]; + + /** + * Index for the aged tail ready to read() + * data up to. + */ + unsigned int aged_tail_idx; + + /** + * A monotonic timestamp for when the current + * aging tail pointer was read; used to + * determine when it is old enough to trust. + */ + u64 aging_timestamp; + + /** + * Although we can always read back the head + * pointer register, we prefer to avoid + * trusting the HW state, just to avoid any + * risk that some hardware condition could + * somehow bump the head pointer unpredictably + * and cause us to forward the wrong OA buffer + * data to userspace. 
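The oa_buffer fields introduced above describe a double-buffered tail: a fresh hardware tail is parked in an "aging" slot and only promoted to the "aged" slot, the one read() trusts, after it has been stable long enough. A rough kernel-style sketch of that promotion step, reusing the 0xffffffff invalid marker; this is illustrative only, not the driver's actual update logic:

    #include <linux/spinlock.h>
    #include <linux/types.h>

    #define OA_TAIL_INVALID 0xffffffff

    struct oa_tails {
            spinlock_t lock;        /* the ptr_lock described above */
            u32 tail[2];            /* one aging slot, one aged slot */
            unsigned int aged_idx;  /* slot that read() may trust */
            u64 aging_start;        /* when the aging tail was sampled */
    };

    static void oa_update_tails(struct oa_tails *t, u32 hw_tail,
                                u64 now, u64 min_age)
    {
            unsigned int aging = !t->aged_idx;

            spin_lock(&t->lock);
            if (t->tail[aging] != OA_TAIL_INVALID &&
                now - t->aging_start >= min_age) {
                    t->aged_idx = aging;            /* promote aging -> aged */
                    t->tail[!aging] = OA_TAIL_INVALID;
            }
            aging = !t->aged_idx;
            if (t->tail[aging] == OA_TAIL_INVALID) {
                    t->tail[aging] = hw_tail;       /* park a fresh snapshot */
                    t->aging_start = now;
            }
            spin_unlock(&t->lock);
    }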
+ */ + u32 head; } oa_buffer; u32 gen7_latched_oastatus1; @@ -2870,7 +2887,6 @@ intel_info(const struct drm_i915_private *dev_priv) #define HWS_NEEDS_PHYSICAL(dev_priv) ((dev_priv)->info.hws_needs_physical) -#define HAS_HW_CONTEXTS(dev_priv) ((dev_priv)->info.has_hw_contexts) #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ ((dev_priv)->info.has_logical_ring_contexts) #define USES_PPGTT(dev_priv) (i915.enable_ppgtt) @@ -2909,6 +2925,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) #define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr) #define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc) +#define HAS_CUR_FBC(dev_priv) (!HAS_GMCH_DISPLAY(dev_priv) && INTEL_INFO(dev_priv)->gen >= 7) #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) @@ -2931,6 +2948,7 @@ intel_info(const struct drm_i915_private *dev_priv) * properties, so we have separate macros to test them. */ #define HAS_GUC(dev_priv) ((dev_priv)->info.has_guc) +#define HAS_GUC_CT(dev_priv) ((dev_priv)->info.has_guc_ct) #define HAS_GUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) #define HAS_GUC_SCHED(dev_priv) (HAS_GUC(dev_priv)) #define HAS_HUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) @@ -2981,15 +2999,26 @@ intel_info(const struct drm_i915_private *dev_priv) #include "i915_trace.h" -static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) +static inline bool intel_vtd_active(void) { #ifdef CONFIG_INTEL_IOMMU - if (INTEL_GEN(dev_priv) >= 6 && intel_iommu_gfx_mapped) + if (intel_iommu_gfx_mapped) return true; #endif return false; } +static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) +{ + return INTEL_GEN(dev_priv) >= 6 && intel_vtd_active(); +} + +static inline bool +intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv) +{ + return IS_BROXTON(dev_priv) && intel_vtd_active(); +} + int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, int enable_ppgtt); @@ -3026,7 +3055,7 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); -int intel_engines_init_early(struct drm_i915_private *dev_priv); +int intel_engines_init_mmio(struct drm_i915_private *dev_priv); int intel_engines_init(struct drm_i915_private *dev_priv); /* intel_hotplug.c */ @@ -3063,43 +3092,10 @@ void i915_handle_error(struct drm_i915_private *dev_priv, const char *fmt, ...); extern void intel_irq_init(struct drm_i915_private *dev_priv); +extern void intel_irq_fini(struct drm_i915_private *dev_priv); int intel_irq_install(struct drm_i915_private *dev_priv); void intel_irq_uninstall(struct drm_i915_private *dev_priv); -extern void intel_uncore_sanitize(struct drm_i915_private *dev_priv); -extern void intel_uncore_init(struct drm_i915_private *dev_priv); -extern bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv); -extern bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv); -extern void intel_uncore_fini(struct drm_i915_private *dev_priv); -extern void intel_uncore_suspend(struct drm_i915_private *dev_priv); -extern void intel_uncore_resume_early(struct drm_i915_private *dev_priv); -const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id); -void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); -void intel_uncore_forcewake_put(struct drm_i915_private 
*dev_priv, - enum forcewake_domains domains); -/* Like above but the caller must manage the uncore.lock itself. - * Must be used with I915_READ_FW and friends. - */ -void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); -void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); -u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); - -void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); - -int intel_wait_for_register(struct drm_i915_private *dev_priv, - i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned long timeout_ms); -int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, - i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned long timeout_ms); - static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) { return dev_priv->gvt; @@ -3447,8 +3443,9 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX int __must_check -i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, - bool write); +i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); +int __must_check +i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); int __must_check i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); struct i915_vma * __must_check @@ -3711,8 +3708,8 @@ int intel_lpe_audio_init(struct drm_i915_private *dev_priv); void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv); void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv); void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, - void *eld, int port, int pipe, int tmds_clk_speed, - bool dp_output, int link_rate); + enum pipe pipe, enum port port, + const void *eld, int ls_clock, bool dp_output); /* intel_i2c.c */ extern int intel_setup_gmbus(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b6ac3df18b58..7ab47a84671f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,8 +46,6 @@ #include <linux/dma-buf.h> static void i915_gem_flush_free_objects(struct drm_i915_private *i915); -static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { @@ -705,6 +703,61 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, args->size, &args->handle); } +static inline enum fb_op_origin +fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) +{ + return (domain == I915_GEM_DOMAIN_GTT ? + obj->frontbuffer_ggtt_origin : ORIGIN_CPU); +} + +static void +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + + if (!(obj->base.write_domain & flush_domains)) + return; + + /* No actual flushing is required for the GTT write domain. Writes + * to it "immediately" go to main memory as far as we know, so there's + * no chipset flush. It also doesn't land in render cache. + * + * However, we do have to enforce the order so that all writes through + * the GTT land before any writes to the device, such as updates to + * the GATT itself. + * + * We also have to wait a bit for the writes to land from the GTT. 
+ * An uncached read (i.e. mmio) seems to be ideal for the round-trip + * timing. This issue has only been observed when switching quickly + * between GTT writes and CPU reads from inside the kernel on recent hw, + * and it appears to only affect discrete GTT blocks (i.e. on LLC + * system agents we cannot reproduce this behaviour). + */ + wmb(); + + switch (obj->base.write_domain) { + case I915_GEM_DOMAIN_GTT: + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { + if (intel_runtime_pm_get_if_in_use(dev_priv)) { + spin_lock_irq(&dev_priv->uncore.lock); + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + spin_unlock_irq(&dev_priv->uncore.lock); + intel_runtime_pm_put(dev_priv); + } + } + + intel_fb_obj_flush(obj, + fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + break; + + case I915_GEM_DOMAIN_CPU: + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + break; + } + + obj->base.write_domain = 0; +} + static inline int __copy_to_user_swizzled(char __user *cpu_vaddr, const char *gpu_vaddr, int gpu_offset, @@ -794,7 +847,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This @@ -846,7 +899,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu write domain, set ourself into the * gtt write domain and manually flush cachelines (as required). @@ -1501,13 +1554,6 @@ err: return ret; } -static inline enum fb_op_origin -write_origin(struct drm_i915_gem_object *obj, unsigned domain) -{ - return (domain == I915_GEM_DOMAIN_GTT ? - obj->frontbuffer_ggtt_origin : ORIGIN_CPU); -} - static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915; @@ -1591,10 +1637,12 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out_unpin; - if (read_domains & I915_GEM_DOMAIN_GTT) - err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); + if (read_domains & I915_GEM_DOMAIN_WC) + err = i915_gem_object_set_to_wc_domain(obj, write_domain); + else if (read_domains & I915_GEM_DOMAIN_GTT) + err = i915_gem_object_set_to_gtt_domain(obj, write_domain); else - err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); + err = i915_gem_object_set_to_cpu_domain(obj, write_domain); /* And bump the LRU for this access */ i915_gem_object_bump_inactive_ggtt(obj); @@ -1602,7 +1650,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev->struct_mutex); if (write_domain != 0) - intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); + intel_fb_obj_invalidate(obj, + fb_write_origin(obj, write_domain)); out_unpin: i915_gem_object_unpin_pages(obj); @@ -1737,6 +1786,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) * into userspace. (This view is aligned and sized appropriately for * fenced access.) * + * 2 - Recognise WC as a separate cache domain so that we can flush the + * delayed writes via GTT before performing direct access via WC. + * * Restrictions: * * * snoopable objects cannot be accessed via the GTT. 
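One convention in the new flush_write_domain() above is worth spelling out: the second argument is a mask of write domains that must be flushed before the caller's access, so each caller passes the complement of its own domain, e.g. ~I915_GEM_DOMAIN_CPU before CPU access or ~I915_GEM_DOMAIN_GTT before GTT access. A toy model of that convention with made-up domain bits:

    #include <stdio.h>

    #define BIT(n)  (1U << (n))
    #define DOM_CPU BIT(0)          /* hypothetical domain bits */
    #define DOM_GTT BIT(1)
    #define DOM_WC  BIT(2)

    struct obj { unsigned int write_domain; };

    /* Flush pending writes only if they live in one of flush_domains. */
    static void flush_write_domain(struct obj *o, unsigned int flush_domains)
    {
            if (!(o->write_domain & flush_domains))
                    return;         /* nothing the new access can race with */

            printf("flushing writes in domain 0x%x\n", o->write_domain);
            o->write_domain = 0;    /* the writes have now landed */
    }

    int main(void)
    {
            struct obj o = { .write_domain = DOM_GTT };

            /* Preparing CPU access: flush every domain *except* CPU. */
            flush_write_domain(&o, ~DOM_CPU);       /* flushes the GTT writes */
            return 0;
    }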
It can cause machine @@ -1764,7 +1816,7 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) */ int i915_gem_mmap_gtt_version(void) { - return 1; + return 2; } static inline struct i915_ggtt_view @@ -2228,7 +2280,7 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, if (obj->mm.mapping) { void *ptr; - ptr = ptr_mask_bits(obj->mm.mapping); + ptr = page_mask_bits(obj->mm.mapping); if (is_vmalloc_addr(ptr)) vunmap(ptr); else @@ -2504,7 +2556,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, if (n_pages > ARRAY_SIZE(stack_pages)) { /* Too big for stack -- allocate temporary array instead */ - pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); + pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_TEMPORARY); if (!pages) return NULL; } @@ -2526,7 +2578,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, addr = vmap(pages, n_pages, 0, pgprot); if (pages != stack_pages) - drm_free_large(pages); + kvfree(pages); return addr; } @@ -2560,7 +2612,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } GEM_BUG_ON(!obj->mm.pages); - ptr = ptr_unpack_bits(obj->mm.mapping, has_type); + ptr = page_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { if (pinned) { ret = -EBUSY; @@ -2582,7 +2634,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, goto err_unpin; } - obj->mm.mapping = ptr_pack_bits(ptr, type); + obj->mm.mapping = page_pack_bits(ptr, type); } out_unlock: @@ -2967,12 +3019,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine) */ if (i915.enable_execlists) { + struct execlist_port *port = engine->execlist_port; unsigned long flags; + unsigned int n; spin_lock_irqsave(&engine->timeline->lock, flags); - i915_gem_request_put(engine->execlist_port[0].request); - i915_gem_request_put(engine->execlist_port[1].request); + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) + i915_gem_request_put(port_request(&port[n])); memset(engine->execlist_port, 0, sizeof(engine->execlist_port)); engine->execlist_queue = RB_ROOT; engine->execlist_first = NULL; @@ -3101,8 +3155,6 @@ i915_gem_idle_work_handler(struct work_struct *work) struct drm_i915_private *dev_priv = container_of(work, typeof(*dev_priv), gt.idle_work.work); struct drm_device *dev = &dev_priv->drm; - struct intel_engine_cs *engine; - enum intel_engine_id id; bool rearm_hangcheck; if (!READ_ONCE(dev_priv->gt.awake)) @@ -3140,10 +3192,8 @@ i915_gem_idle_work_handler(struct work_struct *work) if (wait_for(intel_engines_are_idle(dev_priv), 10)) DRM_ERROR("Timeout waiting for engines to idle\n"); - for_each_engine(engine, dev_priv, id) { - intel_engine_disarm_breadcrumbs(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); - } + intel_engines_mark_idle(dev_priv); + i915_gem_timelines_mark_idle(dev_priv); GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; @@ -3320,56 +3370,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) return ret; } -/** Flushes the GTT write domain for the object if it's dirty. */ -static void -i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - - if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) - return; - - /* No actual flushing is required for the GTT write domain. Writes - * to it "immediately" go to main memory as far as we know, so there's - * no chipset flush. It also doesn't land in render cache. 
- * - * However, we do have to enforce the order so that all writes through - * the GTT land before any writes to the device, such as updates to - * the GATT itself. - * - * We also have to wait a bit for the writes to land from the GTT. - * An uncached read (i.e. mmio) seems to be ideal for the round-trip - * timing. This issue has only been observed when switching quickly - * between GTT writes and CPU reads from inside the kernel on recent hw, - * and it appears to only affect discrete GTT blocks (i.e. on LLC - * system agents we cannot reproduce this behaviour). - */ - wmb(); - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { - if (intel_runtime_pm_get_if_in_use(dev_priv)) { - spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); - spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv); - } - } - - intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); - - obj->base.write_domain = 0; -} - -/** Flushes the CPU write domain for the object if it's dirty. */ -static void -i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) -{ - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) - return; - - i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); - obj->base.write_domain = 0; -} - static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) { if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) @@ -3390,6 +3390,69 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) } /** + * Moves a single object to the WC read, and possibly write domain. + * @obj: object to act on + * @write: ask for write access or read only + * + * This function returns when the move is complete, including waiting on + * flushes to occur. + */ +int +i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) +{ + int ret; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + (write ? I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT, + NULL); + if (ret) + return ret; + + if (obj->base.write_domain == I915_GEM_DOMAIN_WC) + return 0; + + /* Flush and acquire obj->pages so that we are coherent through + * direct access in memory with previous cached writes through + * shmemfs and that our cache domain tracking remains valid. + * For example, if the obj->filp was moved to swap without us + * being notified and releasing the pages, we would mistakenly + * continue to assume that the obj remained out of the CPU cached + * domain. + */ + ret = i915_gem_object_pin_pages(obj); + if (ret) + return ret; + + flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); + + /* Serialise direct access to this object with the barriers for + * coherent writes from the GPU, by effectively invalidating the + * WC domain upon first access. + */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0) + mb(); + + /* It should now be out of any other write domains, and we can update + * the domain values for our changes. + */ + GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0); + obj->base.read_domains |= I915_GEM_DOMAIN_WC; + if (write) { + obj->base.read_domains = I915_GEM_DOMAIN_WC; + obj->base.write_domain = I915_GEM_DOMAIN_WC; + obj->mm.dirty = true; + } + + i915_gem_object_unpin_pages(obj); + return 0; +} + +/** * Moves a single object to the GTT read, and possibly write domain. 
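The new i915_gem_object_set_to_wc_domain() above pairs naturally with the WC mapping helpers seen earlier in this diff. A plausible calling sequence, illustrative only, with error paths trimmed and struct_mutex assumed held as the lockdep assertion requires:

    static int write_through_wc(struct drm_i915_gem_object *obj, u32 value)
    {
            u32 *vaddr;
            int err;

            err = i915_gem_object_set_to_wc_domain(obj, true); /* for write */
            if (err)
                    return err;

            vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
            if (IS_ERR(vaddr))
                    return PTR_ERR(vaddr);

            vaddr[0] = value;       /* write-combined store, no GTT aperture */
            i915_gem_object_unpin_map(obj);
            return 0;
    }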
* @obj: object to act on * @write: ask for write access or read only @@ -3428,7 +3491,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_cpu_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -3802,7 +3865,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return 0; - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { @@ -3996,7 +4059,7 @@ __busy_set_if_active(const struct dma_fence *fence, if (i915_gem_request_completed(rq)) return 0; - return flag(rq->engine->exec_id); + return flag(rq->engine->uabi_id); } static __always_inline unsigned int @@ -4195,7 +4258,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) * catch if we ever need to fix it. In the meantime, if you do spot * such a local variable, please consider fixing! */ - if (WARN_ON(size >> PAGE_SHIFT > INT_MAX)) + if (size >> PAGE_SHIFT > INT_MAX) return ERR_PTR(-E2BIG); if (overflows_type(size, obj->base.size)) @@ -4302,6 +4365,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); + cond_resched(); + llist_for_each_entry_safe(obj, on, freed, freed) { GEM_BUG_ON(obj->bind_count); GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); @@ -4349,8 +4414,11 @@ static void __i915_gem_free_work(struct work_struct *work) * unbound now. */ - while ((freed = llist_del_all(&i915->mm.free_list))) + while ((freed = llist_del_all(&i915->mm.free_list))) { __i915_gem_free_objects(i915, freed); + if (need_resched()) + break; + } } static void __i915_gem_free_object_rcu(struct rcu_head *head) @@ -4415,10 +4483,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * try to take over. The only way to remove the earlier state * is by resetting. However, resetting on earlier gen is tricky as * it may impact the display and we are uncertain about the stability - * of the reset, so we only reset recent machines with logical - * context support (that must be reset to remove any stray contexts). + * of the reset, so this could be applied to even earlier gen. 
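The reworked free path above leans on the lockless llist API: llist_del_all() detaches the whole pending list in one atomic exchange, the worker frees that batch, and need_resched()/cond_resched() keep the loop from monopolising the CPU when objects are freed faster than they can be reaped. A condensed sketch, with a hypothetical struct obj carrying an embedded llist_node:

    #include <linux/llist.h>
    #include <linux/sched.h>
    #include <linux/slab.h>

    struct obj {
            struct llist_node free_node;
            /* ... payload ... */
    };

    static void drain_free_list(struct llist_head *free_list)
    {
            struct llist_node *batch;

            while ((batch = llist_del_all(free_list))) {
                    struct obj *o, *next;

                    llist_for_each_entry_safe(o, next, batch, free_node)
                            kfree(o);       /* _safe: o is gone after this */

                    if (need_resched())
                            break;          /* pick up again on the next kick */
            }
    }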
*/ - if (HAS_HW_CONTEXTS(i915)) { + if (INTEL_GEN(i915) >= 5) { int reset = intel_gpu_reset(i915, ALL_ENGINES); WARN_ON(reset && reset != -ENODEV); } @@ -4661,11 +4728,9 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) if (value >= 0) return value; -#ifdef CONFIG_INTEL_IOMMU /* Enable semaphores on SNB when IO remapping is off */ - if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) + if (IS_GEN6(dev_priv) && intel_vtd_active()) return false; -#endif return true; } @@ -4676,7 +4741,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_clflush_init(dev_priv); + dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; @@ -4799,12 +4864,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (!dev_priv->dependencies) goto err_requests; + dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); + if (!dev_priv->priorities) + goto err_dependencies; + mutex_lock(&dev_priv->drm.struct_mutex); INIT_LIST_HEAD(&dev_priv->gt.timelines); err = i915_gem_timeline_init__global(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); if (err) - goto err_dependencies; + goto err_priorities; INIT_LIST_HEAD(&dev_priv->context_list); INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); @@ -4822,14 +4891,14 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->pending_flip_queue); - dev_priv->mm.interruptible = true; - atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); spin_lock_init(&dev_priv->fb_tracking.lock); return 0; +err_priorities: + kmem_cache_destroy(dev_priv->priorities); err_dependencies: kmem_cache_destroy(dev_priv->dependencies); err_requests: @@ -4853,6 +4922,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) WARN_ON(!list_empty(&dev_priv->gt.timelines)); mutex_unlock(&dev_priv->drm.struct_mutex); + kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); kmem_cache_destroy(dev_priv->requests); kmem_cache_destroy(dev_priv->vmas); @@ -4864,9 +4934,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) int i915_gem_freeze(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->drm.struct_mutex); + /* Discard all purgeable objects, let userspace recover those as + * required after resuming. + */ i915_gem_shrink_all(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); return 0; } @@ -4891,12 +4962,13 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) * we update that state just before writing out the image. * * To try and reduce the hibernation image, we manually shrink - * the objects as well. 
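The new dev_priv->priorities cache above is created with the KMEM_CACHE() convenience macro, which derives the slab's name, object size and alignment from the struct type itself. The full lifecycle, sketched around a hypothetical stand-in struct:

    #include <linux/slab.h>
    #include <linux/rbtree.h>
    #include <linux/list.h>

    struct prio_node {                      /* stand-in for i915_priolist */
            struct rb_node node;
            struct list_head requests;
            int priority;
    };

    static struct kmem_cache *prio_cache;

    static int prio_cache_init(void)
    {
            prio_cache = KMEM_CACHE(prio_node, SLAB_HWCACHE_ALIGN);
            return prio_cache ? 0 : -ENOMEM;
    }

    static struct prio_node *prio_alloc(void)
    {
            return kmem_cache_alloc(prio_cache, GFP_KERNEL);
    }

    static void prio_free(struct prio_node *p)
    {
            kmem_cache_free(prio_cache, p);
    }

    static void prio_cache_fini(void)
    {
            kmem_cache_destroy(prio_cache); /* every object freed first */
    }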
+ * the objects as well, see i915_gem_freeze() */ - mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND); + i915_gem_drain_freed_objects(dev_priv); + mutex_lock(&dev_priv->drm.struct_mutex); for (p = phases; *p; p++) { list_for_each_entry(obj, *p, global_link) { obj->base.read_domains = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 5a49487368ca..ee54597465b6 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -25,6 +25,8 @@ #ifndef __I915_GEM_H__ #define __I915_GEM_H__ +#include <linux/bug.h> + #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(expr) BUG_ON(expr) #define GEM_WARN_ON(expr) WARN_ON(expr) diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index ffd01e02fe94..ffac7a1f0caf 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -27,7 +27,6 @@ #include "i915_gem_clflush.h" static DEFINE_SPINLOCK(clflush_lock); -static u64 clflush_context; struct clflush { struct dma_fence dma; /* Must be first for dma_fence_free() */ @@ -157,7 +156,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, dma_fence_init(&clflush->dma, &i915_clflush_ops, &clflush_lock, - clflush_context, + to_i915(obj->base.dev)->mm.unordered_timeline, 0); i915_sw_fence_init(&clflush->wait, i915_clflush_notify); @@ -182,8 +181,3 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); } } - -void i915_gem_clflush_init(struct drm_i915_private *i915) -{ - clflush_context = dma_fence_context_alloc(1); -} diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h index b62d61a2d15f..2455a7820937 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.h +++ b/drivers/gpu/drm/i915/i915_gem_clflush.h @@ -28,7 +28,6 @@ struct drm_i915_private; struct drm_i915_gem_object; -void i915_gem_clflush_init(struct drm_i915_private *i915); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, unsigned int flags); #define I915_CLFLUSH_FORCE BIT(0) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8bd0c4966913..c5d1666d7071 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -92,33 +92,6 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -static int get_context_size(struct drm_i915_private *dev_priv) -{ - int ret; - u32 reg; - - switch (INTEL_GEN(dev_priv)) { - case 6: - reg = I915_READ(CXT_SIZE); - ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; - break; - case 7: - reg = I915_READ(GEN7_CXT_SIZE); - if (IS_HASWELL(dev_priv)) - ret = HSW_CXT_TOTAL_SIZE; - else - ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; - break; - case 8: - ret = GEN8_CXT_TOTAL_SIZE; - break; - default: - BUG(); - } - - return ret; -} - void i915_gem_context_free(struct kref *ctx_ref) { struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); @@ -151,45 +124,6 @@ void i915_gem_context_free(struct kref *ctx_ref) kfree(ctx); } -static struct drm_i915_gem_object * -alloc_context_obj(struct drm_i915_private *dev_priv, u64 size) -{ - struct drm_i915_gem_object *obj; - int ret; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - obj = i915_gem_object_create(dev_priv, size); - if (IS_ERR(obj)) - return obj; - - /* - * Try to make the context utilize L3 as well as LLC. 
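The clflush hunk above drops the file-local fence context in favour of dev_priv->mm.unordered_timeline, which i915_gem_init() now allocates once via dma_fence_context_alloc(). The underlying pattern: fences that must never be ordered against one another can all share a single context. A sketch, with my_fence_ops as a hypothetical ops table:

    #include <linux/dma-fence.h>
    #include <linux/spinlock.h>

    static u64 unordered_timeline;
    static DEFINE_SPINLOCK(fence_lock);

    extern const struct dma_fence_ops my_fence_ops; /* hypothetical */

    static void timeline_init(void)
    {
            /* Reserve one fence context for all unordered fences. */
            unordered_timeline = dma_fence_context_alloc(1);
    }

    static void unordered_fence_init(struct dma_fence *fence)
    {
            /* seqno 0: no intra-context ordering is implied either. */
            dma_fence_init(fence, &my_fence_ops, &fence_lock,
                           unordered_timeline, 0);
    }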
- * - * On VLV we don't have L3 controls in the PTEs so we - * shouldn't touch the cache level, especially as that - * would make the object snooped which might have a - * negative performance impact. - * - * Snooping is required on non-llc platforms in execlist - * mode, but since all GGTT accesses use PAT entry 0 we - * get snooping anyway regardless of cache_level. - * - * This is only applicable for Ivy Bridge devices since - * later platforms don't have L3 control bits in the PTE. - */ - if (IS_IVYBRIDGE(dev_priv)) { - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); - /* Failure shouldn't ever happen this early */ - if (WARN_ON(ret)) { - i915_gem_object_put(obj); - return ERR_PTR(ret); - } - } - - return obj; -} - static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); @@ -265,26 +199,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, kref_init(&ctx->ref); list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; - - if (dev_priv->hw_context_size) { - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - - obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size); - if (IS_ERR(obj)) { - ret = PTR_ERR(obj); - goto err_out; - } - - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); - if (IS_ERR(vma)) { - i915_gem_object_put(obj); - ret = PTR_ERR(vma); - goto err_out; - } - - ctx->engine[RCS].state = vma; - } + ctx->priority = I915_PRIORITY_NORMAL; /* Default context will never have a file_priv */ ret = DEFAULT_CONTEXT_HANDLE; @@ -443,21 +358,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->context_hw_ida); - if (i915.enable_execlists) { - /* NB: intentionally left blank. We will allocate our own - * backing objects as we need them, thank you very much */ - dev_priv->hw_context_size = 0; - } else if (HAS_HW_CONTEXTS(dev_priv)) { - dev_priv->hw_context_size = - round_up(get_context_size(dev_priv), - I915_GTT_PAGE_SIZE); - if (dev_priv->hw_context_size > (1<<20)) { - DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", - dev_priv->hw_context_size); - dev_priv->hw_context_size = 0; - } - } - ctx = i915_gem_create_context(dev_priv, NULL); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context (error %ld)\n", @@ -477,8 +377,8 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); DRM_DEBUG_DRIVER("%s context support initialized\n", - i915.enable_execlists ? "LR" : - dev_priv->hw_context_size ? "HW" : "fake"); + dev_priv->engine[RCS]->context_size ? 
"logical" : + "fake"); return 0; } @@ -941,11 +841,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) return 0; } -static bool contexts_enabled(struct drm_device *dev) -{ - return i915.enable_execlists || to_i915(dev)->hw_context_size; -} - static bool client_is_banned(struct drm_i915_file_private *file_priv) { return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS; @@ -954,12 +849,13 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv) int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_context_create *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_gem_context *ctx; int ret; - if (!contexts_enabled(dev)) + if (!dev_priv->engine[RCS]->context_size) return -ENODEV; if (args->pad != 0) @@ -977,7 +873,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - ctx = i915_gem_create_context(to_i915(dev), file_priv); + ctx = i915_gem_create_context(dev_priv, file_priv); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index f225bf680b6d..6176e589cf09 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -122,12 +122,36 @@ static void i915_gem_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, unsigned long } static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) { + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + struct page *page; + + if (page_num >= obj->base.size >> PAGE_SHIFT) + return NULL; + + if (!i915_gem_object_has_struct_page(obj)) + return NULL; + + if (i915_gem_object_pin_pages(obj)) + return NULL; + + /* Synchronisation is left to the caller (via .begin_cpu_access()) */ + page = i915_gem_object_get_page(obj, page_num); + if (IS_ERR(page)) + goto err_unpin; + + return kmap(page); + +err_unpin: + i915_gem_object_unpin_pages(obj); return NULL; } static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) { + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + kunmap(virt_to_page(addr)); + i915_gem_object_unpin_pages(obj); } static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3e59c8ef27b..04211c970b9f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1019,11 +1019,11 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, for (i = 0; i < count; i++) total += exec[i].relocation_count; - reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset)); - reloc = drm_malloc_ab(total, sizeof(*reloc)); + reloc_offset = kvmalloc_array(count, sizeof(*reloc_offset), GFP_KERNEL); + reloc = kvmalloc_array(total, sizeof(*reloc), GFP_KERNEL); if (reloc == NULL || reloc_offset == NULL) { - drm_free_large(reloc); - drm_free_large(reloc_offset); + kvfree(reloc); + kvfree(reloc_offset); mutex_lock(&dev->struct_mutex); return -ENOMEM; } @@ -1099,8 +1099,8 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, */ err: - drm_free_large(reloc); - drm_free_large(reloc_offset); + kvfree(reloc); + kvfree(reloc_offset); return ret; } @@ -1114,6 +1114,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, 
list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; + if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) { + struct i915_gem_capture_list *capture; + + capture = kmalloc(sizeof(*capture), GFP_KERNEL); + if (unlikely(!capture)) + return -ENOMEM; + + capture->next = req->capture_list; + capture->vma = vma; + req->capture_list = capture; + } + if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) continue; @@ -1859,13 +1871,13 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, } /* Copy in the exec list from userland */ - exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); - exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); + exec_list = kvmalloc_array(sizeof(*exec_list), args->buffer_count, GFP_KERNEL); + exec2_list = kvmalloc_array(sizeof(*exec2_list), args->buffer_count, GFP_KERNEL); if (exec_list == NULL || exec2_list == NULL) { DRM_DEBUG("Failed to allocate exec list for %d buffers\n", args->buffer_count); - drm_free_large(exec_list); - drm_free_large(exec2_list); + kvfree(exec_list); + kvfree(exec2_list); return -ENOMEM; } ret = copy_from_user(exec_list, @@ -1874,8 +1886,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, if (ret != 0) { DRM_DEBUG("copy %d exec entries failed %d\n", args->buffer_count, ret); - drm_free_large(exec_list); - drm_free_large(exec2_list); + kvfree(exec_list); + kvfree(exec2_list); return -EFAULT; } @@ -1924,8 +1936,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, } } - drm_free_large(exec_list); - drm_free_large(exec2_list); + kvfree(exec_list); + kvfree(exec2_list); return ret; } @@ -1943,7 +1955,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; } - exec2_list = drm_malloc_gfp(args->buffer_count, + exec2_list = kvmalloc_array(args->buffer_count, sizeof(*exec2_list), GFP_TEMPORARY); if (exec2_list == NULL) { @@ -1957,7 +1969,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, if (ret != 0) { DRM_DEBUG("copy %d exec entries failed %d\n", args->buffer_count, ret); - drm_free_large(exec2_list); + kvfree(exec2_list); return -EFAULT; } @@ -1984,6 +1996,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, } } - drm_free_large(exec2_list); + kvfree(exec2_list); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a0563e18d753..0c1008a2bbda 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -168,13 +168,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, if (enable_ppgtt == 3 && has_full_48bit_ppgtt) return 3; -#ifdef CONFIG_INTEL_IOMMU /* Disable ppgtt on SNB if VT-d is on. */ - if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) { + if (IS_GEN6(dev_priv) && intel_vtd_active()) { DRM_INFO("Disabling PPGTT because VT-d is on\n"); return 0; } -#endif /* Early VLV doesn't have this */ if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) { @@ -1992,14 +1990,10 @@ void i915_ppgtt_release(struct kref *kref) */ static bool needs_idle_maps(struct drm_i915_private *dev_priv) { -#ifdef CONFIG_INTEL_IOMMU /* Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. 
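The drm_malloc_ab()/drm_free_large() calls converted throughout the execbuffer paths above map directly onto the kvmalloc_array()/kvfree() pair: try the slab allocator first, fall back to vmalloc for large requests, and reject overflowing count*size products. A minimal sketch with a hypothetical exec_entry element type:

    #include <linux/mm.h>
    #include <linux/types.h>

    struct exec_entry {                     /* hypothetical element type */
            u64 offset;
            u64 flags;
    };

    /* Userspace-sized array: may be huge, so allow the vmalloc fallback. */
    static struct exec_entry *alloc_exec_list(unsigned int count)
    {
            return kvmalloc_array(count, sizeof(struct exec_entry),
                                  GFP_KERNEL);
    }

    static void free_exec_list(struct exec_entry *list)
    {
            kvfree(list);   /* correct for kmalloc- and vmalloc-backed memory */
    }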
*/ - if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) - return true; -#endif - return false; + return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active(); } void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) @@ -2191,6 +2185,101 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, gen8_set_pte(>t_base[i], scratch_pte); } +static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) +{ + struct drm_i915_private *dev_priv = vm->i915; + + /* + * Make sure the internal GAM fifo has been cleared of all GTT + * writes before exiting stop_machine(). This guarantees that + * any aperture accesses waiting to start in another process + * cannot back up behind the GTT writes causing a hang. + * The register can be any arbitrary GAM register. + */ + POSTING_READ(GFX_FLSH_CNTL_GEN6); +} + +struct insert_page { + struct i915_address_space *vm; + dma_addr_t addr; + u64 offset; + enum i915_cache_level level; +}; + +static int bxt_vtd_ggtt_insert_page__cb(void *_arg) +{ + struct insert_page *arg = _arg; + + gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 unused) +{ + struct insert_page arg = { vm, addr, offset, level }; + + stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); +} + +struct insert_entries { + struct i915_address_space *vm; + struct sg_table *st; + u64 start; + enum i915_cache_level level; +}; + +static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) +{ + struct insert_entries *arg = _arg; + + gen8_ggtt_insert_entries(arg->vm, arg->st, arg->start, arg->level, 0); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, + struct sg_table *st, + u64 start, + enum i915_cache_level level, + u32 unused) +{ + struct insert_entries arg = { vm, st, start, level }; + + stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); +} + +struct clear_range { + struct i915_address_space *vm; + u64 start; + u64 length; +}; + +static int bxt_vtd_ggtt_clear_range__cb(void *_arg) +{ + struct clear_range *arg = _arg; + + gen8_ggtt_clear_range(arg->vm, arg->start, arg->length); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm, + u64 start, + u64 length) +{ + struct clear_range arg = { vm, start, length }; + + stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL); +} + static void gen6_ggtt_clear_range(struct i915_address_space *vm, u64 start, u64 length) { @@ -2313,7 +2402,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, appgtt->base.allocate_va_range) { ret = appgtt->base.allocate_va_range(&appgtt->base, vma->node.start, - vma->node.size); + vma->size); if (ret) goto err_pages; } @@ -2583,14 +2672,14 @@ static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) { snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; snb_gmch_ctl &= SNB_GMCH_GMS_MASK; - return snb_gmch_ctl << 25; /* 32 MB units */ + return (size_t)snb_gmch_ctl << 25; /* 32 MB units */ } static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) { bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; - return bdw_gmch_ctl << 25; /* 32 MB units */ + return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */ } static size_t chv_get_stolen_size(u16 gmch_ctrl) @@ -2604,11 +2693,11 @@ static size_t chv_get_stolen_size(u16 gmch_ctrl) * 0x17 to 
0x1d: 4MB increments start at 36MB */ if (gmch_ctrl < 0x11) - return gmch_ctrl << 25; + return (size_t)gmch_ctrl << 25; else if (gmch_ctrl < 0x17) - return (gmch_ctrl - 0x11 + 2) << 22; + return (size_t)(gmch_ctrl - 0x11 + 2) << 22; else - return (gmch_ctrl - 0x17 + 9) << 22; + return (size_t)(gmch_ctrl - 0x17 + 9) << 22; } static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) @@ -2617,10 +2706,10 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; if (gen9_gmch_ctl < 0xf0) - return gen9_gmch_ctl << 25; /* 32 MB units */ + return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */ else /* 4MB increments starting at 0xf0 for 4MB */ - return (gen9_gmch_ctl - 0xf0 + 1) << 22; + return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22; } static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) @@ -2747,13 +2836,17 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) struct pci_dev *pdev = dev_priv->drm.pdev; unsigned int size; u16 snb_gmch_ctl; + int err; /* TODO: We're not aware of mappable constraints on gen8 yet */ ggtt->mappable_base = pci_resource_start(pdev, 2); ggtt->mappable_end = pci_resource_len(pdev, 2); - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); + if (err) + DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); @@ -2785,6 +2878,14 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.insert_entries = gen8_ggtt_insert_entries; + /* Serialize GTT updates with aperture access on BXT if VT-d is on. */ + if (intel_ggtt_update_needs_vtd_wa(dev_priv)) { + ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; + ggtt->base.insert_page = bxt_vtd_ggtt_insert_page__BKL; + if (ggtt->base.clear_range != nop_clear_range) + ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL; + } + ggtt->invalidate = gen6_ggtt_invalidate; return ggtt_probe_common(ggtt, size); @@ -2796,6 +2897,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) struct pci_dev *pdev = dev_priv->drm.pdev; unsigned int size; u16 snb_gmch_ctl; + int err; ggtt->mappable_base = pci_resource_start(pdev, 2); ggtt->mappable_end = pci_resource_len(pdev, 2); @@ -2808,8 +2910,11 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) return -ENXIO; } - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); + if (err) + DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); @@ -2928,10 +3033,8 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) ggtt->base.total >> 20); DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20); -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped) + if (intel_vtd_active()) DRM_INFO("VT-d active for gfx access\n"); -#endif return 0; } @@ -3106,7 +3209,7 @@ intel_rotate_pages(struct intel_rotation_info *rot_info, int ret = -ENOMEM; /* Allocate a temporary list of source pages for random access. 
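The (size_t) casts added to the stolen-size helpers above guard the shift itself: the u16 register field is promoted only to 32-bit int, so a large GMS value overflows before the result ever reaches the size_t return type. A worked example under that assumption (hypothetical helper name):

    #include <linux/types.h>

    static inline size_t gms_to_stolen_bytes(u16 gms)
    {
            /* With the 8-bit gen8+ GMS field, gms = 0x40 encodes
             * 0x40 * 32 MB = 2 GB of stolen memory. Unwidened,
             * 0x40 << 25 shifts the set bit into bit 31 of a signed
             * int: undefined behaviour, and a negative value if it
             * "works". Widening first keeps the arithmetic in size_t.
             */
            return (size_t)gms << 25;	/* 32 MB units */
    }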
*/ - page_addr_list = drm_malloc_gfp(n_pages, + page_addr_list = kvmalloc_array(n_pages, sizeof(dma_addr_t), GFP_TEMPORARY); if (!page_addr_list) @@ -3139,14 +3242,14 @@ intel_rotate_pages(struct intel_rotation_info *rot_info, DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); - drm_free_large(page_addr_list); + kvfree(page_addr_list); return st; err_sg_alloc: kfree(st); err_st_alloc: - drm_free_large(page_addr_list); + kvfree(page_addr_list); DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 174cf923c236..35e1a27729dc 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -37,8 +37,8 @@ struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 -#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2 +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 5ddbc9499775..0d1e0d8873ef 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -61,7 +61,7 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence) if (i915_fence_signaled(fence)) return false; - intel_engine_enable_signaling(to_request(fence)); + intel_engine_enable_signaling(to_request(fence), true); return true; } @@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) { struct i915_dependency *dep, *next; - GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node)); + GEM_BUG_ON(!list_empty(&pt->link)); /* Everyone we depended upon (the fences we wait to be signaled) * should retire before us and remove themselves from our list. @@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt) { INIT_LIST_HEAD(&pt->signalers_list); INIT_LIST_HEAD(&pt->waiters_list); - RB_CLEAR_NODE(&pt->node); + INIT_LIST_HEAD(&pt->link); pt->priority = INT_MIN; } @@ -214,12 +214,12 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) } /* Finally reset hw state */ - tl->seqno = seqno; intel_engine_init_global_seqno(engine, seqno); + tl->seqno = seqno; list_for_each_entry(timeline, &i915->gt.timelines, link) - memset(timeline->engine[id].sync_seqno, 0, - sizeof(timeline->engine[id].sync_seqno)); + memset(timeline->engine[id].global_sync, 0, + sizeof(timeline->engine[id].global_sync)); } return 0; @@ -271,6 +271,48 @@ void i915_gem_retire_noop(struct i915_gem_active *active, /* Space left intentionally blank */ } +static void advance_ring(struct drm_i915_gem_request *request) +{ + unsigned int tail; + + /* We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. + */ + if (list_is_last(&request->ring_link, &request->ring->request_list)) { + /* We may race here with execlists resubmitting this request + * as we retire it. 
The resubmission will move the ring->tail + * forwards (to request->wa_tail). We either read the + * current value that was written to hw, or the value that + * is just about to be. Either works, if we miss the last two + * noops - they are safe to be replayed on a reset. + */ + tail = READ_ONCE(request->ring->tail); + } else { + tail = request->postfix; + } + list_del(&request->ring_link); + + request->ring->head = tail; +} + +static void free_capture_list(struct drm_i915_gem_request *request) +{ + struct i915_gem_capture_list *capture; + + capture = request->capture_list; + while (capture) { + struct i915_gem_capture_list *next = capture->next; + + kfree(capture); + capture = next; + } +} + static void i915_gem_request_retire(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; @@ -287,16 +329,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) list_del_init(&request->link); spin_unlock_irq(&engine->timeline->lock); - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. - */ - list_del(&request->ring_link); - request->ring->head = request->postfix; if (!--request->i915->gt.active_requests) { GEM_BUG_ON(!request->i915->gt.awake); mod_delayed_work(request->i915->wq, @@ -304,6 +336,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) msecs_to_jiffies(100)); } unreserve_seqno(request->engine); + advance_ring(request); + + free_capture_list(request); /* Walk through the active list, calling retire on each. This allows * objects to track their GPU activity and mark themselves as idle @@ -402,7 +437,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = seqno; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_enable_signaling(request); + intel_engine_enable_signaling(request, false); spin_unlock(&request->lock); engine->emit_breadcrumb(request, @@ -503,9 +538,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) * * @engine: engine that we wish to issue the request on. * @ctx: context that the request will be associated with. - * This can be NULL if the request is not directly related to - * any specific user context, in which case this function will - * choose an appropriate context to use. * * Returns a pointer to the allocated request if successful, * or an error code if not. @@ -516,6 +548,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, { struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_gem_request *req; + struct intel_ring *ring; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -530,9 +563,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * GGTT space, so do this first before we reserve a seqno for * ourselves. 
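Taken together with the execbuffer hunk earlier, the capture_list introduced in this series is a plain singly-linked stack: entries are pushed at execbuf time for EXEC_OBJECT_CAPTURE objects and the whole chain is torn down by free_capture_list() on retire. A condensed sketch of the push side (capture_vma is a hypothetical name; the struct is the one added by this patch):

    static int capture_vma(struct drm_i915_gem_request *rq,
                           struct i915_vma *vma)
    {
            struct i915_gem_capture_list *c;

            c = kmalloc(sizeof(*c), GFP_KERNEL);
            if (unlikely(!c))
                    return -ENOMEM;

            /* Push onto the head; free_capture_list() later pops the lot. */
            c->vma = vma;
            c->next = rq->capture_list;
            rq->capture_list = c;

            return 0;
    }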
*/ - ret = engine->context_pin(engine, ctx); - if (ret) - return ERR_PTR(ret); + ring = engine->context_pin(engine, ctx); + if (IS_ERR(ring)) + return ERR_CAST(ring); + GEM_BUG_ON(!ring); ret = reserve_seqno(engine); if (ret) @@ -598,11 +632,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->i915 = dev_priv; req->engine = engine; req->ctx = ctx; + req->ring = ring; /* No zalloc, must clear what we need by hand */ req->global_seqno = 0; req->file_priv = NULL; req->batch = NULL; + req->capture_list = NULL; /* * Reserve space in the ring buffer for all the commands required to @@ -623,7 +659,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * GPU processing the request, we never over-estimate the * position of the head. */ - req->head = req->ring->tail; + req->head = req->ring->emit; /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); @@ -651,6 +687,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, int ret; GEM_BUG_ON(to == from); + GEM_BUG_ON(to->timeline == from->timeline); if (i915_gem_request_completed(from)) return 0; @@ -663,9 +700,6 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; } - if (to->timeline == from->timeline) - return 0; - if (to->engine == from->engine) { ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, @@ -674,55 +708,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, } seqno = i915_gem_request_global_seqno(from); - if (!seqno) { - ret = i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - GFP_KERNEL); - return ret < 0 ? ret : 0; - } + if (!seqno) + goto await_dma_fence; - if (seqno <= to->timeline->sync_seqno[from->engine->id]) - return 0; + if (!to->engine->semaphore.sync_to) { + if (!__i915_gem_request_started(from, seqno)) + goto await_dma_fence; - trace_i915_gem_ring_sync_to(to, from); - if (!i915.semaphores) { - if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) { - ret = i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - GFP_KERNEL); - if (ret < 0) - return ret; - } + if (!__i915_spin_request(from, seqno, TASK_INTERRUPTIBLE, 2)) + goto await_dma_fence; } else { + GEM_BUG_ON(!from->engine->semaphore.signal); + + if (seqno <= to->timeline->global_sync[from->engine->id]) + return 0; + + trace_i915_gem_ring_sync_to(to, from); ret = to->engine->semaphore.sync_to(to, from); if (ret) return ret; + + to->timeline->global_sync[from->engine->id] = seqno; } - to->timeline->sync_seqno[from->engine->id] = seqno; return 0; + +await_dma_fence: + ret = i915_sw_fence_await_dma_fence(&to->submit, + &from->fence, 0, + GFP_KERNEL); + return ret < 0 ? ret : 0; } int i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, struct dma_fence *fence) { - struct dma_fence_array *array; + struct dma_fence **child = &fence; + unsigned int nchild = 1; int ret; - int i; - - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return 0; - - if (dma_fence_is_i915(fence)) - return i915_gem_request_await_request(req, to_request(fence)); - - if (!dma_fence_is_array(fence)) { - ret = i915_sw_fence_await_dma_fence(&req->submit, - fence, I915_FENCE_TIMEOUT, - GFP_KERNEL); - return ret < 0 ? ret : 0; - } /* Note that if the fence-array was created in signal-on-any mode, * we should *not* decompose it into its individual fences. 
However, @@ -731,21 +755,46 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, * amdgpu and we should not see any incoming fence-array from * sync-file being in signal-on-any mode. */ + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + + child = array->fences; + nchild = array->num_fences; + GEM_BUG_ON(!nchild); + } + + do { + fence = *child++; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + continue; + + /* + * Requests on the same timeline are explicitly ordered, along + * with their dependencies, by i915_add_request() which ensures + * that requests are submitted in-order through each ring. + */ + if (fence->context == req->fence.context) + continue; - array = to_dma_fence_array(fence); - for (i = 0; i < array->num_fences; i++) { - struct dma_fence *child = array->fences[i]; + /* Squash repeated waits to the same timelines */ + if (fence->context != req->i915->mm.unordered_timeline && + intel_timeline_sync_is_later(req->timeline, fence)) + continue; - if (dma_fence_is_i915(child)) + if (dma_fence_is_i915(fence)) ret = i915_gem_request_await_request(req, - to_request(child)); + to_request(fence)); else - ret = i915_sw_fence_await_dma_fence(&req->submit, - child, I915_FENCE_TIMEOUT, + ret = i915_sw_fence_await_dma_fence(&req->submit, fence, + I915_FENCE_TIMEOUT, GFP_KERNEL); if (ret < 0) return ret; - } + + /* Record the latest fence used against each timeline */ + if (fence->context != req->i915->mm.unordered_timeline) + intel_timeline_sync_set(req->timeline, fence); + } while (--nchild); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 129c58bb4805..7b7c84369d78 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -67,12 +67,18 @@ struct i915_dependency { struct i915_priotree { struct list_head signalers_list; /* those before us, we depend upon */ struct list_head waiters_list; /* those after us, they depend upon us */ - struct rb_node node; + struct list_head link; int priority; #define I915_PRIORITY_MAX 1024 +#define I915_PRIORITY_NORMAL 0 #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) }; +struct i915_gem_capture_list { + struct i915_gem_capture_list *next; + struct i915_vma *vma; +}; + /** * Request queue structure. * @@ -167,6 +173,12 @@ struct drm_i915_gem_request { * error state dump only). */ struct i915_vma *batch; + /** Additional buffers requested by userspace to be captured upon + * a GPU hang. The vma/obj on this list are protected by their + * active reference - all objects on this list must also be + * on the active_list (of their final request). + */ + struct i915_gem_capture_list *capture_list; struct list_head active_list; /** Time at which this request was emitted, in jiffies. 
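The rewritten await path above decomposes fence arrays and then applies two filters per child fence: same-timeline fences are already ordered by construction, and the new per-timeline syncmap squashes repeated waits on foreign timelines. A condensed sketch of the per-fence step (await_once is a hypothetical name; the intel_timeline_sync_* helpers are the ones added later in this series):

    static int await_once(struct drm_i915_gem_request *req,
                          struct dma_fence *fence)
    {
            int ret;

            /* Same timeline: ordered by construction via i915_add_request(). */
            if (fence->context == req->fence.context)
                    return 0;

            /* Already awaiting this or a later seqno on that timeline? */
            if (fence->context != req->i915->mm.unordered_timeline &&
                intel_timeline_sync_is_later(req->timeline, fence))
                    return 0;

            ret = i915_sw_fence_await_dma_fence(&req->submit, fence,
                                                I915_FENCE_TIMEOUT,
                                                GFP_KERNEL);
            if (ret < 0)
                    return ret;

            /* Record the latest fence used against this timeline. */
            if (fence->context != req->i915->mm.unordered_timeline)
                    intel_timeline_sync_set(req->timeline, fence);

            return 0;
    }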
*/ diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 129ed303a6c4..0fd2b58ce475 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -35,9 +35,9 @@ #include "i915_drv.h" #include "i915_trace.h" -static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) +static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock) { - switch (mutex_trylock_recursive(&dev->struct_mutex)) { + switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) { case MUTEX_TRYLOCK_FAILED: return false; @@ -53,24 +53,29 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) BUG(); } -static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock) +static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock) { if (!unlock) return; - mutex_unlock(&dev->struct_mutex); - - /* expedite the RCU grace period to free some request slabs */ - synchronize_rcu_expedited(); + mutex_unlock(&dev_priv->drm.struct_mutex); } static bool any_vma_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma_list, obj_link) { + /* Only GGTT vma may be permanently pinned, and are always + * at the start of the list. We can stop hunting as soon + * as we see a ppGTT vma. + */ + if (!i915_vma_is_ggtt(vma)) + break; + if (i915_vma_is_pinned(vma)) return true; + } return false; } @@ -156,7 +161,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, unsigned long count = 0; bool unlock; - if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock)) + if (!shrinker_lock(dev_priv, &unlock)) return 0; trace_i915_gem_shrink(dev_priv, target, flags); @@ -244,7 +249,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, i915_gem_retire_requests(dev_priv); - i915_gem_shrinker_unlock(&dev_priv->drm, unlock); + shrinker_unlock(dev_priv, unlock); return count; } @@ -274,8 +279,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) I915_SHRINK_ACTIVE); intel_runtime_pm_put(dev_priv); - synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */ - return freed; } @@ -284,12 +287,11 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = container_of(shrinker, struct drm_i915_private, mm.shrinker); - struct drm_device *dev = &dev_priv->drm; struct drm_i915_gem_object *obj; unsigned long count; bool unlock; - if (!i915_gem_shrinker_lock(dev, &unlock)) + if (!shrinker_lock(dev_priv, &unlock)) return 0; i915_gem_retire_requests(dev_priv); @@ -304,7 +306,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) count += obj->base.size >> PAGE_SHIFT; } - i915_gem_shrinker_unlock(dev, unlock); + shrinker_unlock(dev_priv, unlock); return count; } @@ -314,11 +316,10 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = container_of(shrinker, struct drm_i915_private, mm.shrinker); - struct drm_device *dev = &dev_priv->drm; unsigned long freed; bool unlock; - if (!i915_gem_shrinker_lock(dev, &unlock)) + if (!shrinker_lock(dev_priv, &unlock)) return SHRINK_STOP; freed = i915_gem_shrink(dev_priv, @@ -332,26 +333,20 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - i915_gem_shrinker_unlock(dev, unlock); + shrinker_unlock(dev_priv, unlock); return freed; } 
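The uninterruptible locking helper reworked in the hunks that follow reduces to polling a trylock against a deadline, yielding between attempts. A generic form of the pattern, stripped of the i915 specifics (illustrative only):

    #include <linux/jiffies.h>
    #include <linux/mutex.h>
    #include <linux/sched.h>

    static bool trylock_with_timeout(struct mutex *m, int timeout_ms)
    {
            unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);

            do {
                    if (mutex_trylock(m))
                            return true;

                    if (time_after(jiffies, timeout))
                            return false;

                    /* Back off for a tick before retrying. */
                    schedule_timeout_killable(1);
            } while (1);
    }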
-struct shrinker_lock_uninterruptible { - bool was_interruptible; - bool unlock; -}; - static bool -i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, - struct shrinker_lock_uninterruptible *slu, - int timeout_ms) +shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock, + int timeout_ms) { unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); do { if (i915_gem_wait_for_idle(dev_priv, 0) == 0 && - i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) + shrinker_lock(dev_priv, unlock)) break; schedule_timeout_killable(1); @@ -364,29 +359,19 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, } } while (1); - slu->was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; return true; } -static void -i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv, - struct shrinker_lock_uninterruptible *slu) -{ - dev_priv->mm.interruptible = slu->was_interruptible; - i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock); -} - static int i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) { struct drm_i915_private *dev_priv = container_of(nb, struct drm_i915_private, mm.oom_notifier); - struct shrinker_lock_uninterruptible slu; struct drm_i915_gem_object *obj; unsigned long unevictable, bound, unbound, freed_pages; + bool unlock; - if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) + if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) return NOTIFY_DONE; freed_pages = i915_gem_shrink_all(dev_priv); @@ -415,7 +400,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) bound += obj->base.size >> PAGE_SHIFT; } - i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu); + shrinker_unlock(dev_priv, unlock); if (freed_pages || unbound || bound) pr_info("Purging GPU memory, %lu pages freed, " @@ -435,12 +420,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr { struct drm_i915_private *dev_priv = container_of(nb, struct drm_i915_private, mm.vmap_notifier); - struct shrinker_lock_uninterruptible slu; struct i915_vma *vma, *next; unsigned long freed_pages = 0; + bool unlock; int ret; - if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) + if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) return NOTIFY_DONE; /* Force everything onto the inactive lists */ @@ -465,7 +450,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr } out: - i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu); + shrinker_unlock(dev_priv, unlock); *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index f3abdc27c5dd..681db6083f4d 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -414,12 +414,10 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) return 0; } -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped && INTEL_GEN(dev_priv) < 8) { + if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) { DRM_INFO("DMAR active, disabling use of stolen memory\n"); return 0; } -#endif if (ggtt->stolen_size == 0) return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c index b596ca7ee058..c597ce277a04 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c @@ -23,6 +23,32 @@ */ #include 
"i915_drv.h" +#include "i915_syncmap.h" + +static void __intel_timeline_init(struct intel_timeline *tl, + struct i915_gem_timeline *parent, + u64 context, + struct lock_class_key *lockclass, + const char *lockname) +{ + tl->fence_context = context; + tl->common = parent; +#ifdef CONFIG_DEBUG_SPINLOCK + __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); +#else + spin_lock_init(&tl->lock); +#endif + init_request_active(&tl->last_request, NULL); + INIT_LIST_HEAD(&tl->requests); + i915_syncmap_init(&tl->sync); +} + +static void __intel_timeline_fini(struct intel_timeline *tl) +{ + GEM_BUG_ON(!list_empty(&tl->requests)); + + i915_syncmap_free(&tl->sync); +} static int __i915_gem_timeline_init(struct drm_i915_private *i915, struct i915_gem_timeline *timeline, @@ -35,6 +61,14 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915, lockdep_assert_held(&i915->drm.struct_mutex); + /* + * Ideally we want a set of engines on a single leaf as we expect + * to mostly be tracking synchronisation between engines. It is not + * a huge issue if this is not the case, but we may want to mitigate + * any page crossing penalties if they become an issue. + */ + BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + timeline->i915 = i915; timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL); if (!timeline->name) @@ -44,19 +78,10 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915, /* Called during early_init before we know how many engines there are */ fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine)); - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { - struct intel_timeline *tl = &timeline->engine[i]; - - tl->fence_context = fences++; - tl->common = timeline; -#ifdef CONFIG_DEBUG_SPINLOCK - __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); -#else - spin_lock_init(&tl->lock); -#endif - init_request_active(&tl->last_request, NULL); - INIT_LIST_HEAD(&tl->requests); - } + for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) + __intel_timeline_init(&timeline->engine[i], + timeline, fences++, + lockclass, lockname); return 0; } @@ -81,18 +106,52 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915) &class, "&global_timeline->lock"); } +/** + * i915_gem_timelines_mark_idle -- called when the driver idles + * @i915 - the drm_i915_private device + * + * When the driver is completely idle, we know that all of our sync points + * have been signaled and our tracking is then entirely redundant. Any request + * to wait upon an older sync point will be completed instantly as we know + * the fence is signaled and therefore we will not even look them up in the + * sync point map. + */ +void i915_gem_timelines_mark_idle(struct drm_i915_private *i915) +{ + struct i915_gem_timeline *timeline; + int i; + + lockdep_assert_held(&i915->drm.struct_mutex); + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { + struct intel_timeline *tl = &timeline->engine[i]; + + /* + * All known fences are completed so we can scrap + * the current sync point tracking and start afresh, + * any attempt to wait upon a previous sync point + * will be skipped as the fence was signaled. 
+ */ + i915_syncmap_free(&tl->sync); + } + } +} + void i915_gem_timeline_fini(struct i915_gem_timeline *timeline) { int i; lockdep_assert_held(&timeline->i915->drm.struct_mutex); - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { - struct intel_timeline *tl = &timeline->engine[i]; - - GEM_BUG_ON(!list_empty(&tl->requests)); - } + for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) + __intel_timeline_fini(&timeline->engine[i]); list_del(&timeline->link); kfree(timeline->name); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_timeline.c" +#include "selftests/i915_gem_timeline.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index 6c53e14cab2a..bfb5eb94c64d 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -27,7 +27,9 @@ #include <linux/list.h> +#include "i915_utils.h" #include "i915_gem_request.h" +#include "i915_syncmap.h" struct i915_gem_timeline; @@ -55,7 +57,25 @@ struct intel_timeline { * struct_mutex. */ struct i915_gem_active last_request; - u32 sync_seqno[I915_NUM_ENGINES]; + + /** + * We track the most recent seqno that we wait on in every context so + * that we only have to emit a new await and dependency on a more + * recent sync point. As the contexts may be executed out-of-order, we + * have to track each individually and can not rely on an absolute + * global_seqno. When we know that all tracked fences are completed + * (i.e. when the driver is idle), we know that the syncmap is + * redundant and we can discard it without loss of generality. + */ + struct i915_syncmap *sync; + /** + * Separately to the inter-context seqno map above, we track the last + * barrier (e.g. semaphore wait) to the global engine timelines. Note + * that this tracks global_seqno rather than the context.seqno, and + * so it is subject to the limitations of hw wraparound and that we + * may need to revoke global_seqno (on pre-emption). 
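The sync field added above is backed by the new i915_syncmap structure, introduced elsewhere in this series as a compressed tree keyed by the 64-bit fence context. Its expected usage, per the helpers declared just below (a sketch; the signatures are as assumed from this series, and remember_wait is a hypothetical name):

    /* i915_syncmap_init(&sync) and i915_syncmap_free(&sync) bracket the
     * lifetime; in between, record and query the last-waited seqno per
     * fence context.
     */
    static int remember_wait(struct i915_syncmap **sync,
                             const struct dma_fence *fence)
    {
            /* Skip if an equal or later seqno is already recorded. */
            if (i915_syncmap_is_later(sync, fence->context, fence->seqno))
                    return 0;

            /* May allocate a new leaf; returns -ENOMEM on failure. */
            return i915_syncmap_set(sync, fence->context, fence->seqno);
    }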
+ */ + u32 global_sync[I915_NUM_ENGINES]; struct i915_gem_timeline *common; }; @@ -73,6 +93,31 @@ int i915_gem_timeline_init(struct drm_i915_private *i915, struct i915_gem_timeline *tl, const char *name); int i915_gem_timeline_init__global(struct drm_i915_private *i915); +void i915_gem_timelines_mark_idle(struct drm_i915_private *i915); void i915_gem_timeline_fini(struct i915_gem_timeline *tl); +static inline int __intel_timeline_sync_set(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_set(&tl->sync, context, seqno); +} + +static inline int intel_timeline_sync_set(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno); +} + +static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_is_later(&tl->sync, context, seqno); +} + +static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 58ccf8b8ca1c..1a0ce1dc68f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -507,7 +507,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) ret = -ENOMEM; pinned = 0; - pvec = drm_malloc_gfp(npages, sizeof(struct page *), GFP_TEMPORARY); + pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_TEMPORARY); if (pvec != NULL) { struct mm_struct *mm = obj->userptr.mm->mm; unsigned int flags = 0; @@ -555,7 +555,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) mutex_unlock(&obj->mm.lock); release_pages(pvec, pinned, 0); - drm_free_large(pvec); + kvfree(pvec); i915_gem_object_put(obj); put_task_struct(work->task); @@ -642,7 +642,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) pinned = 0; if (mm == current->mm) { - pvec = drm_malloc_gfp(num_pages, sizeof(struct page *), + pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_TEMPORARY | __GFP_NORETRY | __GFP_NOWARN); @@ -669,7 +669,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) if (IS_ERR(pages)) release_pages(pvec, pinned, 0); - drm_free_large(pvec); + kvfree(pvec); return pages; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8effc59f5cb5..e18f350bc364 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -712,6 +712,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, dev_priv->engine[i], NULL, obj); } + for (j = 0; j < ee->user_bo_count; j++) + print_error_obj(m, dev_priv->engine[i], + "user", ee->user_bo[j]); + if (ee->num_requests) { err_printf(m, "%s --- %d requests\n", dev_priv->engine[i]->name, @@ -825,11 +829,15 @@ void __i915_gpu_state_free(struct kref *error_ref) { struct i915_gpu_state *error = container_of(error_ref, typeof(*error), ref); - int i; + long i, j; for (i = 0; i < ARRAY_SIZE(error->engine); i++) { struct drm_i915_error_engine *ee = &error->engine[i]; + for (j = 0; j < ee->user_bo_count; j++) + i915_error_object_free(ee->user_bo[j]); + kfree(ee->user_bo); + i915_error_object_free(ee->batchbuffer); i915_error_object_free(ee->wa_batchbuffer); i915_error_object_free(ee->ringbuffer); @@ -1316,12 +1324,17 @@ static void engine_record_requests(struct intel_engine_cs *engine, static 
void error_record_engine_execlists(struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { + const struct execlist_port *port = engine->execlist_port; unsigned int n; - for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) - if (engine->execlist_port[n].request) - record_request(engine->execlist_port[n].request, - &ee->execlist[n]); + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { + struct drm_i915_gem_request *rq = port_request(&port[n]); + + if (!rq) + break; + + record_request(rq, &ee->execlist[n]); + } } static void record_context(struct drm_i915_error_context *e, @@ -1346,6 +1359,35 @@ static void record_context(struct drm_i915_error_context *e, e->active = ctx->active_count; } +static void request_record_user_bo(struct drm_i915_gem_request *request, + struct drm_i915_error_engine *ee) +{ + struct i915_gem_capture_list *c; + struct drm_i915_error_object **bo; + long count; + + count = 0; + for (c = request->capture_list; c; c = c->next) + count++; + + bo = NULL; + if (count) + bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); + if (!bo) + return; + + count = 0; + for (c = request->capture_list; c; c = c->next) { + bo[count] = i915_error_object_create(request->i915, c->vma); + if (!bo[count]) + break; + count++; + } + + ee->user_bo = bo; + ee->user_bo_count = count; +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { @@ -1392,6 +1434,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->wa_batchbuffer = i915_error_object_create(dev_priv, engine->scratch); + request_record_user_bo(request, ee); ee->ctx = i915_error_object_create(dev_priv, @@ -1560,6 +1603,9 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, error->done_reg = I915_READ(DONE_REG); } + if (INTEL_GEN(dev_priv) >= 5) + error->ccid = I915_READ(CCID); + /* 3: Feature specific registers */ if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { error->gam_ecochk = I915_READ(GAM_ECOCHK); @@ -1567,9 +1613,6 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, } /* 4: Everything else */ - if (HAS_HW_CONTEXTS(dev_priv)) - error->ccid = I915_READ(CCID); - if (INTEL_GEN(dev_priv) >= 8) { error->ier = I915_READ(GEN8_DE_MISC_IER); for (i = 0; i < 4; i++) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1642fff9cf13..e6e0c6ef1084 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client, GEM_BUG_ON(freespace < wqi_size); /* The GuC firmware wants the tail index in QWords, not bytes */ - tail = rq->tail; - assert_ring_tail_valid(rq->ring, rq->tail); - tail >>= 3; + tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3; GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. 
So we @@ -616,12 +614,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; - client->retcode = b_ret; - if (b_ret) - client->b_fail += 1; - - guc->submissions[engine_id] += 1; - guc->last_seqno[engine_id] = rq->global_seqno; spin_unlock_irqrestore(&client->wq_lock, flags); } @@ -651,47 +643,68 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq) trace_dma_fence_enable_signal(&rq->fence); spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING); - intel_engine_enable_signaling(rq); + intel_engine_enable_signaling(rq, true); spin_unlock(&rq->lock); } +static void port_assign(struct execlist_port *port, + struct drm_i915_gem_request *rq) +{ + GEM_BUG_ON(rq == port_request(port)); + + if (port_isset(port)) + i915_gem_request_put(port_request(port)); + + port_set(port, i915_gem_request_get(rq)); + nested_enable_signaling(rq); +} + static bool i915_guc_dequeue(struct intel_engine_cs *engine) { struct execlist_port *port = engine->execlist_port; - struct drm_i915_gem_request *last = port[0].request; + struct drm_i915_gem_request *last = port_request(port); struct rb_node *rb; bool submit = false; spin_lock_irq(&engine->timeline->lock); rb = engine->execlist_first; + GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb); while (rb) { - struct drm_i915_gem_request *rq = - rb_entry(rb, typeof(*rq), priotree.node); - - if (last && rq->ctx != last->ctx) { - if (port != engine->execlist_port) - break; - - i915_gem_request_assign(&port->request, last); - nested_enable_signaling(last); - port++; + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + struct drm_i915_gem_request *rq, *rn; + + list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + if (last && rq->ctx != last->ctx) { + if (port != engine->execlist_port) { + __list_del_many(&p->requests, + &rq->priotree.link); + goto done; + } + + if (submit) + port_assign(port, last); + port++; + } + + INIT_LIST_HEAD(&rq->priotree.link); + rq->priotree.priority = INT_MAX; + + i915_guc_submit(rq); + trace_i915_gem_request_in(rq, port_index(port, engine)); + last = rq; + submit = true; } rb = rb_next(rb); - rb_erase(&rq->priotree.node, &engine->execlist_queue); - RB_CLEAR_NODE(&rq->priotree.node); - rq->priotree.priority = INT_MAX; - - i915_guc_submit(rq); - trace_i915_gem_request_in(rq, port - engine->execlist_port); - last = rq; - submit = true; - } - if (submit) { - i915_gem_request_assign(&port->request, last); - nested_enable_signaling(last); - engine->execlist_first = rb; + rb_erase(&p->node, &engine->execlist_queue); + INIT_LIST_HEAD(&p->requests); + if (p->priority != I915_PRIORITY_NORMAL) + kmem_cache_free(engine->i915->priorities, p); } +done: + engine->execlist_first = rb; + if (submit) + port_assign(port, last); spin_unlock_irq(&engine->timeline->lock); return submit; @@ -705,17 +718,19 @@ static void i915_guc_irq_handler(unsigned long data) bool submit; do { - rq = port[0].request; + rq = port_request(&port[0]); while (rq && i915_gem_request_completed(rq)) { trace_i915_gem_request_out(rq); i915_gem_request_put(rq); - port[0].request = port[1].request; - port[1].request = NULL; - rq = port[0].request; + + port[0] = port[1]; + memset(&port[1], 0, sizeof(port[1])); + + rq = port_request(&port[0]); } submit = false; - if (!port[1].request) + if (!port_count(&port[1])) submit = i915_guc_dequeue(engine); } while (submit); } @@ -1053,8 +1068,7 @@ static int guc_ads_create(struct intel_guc *guc) dev_priv->engine[RCS]->status_page.ggtt_offset; 
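The port_request()/port_count()/port_assign() accessors used in the GuC dequeue above come from the execlists rework in this series, which packs a small submission count into the low bits of the request pointer; the pointers come from the slab allocator, so their low bits are known to be zero. A minimal illustration of the idea, not the driver's exact helpers:

    #define PORT_COUNT_BITS	2UL
    #define PORT_COUNT_MASK	((1UL << PORT_COUNT_BITS) - 1)

    static inline unsigned long pack_port(void *rq, unsigned long count)
    {
            /* rq is slab-allocated, so its low bits are free for tagging. */
            return (unsigned long)rq | (count & PORT_COUNT_MASK);
    }

    static inline void *port_ptr(unsigned long packed)
    {
            return (void *)(packed & ~PORT_COUNT_MASK);
    }

    static inline unsigned long port_cnt(unsigned long packed)
    {
            return packed & PORT_COUNT_MASK;
    }

Keeping the count beside the pointer lets a single word be published atomically, which is why the irq handler above can shift port[1] into port[0] without further locking.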
for_each_engine(engine, dev_priv, id) - blob->ads.eng_state_size[engine->guc_id] = - intel_lr_context_size(engine); + blob->ads.eng_state_size[engine->guc_id] = engine->context_size; base = guc_ggtt_offset(vma); blob->ads.scheduler_policies = base + ptr_offset(blob, policies); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index fd97fe00cd0d..7b7f55a28eec 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -720,9 +720,7 @@ static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) struct drm_i915_private *dev_priv = to_i915(dev); i915_reg_t high_frame, low_frame; u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal; - struct intel_crtc *intel_crtc = intel_get_crtc_for_pipe(dev_priv, - pipe); - const struct drm_display_mode *mode = &intel_crtc->base.hwmode; + const struct drm_display_mode *mode = &dev->vblank[pipe].hwmode; unsigned long irqflags; htotal = mode->crtc_htotal; @@ -779,13 +777,17 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - const struct drm_display_mode *mode = &crtc->base.hwmode; + const struct drm_display_mode *mode; + struct drm_vblank_crtc *vblank; enum pipe pipe = crtc->pipe; int position, vtotal; if (!crtc->active) return -1; + vblank = &crtc->base.dev->vblank[drm_crtc_index(&crtc->base)]; + mode = &vblank->hwmode; + vtotal = mode->crtc_vtotal; if (mode->flags & DRM_MODE_FLAG_INTERLACE) vtotal /= 2; @@ -827,10 +829,10 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) return (position + crtc->scanline_offset) % vtotal; } -static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode) +static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = intel_get_crtc_for_pipe(dev_priv, @@ -838,13 +840,12 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; bool in_vbl = true; - int ret = 0; unsigned long irqflags; if (WARN_ON(!mode->crtc_clock)) { DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled " "pipe %c\n", pipe_name(pipe)); - return 0; + return false; } htotal = mode->crtc_htotal; @@ -859,8 +860,6 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, vtotal /= 2; } - ret |= DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE; - /* * Lock uncore.lock, as we will do multiple timing critical raw * register reads, potentially with preemption disabled, so the @@ -944,11 +943,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, *hpos = position - (*vpos * htotal); } - /* In vblank? 
*/ - if (in_vbl) - ret |= DRM_SCANOUTPOS_IN_VBLANK; - - return ret; + return true; } int intel_get_crtc_scanline(struct intel_crtc *crtc) @@ -964,37 +959,6 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc) return position; } -static int i915_get_vblank_timestamp(struct drm_device *dev, unsigned int pipe, - int *max_error, - struct timeval *vblank_time, - unsigned flags) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc; - - if (pipe >= INTEL_INFO(dev_priv)->num_pipes) { - DRM_ERROR("Invalid crtc %u\n", pipe); - return -EINVAL; - } - - /* Get drm_crtc to timestamp: */ - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - if (crtc == NULL) { - DRM_ERROR("Invalid crtc %u\n", pipe); - return -EINVAL; - } - - if (!crtc->base.hwmode.crtc_clock) { - DRM_DEBUG_KMS("crtc %u is disabled\n", pipe); - return -EBUSY; - } - - /* Helper routine in DRM core does all the work: */ - return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, - vblank_time, flags, - &crtc->base.hwmode); -} - static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) { u32 busy_up, busy_down, max_avg, min_avg; @@ -1236,7 +1200,7 @@ out: static void ivybridge_parity_work(struct work_struct *work) { struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, l3_parity.error_work); + container_of(work, typeof(*dev_priv), l3_parity.error_work); u32 error_status, row, bank, subbank; char *parity_event[6]; uint32_t misccpctl; @@ -1353,14 +1317,16 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, ivybridge_parity_error_irq_handler(dev_priv, gt_iir); } -static __always_inline void +static void gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) { bool tasklet = false; if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; + if (port_count(&engine->execlist_port[0])) { + __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet = true; + } } if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { @@ -2953,7 +2919,6 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) u32 pipestat_mask; u32 enable_mask; enum pipe pipe; - u32 val; pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV | PIPE_CRC_DONE_INTERRUPT_STATUS; @@ -2964,18 +2929,16 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) enable_mask = I915_DISPLAY_PORT_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_LPE_PIPE_A_INTERRUPT | + I915_LPE_PIPE_B_INTERRUPT; + if (IS_CHERRYVIEW(dev_priv)) - enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT; + enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT | + I915_LPE_PIPE_C_INTERRUPT; WARN_ON(dev_priv->irq_mask != ~0); - val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT | - I915_LPE_PIPE_C_INTERRUPT); - - enable_mask |= val; - dev_priv->irq_mask = ~enable_mask; GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask); @@ -4233,11 +4196,15 @@ static void i965_irq_uninstall(struct drm_device * dev) void intel_irq_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; + int i; intel_hpd_init_work(dev_priv); INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); + INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); + for (i = 0; i < MAX_L3_SLICES; ++i) + dev_priv->l3_parity.remap_info[i] = NULL; if (HAS_GUC_SCHED(dev_priv)) dev_priv->pm_guc_events = 
GEN9_GUC_TO_HOST_INT_EVENT; @@ -4294,7 +4261,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev_priv->hotplug.hpd_storm_threshold = HPD_STORM_DEFAULT_THRESHOLD; - dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; + dev->driver->get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos; dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; if (IS_CHERRYVIEW(dev_priv)) { @@ -4363,6 +4330,20 @@ void intel_irq_init(struct drm_i915_private *dev_priv) } /** + * intel_irq_fini - deinitializes IRQ support + * @i915: i915 device instance + * + * This function deinitializes all the IRQ support. + */ +void intel_irq_fini(struct drm_i915_private *i915) +{ + int i; + + for (i = 0; i < MAX_L3_SLICES; ++i) + kfree(i915->l3_parity.remap_info[i]); +} + +/** * intel_irq_install - enables the hardware interrupt * @dev_priv: i915 device instance * diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f87b0c4e564d..f80db2ccd92f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -220,7 +220,6 @@ static const struct intel_device_info intel_ironlake_m_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_gmbus_irq = 1, \ - .has_hw_contexts = 1, \ .has_aliasing_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ CURSOR_OFFSETS @@ -245,7 +244,6 @@ static const struct intel_device_info intel_sandybridge_m_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_gmbus_irq = 1, \ - .has_hw_contexts = 1, \ .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ @@ -280,7 +278,6 @@ static const struct intel_device_info intel_valleyview_info = { .has_runtime_pm = 1, .has_rc6 = 1, .has_gmbus_irq = 1, - .has_hw_contexts = 1, .has_gmch_display = 1, .has_hotplug = 1, .has_aliasing_ppgtt = 1, @@ -340,7 +337,6 @@ static const struct intel_device_info intel_cherryview_info = { .has_resource_streamer = 1, .has_rc6 = 1, .has_gmbus_irq = 1, - .has_hw_contexts = 1, .has_logical_ring_contexts = 1, .has_gmch_display = 1, .has_aliasing_ppgtt = 1, @@ -387,7 +383,6 @@ static const struct intel_device_info intel_skylake_gt3_info = { .has_rc6 = 1, \ .has_dp_mst = 1, \ .has_gmbus_irq = 1, \ - .has_hw_contexts = 1, \ .has_logical_ring_contexts = 1, \ .has_guc = 1, \ .has_decoupled_mmio = 1, \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 060b171480d5..85269bcc8372 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -205,25 +205,49 @@ #define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1)) -/* There's a HW race condition between OA unit tail pointer register updates and +/** + * DOC: OA Tail Pointer Race + * + * There's a HW race condition between OA unit tail pointer register updates and * writes to memory whereby the tail pointer can sometimes get ahead of what's - * been written out to the OA buffer so far. + * been written out to the OA buffer so far (in terms of what's visible to the + * CPU). + * + * Although this can be observed explicitly while copying reports to userspace + * by checking for a zeroed report-id field in tail reports, we want to account + * for this earlier, as part of the _oa_buffer_check to avoid lots of redundant + * read() attempts. + * + * In effect we define a tail pointer for reading that lags the real tail + * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough + * time for the corresponding reports to become visible to the CPU. 
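For reference, the OA_TAKEN() macro defined at the top of this hunk measures the occupied bytes between two offsets in the circular buffer; because OA_BUFFER_SIZE is a power of two, unsigned wraparound falls out of the mask. A worked example, assuming the 16 MB OA buffer size used by i915_perf.c:

    /* #define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1))
     *
     * With a 16 MB buffer (mask 0x00ffffff) and a tail that has wrapped:
     *
     *   head = 0x00fff000, tail = 0x00000100
     *   tail - head      = 0xff001100  (u32 wraparound)
     *   ... & 0x00ffffff = 0x00001100  == 4352 bytes available
     */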
+ * + * To manage this we actually track two tail pointers: + * 1) An 'aging' tail with an associated timestamp that is tracked until we + * can trust the corresponding data is visible to the CPU; at which point + * it is considered 'aged'. + * 2) An 'aged' tail that can be used for read()ing. + * + * The two separate pointers let us decouple read()s from tail pointer aging. + * + * The tail pointers are checked and updated at a limited rate within an hrtimer + * callback (the same callback that is used for delivering POLLIN events). * - * Although this can be observed explicitly by checking for a zeroed report-id - * field in tail reports, it seems preferable to account for this earlier e.g. - * as part of the _oa_buffer_is_empty checks to minimize -EAGAIN polling cycles - * in this situation. + * Initially the tails are marked invalid with %INVALID_TAIL_PTR which + * indicates that an updated tail pointer is needed. * - * To give time for the most recent reports to land before they may be copied to - * userspace, the driver operates as if the tail pointer effectively lags behind - * the HW tail pointer by 'tail_margin' bytes. The margin in bytes is calculated - * based on this constant in nanoseconds, the current OA sampling exponent - * and current report size. + * Most of the implementation details for this workaround are in + * gen7_oa_buffer_check_unlocked() and gen7_append_oa_reports(). * - * There is also a fallback check while reading to simply skip over reports with - * a zeroed report-id. + * Note for posterity: previously the driver used to define an effective tail + * pointer that lagged the real pointer by a 'tail margin' measured in bytes + * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency. + * This was flawed considering that the OA unit may also automatically generate + * non-periodic reports (such as on context switch) or the OA unit may be + * enabled without any periodic sampling. */ #define OA_TAIL_MARGIN_NSEC 100000ULL +#define INVALID_TAIL_PTR 0xffffffff /* frequency for checking whether the OA unit has written new reports to the * circular OA buffer... @@ -308,27 +332,121 @@ struct perf_open_properties { int oa_period_exponent; }; -/* NB: This is either called via fops or the poll check hrtimer (atomic ctx) +/** + * gen7_oa_buffer_check_unlocked - check for data and update tail ptr state + * @dev_priv: i915 device instance + * + * This is either called via fops (for blocking reads in user ctx) or the poll + * check hrtimer (atomic ctx) to check the OA buffer tail pointer and determine + * if there is data available for userspace to read. * - * It's safe to read OA config state here unlocked, assuming that this is only - * called while the stream is enabled, while the global OA configuration can't - * be modified. * - * Note: we don't lock around the head/tail reads even though there's the slim - * possibility of read() fop errors forcing a re-init of the OA buffer - * pointers. A race here could result in a false positive !empty status which - * is acceptable.
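One of the two callers named in the kernel-doc here is the POLLIN hrtimer; its shape is the standard self-rearming pattern, reconstructed below from context (the perf.oa.poll_check_timer, .pollin and .poll_wq field names and the POLL_PERIOD constant are assumptions, not quoted from this patch):

    static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
    {
            struct drm_i915_private *dev_priv =
                    container_of(hrtimer, typeof(*dev_priv),
                                 perf.oa.poll_check_timer);

            /* Atomic context: the unlocked check is designed for this. */
            if (gen7_oa_buffer_check_unlocked(dev_priv)) {
                    dev_priv->perf.oa.pollin = true;
                    wake_up(&dev_priv->perf.oa.poll_wq);
            }

            hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));

            return HRTIMER_RESTART;
    }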
+ * Besides returning true when there is data available to read(), this function + * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp + * and .aged_tail_idx state used for reading. + * + * Note: It's safe to read OA config state here unlocked, assuming that this is + * only called while the stream is enabled, while the global OA configuration + * can't be modified. + * + * Returns: %true if the OA buffer contains data, else %false */ -static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv) +static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) { int report_size = dev_priv->perf.oa.oa_buffer.format_size; - u32 oastatus2 = I915_READ(GEN7_OASTATUS2); - u32 oastatus1 = I915_READ(GEN7_OASTATUS1); - u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK; - u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; + unsigned long flags; + unsigned int aged_idx; + u32 oastatus1; + u32 head, hw_tail, aged_tail, aging_tail; + u64 now; + + /* We have to consider the (unlikely) possibility that read() errors + * could result in an OA buffer reset which might reset the head, + * tails[] and aged_tail state. + */ + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + /* NB: The head we observe here might effectively be a little out of + * date (between head and tails[aged_idx].offset if there is currently + * a read() in progress). + */ + head = dev_priv->perf.oa.oa_buffer.head; + + aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; + aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset; + aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset; - return OA_TAKEN(tail, head) < - dev_priv->perf.oa.tail_margin + report_size; + oastatus1 = I915_READ(GEN7_OASTATUS1); + hw_tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; + + /* The tail pointer increases in 64 byte increments, + * not in report_size steps... + */ + hw_tail &= ~(report_size - 1); + + now = ktime_get_mono_fast_ns(); + + /* Update the aged tail + * + * Flip the tail pointer available for read()s once the aging tail is + * old enough to trust that the corresponding data will be visible to + * the CPU... + * + * Do this before updating the aging pointer in case we may be able to + * immediately start aging a new pointer too (if new data has become + * available) without needing to wait for a later hrtimer callback. + */ + if (aging_tail != INVALID_TAIL_PTR && + ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) > + OA_TAIL_MARGIN_NSEC)) { + aged_idx ^= 1; + dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx; + + aged_tail = aging_tail; + + /* Mark that we need a new pointer to start aging... */ + dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR; + aging_tail = INVALID_TAIL_PTR; + } + + /* Update the aging tail + * + * We throttle aging tail updates until we have a new tail that + * represents >= one report more data than is already available for + * reading. This ensures there will be enough data for a successful + * read once this new pointer has aged and ensures we will give the new + * pointer time to age. + */ + if (aging_tail == INVALID_TAIL_PTR && + (aged_tail == INVALID_TAIL_PTR || + OA_TAKEN(hw_tail, aged_tail) >= report_size)) { + struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma; + u32 gtt_offset = i915_ggtt_offset(vma); + + /* Be paranoid and do a bounds check on the pointer read back + * from hardware, just in case some spurious hardware condition + * could put the tail out of bounds...
+ */ + if (hw_tail >= gtt_offset && + hw_tail < (gtt_offset + OA_BUFFER_SIZE)) { + dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = + aging_tail = hw_tail; + dev_priv->perf.oa.oa_buffer.aging_timestamp = now; + } else { + DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", + hw_tail); + } + } + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + return aged_tail == INVALID_TAIL_PTR ? + false : OA_TAKEN(aged_tail, head) >= report_size; } /** @@ -421,8 +539,6 @@ static int append_oa_sample(struct i915_perf_stream *stream, * @buf: destination buffer given by userspace * @count: the number of bytes userspace wants to read * @offset: (inout): the current position for writing into @buf - * @head_ptr: (inout): the current oa buffer cpu read position - * @tail: the current oa buffer gpu write position * * Notably any error condition resulting in a short read (-%ENOSPC or * -%EFAULT) will be returned even though one or more records may @@ -431,7 +547,7 @@ static int append_oa_sample(struct i915_perf_stream *stream, * userspace. * * Note: reports are consumed from the head, and appended to the - * tail, so the head chases the tail?... If you think that's mad + * tail, so the tail chases the head?... If you think that's mad * and back-to-front you're not alone, but this follows the * Gen PRM naming convention. * @@ -440,57 +556,55 @@ static int append_oa_sample(struct i915_perf_stream *stream, static int gen7_append_oa_reports(struct i915_perf_stream *stream, char __user *buf, size_t count, - size_t *offset, - u32 *head_ptr, - u32 tail) + size_t *offset) { struct drm_i915_private *dev_priv = stream->dev_priv; int report_size = dev_priv->perf.oa.oa_buffer.format_size; u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; - int tail_margin = dev_priv->perf.oa.tail_margin; u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); u32 mask = (OA_BUFFER_SIZE - 1); - u32 head; + size_t start_offset = *offset; + unsigned long flags; + unsigned int aged_tail_idx; + u32 head, tail; u32 taken; int ret = 0; if (WARN_ON(!stream->enabled)) return -EIO; - head = *head_ptr - gtt_offset; - tail -= gtt_offset; + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); - /* The OA unit is expected to wrap the tail pointer according to the OA - * buffer size and since we should never write a misaligned head - * pointer we don't expect to read one back either... - */ - if (tail > OA_BUFFER_SIZE || head > OA_BUFFER_SIZE || - head % report_size) { - DRM_ERROR("Inconsistent OA buffer pointer (head = %u, tail = %u): force restart\n", - head, tail); - dev_priv->perf.oa.ops.oa_disable(dev_priv); - dev_priv->perf.oa.ops.oa_enable(dev_priv); - *head_ptr = I915_READ(GEN7_OASTATUS2) & - GEN7_OASTATUS2_HEAD_MASK; - return -EIO; - } + head = dev_priv->perf.oa.oa_buffer.head; + aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; + tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset; + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); - /* The tail pointer increases in 64 byte increments, not in report_size - * steps... + /* An invalid tail pointer here means we're still waiting for the poll + * hrtimer callback to give us a pointer */ - tail &= ~(report_size - 1); + if (tail == INVALID_TAIL_PTR) + return -EAGAIN; - /* Move the tail pointer back by the current tail_margin to account for - * the possibility that the latest reports may not have really landed - * in memory yet... 
+ /* NB: oa_buffer.head/tail include the gtt_offset which we don't want + * while indexing relative to oa_buf_base. */ + head -= gtt_offset; + tail -= gtt_offset; - if (OA_TAKEN(tail, head) < report_size + tail_margin) - return -EAGAIN; + /* An out of bounds or misaligned head or tail pointer implies a driver + * bug since we validate + align the tail pointers we read from the + * hardware and we are in full control of the head pointer which should + * only be incremented by multiples of the report size (notably also + * all a power of two). + */ + if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || + tail > OA_BUFFER_SIZE || tail % report_size, + "Inconsistent OA buffer pointers: head = %u, tail = %u\n", + head, tail)) + return -EIO; - tail -= tail_margin; - tail &= mask; for (/* none */; (taken = OA_TAKEN(tail, head)); @@ -518,7 +632,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, * copying it to userspace... */ if (report32[0] == 0) { - DRM_NOTE("Skipping spurious, invalid OA report\n"); + if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs)) + DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } @@ -535,7 +650,21 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, report32[0] = 0; } - *head_ptr = gtt_offset + head; + if (start_offset != *offset) { + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + /* We removed the gtt_offset for the copy loop above, indexing + * relative to oa_buf_base so put back here... + */ + head += gtt_offset; + + I915_WRITE(GEN7_OASTATUS2, + ((head & GEN7_OASTATUS2_HEAD_MASK) | + OA_MEM_SELECT_GGTT)); + dev_priv->perf.oa.oa_buffer.head = head; + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + } return ret; } @@ -562,22 +691,14 @@ static int gen7_oa_read(struct i915_perf_stream *stream, size_t *offset) { struct drm_i915_private *dev_priv = stream->dev_priv; - int report_size = dev_priv->perf.oa.oa_buffer.format_size; - u32 oastatus2; u32 oastatus1; - u32 head; - u32 tail; int ret; if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) return -EIO; - oastatus2 = I915_READ(GEN7_OASTATUS2); oastatus1 = I915_READ(GEN7_OASTATUS1); - head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK; - tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; - /* XXX: On Haswell we don't have a safe way to clear oastatus1 * bits while the OA unit is enabled (while the tail pointer * may be updated asynchronously) so we ignore status bits @@ -616,11 +737,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, dev_priv->perf.oa.ops.oa_disable(dev_priv); dev_priv->perf.oa.ops.oa_enable(dev_priv); - oastatus2 = I915_READ(GEN7_OASTATUS2); oastatus1 = I915_READ(GEN7_OASTATUS1); - - head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK; - tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; } if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) { @@ -632,29 +749,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, GEN7_OASTATUS1_REPORT_LOST; } - ret = gen7_append_oa_reports(stream, buf, count, offset, - &head, tail); - - /* All the report sizes are a power of two and the - * head should always be incremented by some multiple - * of the report size. - * - * A warning here, but notably if we later read back a - * misaligned pointer we will treat that as a bug since - * it could lead to a buffer overrun. 
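
/*
 * The pointer checks above lean on OA_BUFFER_SIZE being a power of two: an
 * OA_TAKEN()-style masked subtraction gives the number of bytes available
 * even when the tail has wrapped past the end of the buffer. A standalone
 * sketch of that invariant (16 MiB assumed, matching the OABUFFER_SIZE_16M
 * setup used here):
 */
#include <assert.h>

#define BUF_SIZE (16u << 20)			/* power of two */
#define TAKEN(tail, head) (((tail) - (head)) & (BUF_SIZE - 1))

int main(void)
{
	/* No wrap: plain distance between head and tail. */
	assert(TAKEN(4096u, 1024u) == 3072u);

	/* Wrapped: tail restarted near the origin, distance still correct. */
	assert(TAKEN(512u, BUF_SIZE - 256u) == 768u);
	return 0;
}
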
- */ - WARN_ONCE(head & (report_size - 1), - "i915: Writing misaligned OA head pointer"); - - /* Note: we update the head pointer here even if an error - * was returned since the error may represent a short read - * where some some reports were successfully copied. - */ - I915_WRITE(GEN7_OASTATUS2, - ((head & GEN7_OASTATUS2_HEAD_MASK) | - OA_MEM_SELECT_GGTT)); - - return ret; + return gen7_append_oa_reports(stream, buf, count, offset); } /** @@ -679,14 +774,8 @@ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream) if (!dev_priv->perf.oa.periodic) return -EIO; - /* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it - * just performs mmio reads of the OA buffer head + tail pointers and - * it's assumed we're handling some operation that implies the stream - * can't be destroyed until completion (such as a read()) that ensures - * the device + OA buffer can't disappear - */ return wait_event_interruptible(dev_priv->perf.oa.poll_wq, - !dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)); + dev_priv->perf.oa.ops.oa_buffer_check(dev_priv)); } /** @@ -744,6 +833,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[RCS]; + struct intel_ring *ring; int ret; ret = i915_mutex_lock_interruptible(&dev_priv->drm); @@ -755,9 +845,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * * NB: implied RCS engine... */ - ret = engine->context_pin(engine, stream->ctx); - if (ret) - goto unlock; + ring = engine->context_pin(engine, stream->ctx); + mutex_unlock(&dev_priv->drm.struct_mutex); + if (IS_ERR(ring)) + return PTR_ERR(ring); /* Explicitly track the ID (instead of calling i915_ggtt_offset() * on the fly) considering the difference with gen8+ and @@ -766,10 +857,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(stream->ctx->engine[engine->id].state); -unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); - - return ret; + return 0; } /** @@ -824,19 +912,36 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) oa_put_render_ctx_id(stream); dev_priv->perf.oa.exclusive_stream = NULL; + + if (dev_priv->perf.oa.spurious_report_rs.missed) { + DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", + dev_priv->perf.oa.spurious_report_rs.missed); + } } static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) { u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + unsigned long flags; + + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); /* Pre-DevBDW: OABUFFER must be set with counters off, * before OASTATUS1, but after OASTATUS2 */ I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */ + dev_priv->perf.oa.oa_buffer.head = gtt_offset; + I915_WRITE(GEN7_OABUFFER, gtt_offset); + I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ + /* Mark that we need updated tail pointers to read from... */ + dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + /* On Haswell we have to track which OASTATUS1 flags we've * already seen since they can't be cleared while periodic * sampling is enabled. 
@@ -1094,12 +1199,6 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream) hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer); } -static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) -{ - return div_u64(1000000000ULL * (2ULL << exponent), - dev_priv->perf.oa.timestamp_frequency); -} - static const struct i915_perf_stream_ops i915_oa_stream_ops = { .destroy = i915_oa_stream_destroy, .enable = i915_oa_stream_enable, @@ -1173,6 +1272,26 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } + /* We set up some ratelimit state to potentially throttle any _NOTES + * about spurious, invalid OA reports which we don't forward to + * userspace. + * + * The initialization is associated with opening the stream (not driver + * init) considering we print a _NOTE about any throttling when closing + * the stream instead of waiting until driver _fini which no one would + * ever see. + * + * Using the same limiting factors as printk_ratelimit() + */ + ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs, + 5 * HZ, 10); + /* Since we use a DRM_NOTE for spurious reports it would be + * inconsistent to let __ratelimit() automatically print a warning for + * throttling. + */ + ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs, + RATELIMIT_MSG_ON_RELEASE); + stream->sample_size = sizeof(struct drm_i915_perf_record_header); format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size; @@ -1190,20 +1309,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, dev_priv->perf.oa.metrics_set = props->metrics_set; dev_priv->perf.oa.periodic = props->oa_periodic; - if (dev_priv->perf.oa.periodic) { - u32 tail; - + if (dev_priv->perf.oa.periodic) dev_priv->perf.oa.period_exponent = props->oa_period_exponent; - /* See comment for OA_TAIL_MARGIN_NSEC for details - * about this tail_margin... - */ - tail = div64_u64(OA_TAIL_MARGIN_NSEC, - oa_exponent_to_ns(dev_priv, - props->oa_period_exponent)); - dev_priv->perf.oa.tail_margin = (tail + 1) * format_size; - } - if (stream->ctx) { ret = oa_get_render_ctx_id(stream); if (ret) @@ -1352,7 +1460,15 @@ static ssize_t i915_perf_read(struct file *file, mutex_unlock(&dev_priv->perf.lock); } - if (ret >= 0) { + /* We allow the poll checking to sometimes report false positive POLLIN + * events where we might actually report EAGAIN on read() if there's + * not really any data available. In this situation though we don't + * want to enter a busy loop between poll() reporting a POLLIN event + * and read() returning -EAGAIN. Clearing the oa.pollin state here + * effectively ensures we back off until the next hrtimer callback + * before reporting another POLLIN event. + */ + if (ret >= 0 || ret == -EAGAIN) { /* Maybe make ->pollin per-stream state if we support multiple * concurrent streams in the future. 
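
/*
 * A sketch of the poll/read back-off pattern used above (hypothetical
 * stand-ins for the stream state and backend; not driver code): clearing the
 * readiness flag on both success and -EAGAIN means a false-positive POLLIN
 * cannot degenerate into a poll()/read() busy loop, because POLLIN is only
 * re-armed by the periodic hrtimer check.
 */
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <sys/types.h>

struct stream { bool pollin; };

/* Stand-in for the real backend; always claims no data is ready. */
static ssize_t backend_read(struct stream *s, char *buf, size_t len)
{
	(void)s; (void)buf; (void)len;
	return -EAGAIN;
}

static ssize_t stream_read(struct stream *s, char *buf, size_t len)
{
	ssize_t ret = backend_read(s, buf, len);

	/* Back off until the next periodic check re-arms readiness. */
	if (ret >= 0 || ret == -EAGAIN)
		s->pollin = false;

	return ret;
}
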
*/ @@ -1368,7 +1484,7 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) container_of(hrtimer, typeof(*dev_priv), perf.oa.poll_check_timer); - if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) { + if (dev_priv->perf.oa.ops.oa_buffer_check(dev_priv)) { dev_priv->perf.oa.pollin = true; wake_up(&dev_priv->perf.oa.poll_wq); } @@ -1817,11 +1933,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, break; case DRM_I915_PERF_PROP_OA_FORMAT: if (value == 0 || value >= I915_OA_FORMAT_MAX) { - DRM_DEBUG("Invalid OA report format\n"); + DRM_DEBUG("Out-of-range OA report format %llu\n", + value); return -EINVAL; } if (!dev_priv->perf.oa.oa_formats[value].size) { - DRM_DEBUG("Invalid OA report format\n"); + DRM_DEBUG("Unsupported OA report format %llu\n", + value); return -EINVAL; } props->oa_format = value; @@ -2063,6 +2181,7 @@ void i915_perf_init(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->perf.streams); mutex_init(&dev_priv->perf.lock); spin_lock_init(&dev_priv->perf.hook_lock); + spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; @@ -2070,10 +2189,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable; dev_priv->perf.oa.ops.read = gen7_oa_read; - dev_priv->perf.oa.ops.oa_buffer_is_empty = - gen7_oa_buffer_is_empty_fop_unlocked; - - dev_priv->perf.oa.timestamp_frequency = 12500000; + dev_priv->perf.oa.ops.oa_buffer_check = + gen7_oa_buffer_check_unlocked; dev_priv->perf.oa.oa_formats = hsw_oa_formats; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5a7c63e64381..89888adb9af1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -85,6 +85,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VECS_HW 3 #define VCS2_HW 4 +/* Engine class */ + +#define RENDER_CLASS 0 +#define VIDEO_DECODE_CLASS 1 +#define VIDEO_ENHANCEMENT_CLASS 2 +#define COPY_ENGINE_CLASS 3 +#define OTHER_CLASS 4 + /* PCI config space */ #define MCHBAR_I915 0x44 @@ -3366,16 +3374,6 @@ enum skl_disp_power_wells { #define GEN7_CXT_VFSTATE_SIZE(ctx_reg) (((ctx_reg) >> 0) & 0x3f) #define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ GEN7_CXT_VFSTATE_SIZE(ctx_reg)) -/* Haswell does have the CXT_SIZE register however it does not appear to be - * valid. Now, docs explain in dwords what is in the context object. The full - * size is 70720 bytes, however, the power context and execlist context will - * never be saved (power context is stored elsewhere, and execlists don't work - * on HSW) - so the final size, including the extra state required for the - * Resource Streamer, is 66944 bytes, which rounds to 17 pages. - */ -#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) -/* Same as Haswell, but 72064 bytes now. 
*/ -#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) enum { INTEL_ADVANCED_CONTEXT = 0, @@ -5441,9 +5439,7 @@ enum { #define CURSOR_MODE_128_ARGB_AX ((1 << 5) | CURSOR_MODE_128_32B_AX) #define CURSOR_MODE_256_ARGB_AX ((1 << 5) | CURSOR_MODE_256_32B_AX) #define CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX) -#define MCURSOR_PIPE_SELECT (1 << 28) -#define MCURSOR_PIPE_A 0x00 -#define MCURSOR_PIPE_B (1 << 28) +#define MCURSOR_PIPE_SELECT(pipe) ((pipe) << 28) #define MCURSOR_GAMMA_ENABLE (1 << 26) #define CURSOR_ROTATE_180 (1<<15) #define CURSOR_TRICKLE_FEED_DISABLE (1 << 14) @@ -5453,7 +5449,9 @@ enum { #define CURSOR_POS_SIGN 0x8000 #define CURSOR_X_SHIFT 0 #define CURSOR_Y_SHIFT 16 -#define CURSIZE _MMIO(0x700a0) +#define CURSIZE _MMIO(0x700a0) /* 845/865 */ +#define _CUR_FBC_CTL_A 0x700a0 /* ivb+ */ +#define CUR_FBC_CTL_EN (1 << 31) #define _CURBCNTR 0x700c0 #define _CURBBASE 0x700c4 #define _CURBPOS 0x700c8 @@ -5469,6 +5467,7 @@ enum { #define CURCNTR(pipe) _CURSOR2(pipe, _CURACNTR) #define CURBASE(pipe) _CURSOR2(pipe, _CURABASE) #define CURPOS(pipe) _CURSOR2(pipe, _CURAPOS) +#define CUR_FBC_CTL(pipe) _CURSOR2(pipe, _CUR_FBC_CTL_A) #define CURSOR_A_OFFSET 0x70080 #define CURSOR_B_OFFSET 0x700c0 @@ -5501,8 +5500,7 @@ enum { #define DISPPLANE_PIPE_CSC_ENABLE (1<<24) #define DISPPLANE_SEL_PIPE_SHIFT 24 #define DISPPLANE_SEL_PIPE_MASK (3<<DISPPLANE_SEL_PIPE_SHIFT) -#define DISPPLANE_SEL_PIPE_A 0 -#define DISPPLANE_SEL_PIPE_B (1<<DISPPLANE_SEL_PIPE_SHIFT) +#define DISPPLANE_SEL_PIPE(pipe) ((pipe)<<DISPPLANE_SEL_PIPE_SHIFT) #define DISPPLANE_SRC_KEY_ENABLE (1<<22) #define DISPPLANE_SRC_KEY_DISABLE 0 #define DISPPLANE_LINE_DOUBLE (1<<20) @@ -8280,7 +8278,7 @@ enum { /* MIPI DSI registers */ -#define _MIPI_PORT(port, a, c) ((port) ? c : a) /* ports A and C only */ +#define _MIPI_PORT(port, a, c) (((port) == PORT_A) ? 
a : c) /* ports A and C only */ #define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c)) #define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index a277f8eb7beb..474d23c0c0ce 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -12,6 +12,7 @@ #include <linux/reservation.h> #include "i915_sw_fence.h" +#include "i915_selftest.h" #define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */ @@ -120,34 +121,6 @@ void i915_sw_fence_fini(struct i915_sw_fence *fence) } #endif -static void i915_sw_fence_release(struct kref *kref) -{ - struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref); - - WARN_ON(atomic_read(&fence->pending) > 0); - debug_fence_destroy(fence); - - if (fence->flags & I915_SW_FENCE_MASK) { - __i915_sw_fence_notify(fence, FENCE_FREE); - } else { - i915_sw_fence_fini(fence); - kfree(fence); - } -} - -static void i915_sw_fence_put(struct i915_sw_fence *fence) -{ - debug_fence_assert(fence); - kref_put(&fence->kref, i915_sw_fence_release); -} - -static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence) -{ - debug_fence_assert(fence); - kref_get(&fence->kref); - return fence; -} - static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, struct list_head *continuation) { @@ -202,13 +175,15 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence, debug_fence_set_state(fence, DEBUG_FENCE_IDLE, DEBUG_FENCE_NOTIFY); - if (fence->flags & I915_SW_FENCE_MASK && - __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE) + if (__i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE) return; debug_fence_set_state(fence, DEBUG_FENCE_NOTIFY, DEBUG_FENCE_IDLE); __i915_sw_fence_wake_up_all(fence, continuation); + + debug_fence_destroy(fence); + __i915_sw_fence_notify(fence, FENCE_FREE); } static void i915_sw_fence_complete(struct i915_sw_fence *fence) @@ -232,33 +207,26 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence, const char *name, struct lock_class_key *key) { - BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK); + BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK); debug_fence_init(fence); __init_waitqueue_head(&fence->wait, name, key); - kref_init(&fence->kref); atomic_set(&fence->pending, 1); fence->flags = (unsigned long)fn; } -static void __i915_sw_fence_commit(struct i915_sw_fence *fence) -{ - i915_sw_fence_complete(fence); - i915_sw_fence_put(fence); -} - void i915_sw_fence_commit(struct i915_sw_fence *fence) { debug_fence_activate(fence); - __i915_sw_fence_commit(fence); + i915_sw_fence_complete(fence); } static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key) { list_del(&wq->task_list); __i915_sw_fence_complete(wq->private, key); - i915_sw_fence_put(wq->private); + if (wq->flags & I915_SW_FENCE_FLAG_ALLOC) kfree(wq); return 0; @@ -307,7 +275,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence, unsigned long flags; bool err; - if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG)) + if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG)) return false; spin_lock_irqsave(&i915_sw_fence_lock, flags); @@ -353,7 +321,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, INIT_LIST_HEAD(&wq->task_list); wq->flags = pending; wq->func = i915_sw_fence_wake; - wq->private = i915_sw_fence_get(fence); + wq->private = fence; i915_sw_fence_await(fence); @@ -402,7 +370,7 @@ static void timer_i915_sw_fence_wake(unsigned long 
data) dma_fence_put(cb->dma); cb->dma = NULL; - __i915_sw_fence_commit(cb->fence); + i915_sw_fence_complete(cb->fence); cb->timer.function = NULL; } @@ -413,7 +381,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma, del_timer_sync(&cb->timer); if (cb->timer.function) - __i915_sw_fence_commit(cb->fence); + i915_sw_fence_complete(cb->fence); dma_fence_put(cb->dma); kfree(cb); @@ -440,7 +408,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, return dma_fence_wait(dma, false); } - cb->fence = i915_sw_fence_get(fence); + cb->fence = fence; i915_sw_fence_await(fence); cb->dma = NULL; @@ -523,3 +491,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, return ret; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_sw_fence.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index d31cefbbcc04..1d3b6051daaf 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -23,7 +23,6 @@ struct reservation_object; struct i915_sw_fence { wait_queue_head_t wait; unsigned long flags; - struct kref kref; atomic_t pending; }; diff --git a/drivers/gpu/drm/i915/i915_syncmap.c b/drivers/gpu/drm/i915/i915_syncmap.c new file mode 100644 index 000000000000..0087acf731a8 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_syncmap.c @@ -0,0 +1,412 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/slab.h> + +#include "i915_syncmap.h" + +#include "i915_gem.h" /* GEM_BUG_ON() */ +#include "i915_selftest.h" + +#define SHIFT ilog2(KSYNCMAP) +#define MASK (KSYNCMAP - 1) + +/* + * struct i915_syncmap is a layer of a radixtree that maps a u64 fence + * context id to the last u32 fence seqno waited upon from that context. + * Unlike lib/radixtree it uses a parent pointer that allows traversal back to + * the root. This allows us to access the whole tree via a single pointer + * to the most recently used layer. We expect fence contexts to be dense + * and most reuse to be on the same i915_gem_context but on neighbouring + * engines (i.e. on adjacent contexts) and reuse the same leaf, a very + * effective lookup cache. If the new lookup is not on the same leaf, we + * expect it to be on the neighbouring branch. + * + * A leaf holds an array of u32 seqno, and has height 0. The bitmap field + * allows us to store whether a particular seqno is valid (i.e. 
allows us
+ * to distinguish unset from 0).
+ *
+ * A branch holds an array of layer pointers, and has height > 0, and always
+ * has at least 2 layers (either branches or leaves) below it.
+ *
+ * For example,
+ *	for x in
+ *		0 1 2 0x10 0x11 0x200 0x201
+ *		0x500000 0x500001 0x503000 0x503001
+ *		0xE<<60:
+ *	i915_syncmap_set(&sync, x, lower_32_bits(x));
+ * will build a tree like:
+ *	0xXXXXXXXXXXXXXXXX
+ *	0-> 0x0000000000XXXXXX
+ *	|   0-> 0x0000000000000XXX
+ *	|   |   0-> 0x00000000000000XX
+ *	|   |   |   0-> 0x000000000000000X 0:0, 1:1, 2:2
+ *	|   |   |   1-> 0x000000000000001X 0:10, 1:11
+ *	|   |   2-> 0x000000000000020X 0:200, 1:201
+ *	|   5-> 0x000000000050XXXX
+ *	|       0-> 0x000000000050000X 0:500000, 1:500001
+ *	|       3-> 0x000000000050300X 0:503000, 1:503001
+ *	e-> 0xe00000000000000X e:e
+ */

+struct i915_syncmap {
+	u64 prefix;
+	unsigned int height;
+	unsigned int bitmap;
+	struct i915_syncmap *parent;
+	/*
+	 * Following this header is an array of either seqno or child pointers:
+	 * union {
+	 *	u32 seqno[KSYNCMAP];
+	 *	struct i915_syncmap *child[KSYNCMAP];
+	 * };
+	 */
+};
+
+/**
+ * i915_syncmap_init -- initialise the #i915_syncmap
+ * @root - pointer to the #i915_syncmap
+ */
+void i915_syncmap_init(struct i915_syncmap **root)
+{
+	BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
+	BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
+	BUILD_BUG_ON(KSYNCMAP > BITS_PER_BYTE * sizeof((*root)->bitmap));
+	*root = NULL;
+}
+
+static inline u32 *__sync_seqno(struct i915_syncmap *p)
+{
+	GEM_BUG_ON(p->height);
+	return (u32 *)(p + 1);
+}
+
+static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
+{
+	GEM_BUG_ON(!p->height);
+	return (struct i915_syncmap **)(p + 1);
+}
+
+static inline unsigned int
+__sync_branch_idx(const struct i915_syncmap *p, u64 id)
+{
+	return (id >> p->height) & MASK;
+}
+
+static inline unsigned int
+__sync_leaf_idx(const struct i915_syncmap *p, u64 id)
+{
+	GEM_BUG_ON(p->height);
+	return id & MASK;
+}
+
+static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
+{
+	return id >> p->height >> SHIFT;
+}
+
+static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
+{
+	GEM_BUG_ON(p->height);
+	return id >> SHIFT;
+}
+
+static inline bool seqno_later(u32 a, u32 b)
+{
+	return (s32)(a - b) >= 0;
+}
+
+/**
+ * i915_syncmap_is_later -- compare against the last known sync point
+ * @root - pointer to the #i915_syncmap
+ * @id - the context id (other timeline) we are synchronising to
+ * @seqno - the sequence number along the other timeline
+ *
+ * If we have already synchronised this @root timeline with another (@id) then
+ * we can omit any repeated or earlier synchronisation requests. If the two
+ * timelines are already coupled, we can also omit the dependency between the
+ * two as that is already known via the timeline.
+ *
+ * Returns true if the two timelines are already synchronised wrt @seqno,
+ * false if not and the synchronisation must be emitted.
+ */
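
/*
 * The seqno_later() helper above tolerates 32-bit wrap-around by comparing
 * through a signed difference. A standalone demonstration of that property
 * (helper name invented):
 */
#include <assert.h>
#include <stdint.h>

static int later_or_equal(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;	/* same trick as seqno_later() */
}

int main(void)
{
	assert(later_or_equal(100, 99));	/* ordinary case */
	assert(later_or_equal(3, 0xfffffffd));	/* 3 is "after" a wrap */
	assert(!later_or_equal(0xfffffffd, 3));
	return 0;
}
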
+bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
+{
+	struct i915_syncmap *p;
+	unsigned int idx;
+
+	p = *root;
+	if (!p)
+		return false;
+
+	if (likely(__sync_leaf_prefix(p, id) == p->prefix))
+		goto found;
+
+	/* First climb the tree back to a parent branch */
+	do {
+		p = p->parent;
+		if (!p)
+			return false;
+
+		if (__sync_branch_prefix(p, id) == p->prefix)
+			break;
+	} while (1);
+
+	/* And then descend again until we find our leaf */
+	do {
+		if (!p->height)
+			break;
+
+		p = __sync_child(p)[__sync_branch_idx(p, id)];
+		if (!p)
+			return false;
+
+		if (__sync_branch_prefix(p, id) != p->prefix)
+			return false;
+	} while (1);
+
+	*root = p;
+found:
+	idx = __sync_leaf_idx(p, id);
+	if (!(p->bitmap & BIT(idx)))
+		return false;
+
+	return seqno_later(__sync_seqno(p)[idx], seqno);
+}
+
+static struct i915_syncmap *
+__sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
+{
+	struct i915_syncmap *p;
+
+	p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
+	if (unlikely(!p))
+		return NULL;
+
+	p->parent = parent;
+	p->height = 0;
+	p->bitmap = 0;
+	p->prefix = __sync_leaf_prefix(p, id);
+	return p;
+}
+
+static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
+{
+	unsigned int idx = __sync_leaf_idx(p, id);
+
+	p->bitmap |= BIT(idx);
+	__sync_seqno(p)[idx] = seqno;
+}
+
+static inline void __sync_set_child(struct i915_syncmap *p,
+				    unsigned int idx,
+				    struct i915_syncmap *child)
+{
+	p->bitmap |= BIT(idx);
+	__sync_child(p)[idx] = child;
+}
+
+static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
+{
+	struct i915_syncmap *p = *root;
+	unsigned int idx;
+
+	if (!p) {
+		p = __sync_alloc_leaf(NULL, id);
+		if (unlikely(!p))
+			return -ENOMEM;
+
+		goto found;
+	}
+
+	/* Caller handled the likely cached case */
+	GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix);
+
+	/* Climb back up the tree until we find a common prefix */
+	do {
+		if (!p->parent)
+			break;
+
+		p = p->parent;
+
+		if (__sync_branch_prefix(p, id) == p->prefix)
+			break;
+	} while (1);
+
+	/*
+	 * No shortcut, we have to descend the tree to find the right layer
+	 * containing this fence.
+	 *
+	 * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences
+	 * or lower layers. Leaf nodes (height = 0) contain the fences, all
+	 * other nodes (height > 0) are internal layers that point to a lower
+	 * node. Each internal layer has at least 2 descendants.
+	 *
+	 * Starting at the top, we check whether the current prefix matches. If
+	 * it doesn't, we have gone past our target and need to insert a join
+	 * into the tree, and a new leaf node for the target as a descendant
+	 * of the join, as well as the original layer.
+	 *
+	 * The matching prefix means we are still following the right branch
+	 * of the tree. If it has height 0, we have found our leaf and just
+	 * need to replace the fence slot with ourselves. If the height is
+	 * not zero, our slot contains the next layer in the tree (unless
+	 * it is empty, in which case we can add ourselves as a new leaf).
+	 * As we descend the tree the prefix grows (and height decreases).
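
/*
 * A self-contained illustration of the prefix/index arithmetic used by the
 * climb/descend logic above (assuming KSYNCMAP == 16, hence SHIFT == 4, as
 * defined in i915_syncmap.h; helper names here are invented):
 */
#include <assert.h>
#include <stdint.h>

#define SHIFT 4u
#define MASK  0xfu

static uint64_t leaf_prefix(uint64_t id) { return id >> SHIFT; }
static unsigned int leaf_idx(uint64_t id) { return id & MASK; }

/* What __sync_branch_idx() computes for a branch of the given height. */
static unsigned int branch_idx(unsigned int height, uint64_t id)
{
	return (id >> height) & MASK;
}

int main(void)
{
	/* 0x503001 lands in the leaf covering 0x503000..0x50300f, slot 1. */
	assert(leaf_prefix(0x503001) == 0x50300);
	assert(leaf_idx(0x503001) == 1);

	/* A height-4 branch selects on the next nibble up: here slot 0. */
	assert(branch_idx(4, 0x503001) == 0);
	return 0;
}
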
+ */ + do { + struct i915_syncmap *next; + + if (__sync_branch_prefix(p, id) != p->prefix) { + unsigned int above; + + /* Insert a join above the current layer */ + next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next), + GFP_KERNEL); + if (unlikely(!next)) + return -ENOMEM; + + /* Compute the height at which these two diverge */ + above = fls64(__sync_branch_prefix(p, id) ^ p->prefix); + above = round_up(above, SHIFT); + next->height = above + p->height; + next->prefix = __sync_branch_prefix(next, id); + + /* Insert the join into the parent */ + if (p->parent) { + idx = __sync_branch_idx(p->parent, id); + __sync_child(p->parent)[idx] = next; + GEM_BUG_ON(!(p->parent->bitmap & BIT(idx))); + } + next->parent = p->parent; + + /* Compute the idx of the other branch, not our id! */ + idx = p->prefix >> (above - SHIFT) & MASK; + __sync_set_child(next, idx, p); + p->parent = next; + + /* Ascend to the join */ + p = next; + } else { + if (!p->height) + break; + } + + /* Descend into the next layer */ + GEM_BUG_ON(!p->height); + idx = __sync_branch_idx(p, id); + next = __sync_child(p)[idx]; + if (!next) { + next = __sync_alloc_leaf(p, id); + if (unlikely(!next)) + return -ENOMEM; + + __sync_set_child(p, idx, next); + p = next; + break; + } + + p = next; + } while (1); + +found: + GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id)); + __sync_set_seqno(p, id, seqno); + *root = p; + return 0; +} + +/** + * i915_syncmap_set -- mark the most recent syncpoint between contexts + * @root - pointer to the #i915_syncmap + * @id - the context id (other timeline) we have synchronised to + * @seqno - the sequence number along the other timeline + * + * When we synchronise this @root timeline with another (@id), we also know + * that we have synchronized with all previous seqno along that timeline. If + * we then have a request to synchronise with the same seqno or older, we can + * omit it, see i915_syncmap_is_later() + * + * Returns 0 on success, or a negative error code. + */ +int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno) +{ + struct i915_syncmap *p = *root; + + /* + * We expect to be called in sequence following is_later(id), which + * should have preloaded the root for us. + */ + if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) { + __sync_set_seqno(p, id, seqno); + return 0; + } + + return __sync_set(root, id, seqno); +} + +static void __sync_free(struct i915_syncmap *p) +{ + if (p->height) { + unsigned int i; + + while ((i = ffs(p->bitmap))) { + p->bitmap &= ~0u << i; + __sync_free(__sync_child(p)[i - 1]); + } + } + + kfree(p); +} + +/** + * i915_syncmap_free -- free all memory associated with the syncmap + * @root - pointer to the #i915_syncmap + * + * Either when the timeline is to be freed and we no longer need the sync + * point tracking, or when the fences are all known to be signaled and the + * sync point tracking is redundant, we can free the #i915_syncmap to recover + * its allocations. + * + * Will reinitialise the @root pointer so that the #i915_syncmap is ready for + * reuse. 
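
/*
 * The recursive __sync_free() above visits exactly the populated children by
 * peeling the lowest set bit off the bitmap with ffs(). A standalone sketch
 * of that loop shape (userspace ffs() from <strings.h>; slot values
 * invented):
 */
#include <assert.h>
#include <strings.h>

int main(void)
{
	unsigned int bitmap = 0x29;	/* children in slots 0, 3 and 5 */
	int visited[8] = { 0 };
	int i;

	while ((i = ffs(bitmap))) {
		bitmap &= ~0u << i;	/* clears bit i - 1, the lowest set */
		visited[i - 1] = 1;
	}

	assert(visited[0] && visited[3] && visited[5]);
	return 0;
}
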
+ */ +void i915_syncmap_free(struct i915_syncmap **root) +{ + struct i915_syncmap *p; + + p = *root; + if (!p) + return; + + while (p->parent) + p = p->parent; + + __sync_free(p); + *root = NULL; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_syncmap.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_syncmap.h b/drivers/gpu/drm/i915/i915_syncmap.h new file mode 100644 index 000000000000..0653f70bee82 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_syncmap.h @@ -0,0 +1,38 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_SYNCMAP_H__ +#define __I915_SYNCMAP_H__ + +#include <linux/types.h> + +struct i915_syncmap; +#define KSYNCMAP 16 /* radix of the tree, how many slots in each layer */ + +void i915_syncmap_init(struct i915_syncmap **root); +int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno); +bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno); +void i915_syncmap_free(struct i915_syncmap **root); + +#endif /* __I915_SYNCMAP_H__ */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index f3fdfda5e558..1eef3fae4db3 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -181,13 +181,10 @@ i915_l3_write(struct file *filp, struct kobject *kobj, struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct drm_device *dev = &dev_priv->drm; struct i915_gem_context *ctx; - u32 *temp = NULL; /* Just here to make handling failures easy */ int slice = (int)(uintptr_t)attr->private; + u32 **remap_info; int ret; - if (!HAS_HW_CONTEXTS(dev_priv)) - return -ENXIO; - ret = l3_access_valid(dev_priv, offset); if (ret) return ret; @@ -196,11 +193,12 @@ i915_l3_write(struct file *filp, struct kobject *kobj, if (ret) return ret; - if (!dev_priv->l3_parity.remap_info[slice]) { - temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); - if (!temp) { - mutex_unlock(&dev->struct_mutex); - return -ENOMEM; + remap_info = &dev_priv->l3_parity.remap_info[slice]; + if (!*remap_info) { + *remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); + if (!*remap_info) { + ret = -ENOMEM; + goto out; } } @@ -208,18 +206,18 @@ i915_l3_write(struct file *filp, struct kobject *kobj, * aren't propagated. Since I cannot find a stable way to reset the GPU * at this point it is left as a TODO. 
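
/*
 * The i915_l3_write() hunk above switches to a pointer-to-pointer so the slot
 * can be allocated in place on first use, instead of staging a temporary and
 * assigning it later. A minimal userspace sketch of the same
 * allocate-on-first-write pattern (names and sizes invented):
 */
#include <stdlib.h>
#include <string.h>

#define SLOT_SIZE 128

static int write_slot(char **slot, const char *buf, size_t count)
{
	if (!*slot) {
		*slot = calloc(1, SLOT_SIZE);	/* zeroed, like kzalloc() */
		if (!*slot)
			return -1;		/* -ENOMEM in the kernel */
	}

	memcpy(*slot, buf, count < SLOT_SIZE ? count : SLOT_SIZE);
	return 0;
}
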
*/ - if (temp) - dev_priv->l3_parity.remap_info[slice] = temp; - - memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count); + memcpy(*remap_info + (offset/4), buf, count); /* NB: We defer the remapping until we switch to the context */ list_for_each_entry(ctx, &dev_priv->context_list, link) ctx->remap_slice |= (1<<slice); + ret = count; + +out: mutex_unlock(&dev->struct_mutex); - return count; + return ret; } static struct bin_attribute dpf_attrs = { diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 66404c5aee82..b24a83d43559 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -89,6 +89,55 @@ TRACE_EVENT(intel_memory_cxsr, __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) ); +TRACE_EVENT(g4x_wm, + TP_PROTO(struct intel_crtc *crtc, const struct g4x_wm_values *wm), + TP_ARGS(crtc, wm), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u16, primary) + __field(u16, sprite) + __field(u16, cursor) + __field(u16, sr_plane) + __field(u16, sr_cursor) + __field(u16, sr_fbc) + __field(u16, hpll_plane) + __field(u16, hpll_cursor) + __field(u16, hpll_fbc) + __field(bool, cxsr) + __field(bool, hpll) + __field(bool, fbc) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; + __entry->sprite = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; + __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR]; + __entry->sr_plane = wm->sr.plane; + __entry->sr_cursor = wm->sr.cursor; + __entry->sr_fbc = wm->sr.fbc; + __entry->hpll_plane = wm->hpll.plane; + __entry->hpll_cursor = wm->hpll.cursor; + __entry->hpll_fbc = wm->hpll.fbc; + __entry->cxsr = wm->cxsr; + __entry->hpll = wm->hpll_en; + __entry->fbc = wm->fbc_en; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s", + pipe_name(__entry->pipe), __entry->frame, __entry->scanline, + __entry->primary, __entry->sprite, __entry->cursor, + yesno(__entry->cxsr), __entry->sr_plane, __entry->sr_cursor, __entry->sr_fbc, + yesno(__entry->hpll), __entry->hpll_plane, __entry->hpll_cursor, __entry->hpll_fbc, + yesno(__entry->fbc)) +); + TRACE_EVENT(vlv_wm, TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm), TP_ARGS(crtc, wm), diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index c5455d36b617..16ecd1ab108d 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -70,20 +70,27 @@ #define overflows_type(x, T) \ (sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE)) -#define ptr_mask_bits(ptr) ({ \ +#define ptr_mask_bits(ptr, n) ({ \ unsigned long __v = (unsigned long)(ptr); \ - (typeof(ptr))(__v & PAGE_MASK); \ + (typeof(ptr))(__v & -BIT(n)); \ }) -#define ptr_unpack_bits(ptr, bits) ({ \ +#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1)) + +#define ptr_unpack_bits(ptr, bits, n) ({ \ unsigned long __v = (unsigned long)(ptr); \ - (bits) = __v & ~PAGE_MASK; \ - (typeof(ptr))(__v & PAGE_MASK); \ + *(bits) = __v & (BIT(n) - 1); \ + (typeof(ptr))(__v & -BIT(n)); \ }) -#define ptr_pack_bits(ptr, bits) \ +#define ptr_pack_bits(ptr, bits, n) \ ((typeof(ptr))((unsigned long)(ptr) | (bits))) +#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) +#define 
page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) +#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) +#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT) + #define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member) #define fetch_and_zero(ptr) ({ \ @@ -92,4 +99,19 @@ __T; \ }) +#define __mask_next_bit(mask) ({ \ + int __idx = ffs(mask) - 1; \ + mask &= ~BIT(__idx); \ + __idx; \ +}) + +#include <linux/list.h> + +static inline void __list_del_many(struct list_head *head, + struct list_head *first) +{ + first->prev = head; + WRITE_ONCE(head->next, first); +} + #endif /* !__I915_UTILS_H */ diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index cfb47293fd53..4325cb0a04f5 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -55,7 +55,7 @@ intel_create_plane_state(struct drm_plane *plane) return NULL; state->base.plane = plane; - state->base.rotation = DRM_ROTATE_0; + state->base.rotation = DRM_MODE_ROTATE_0; state->ckey.flags = I915_SET_COLORKEY_NONE; return state; @@ -102,23 +102,7 @@ void intel_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { - struct i915_vma *vma; - - vma = fetch_and_zero(&to_intel_plane_state(state)->vma); - - /* - * FIXME: Normally intel_cleanup_plane_fb handles destruction of vma. - * We currently don't clear all planes during driver unload, so we have - * to be able to unpin vma here for now. - * - * Normally this can only happen during unload when kmscon is disabled - * and userspace doesn't attempt to set a framebuffer at all. - */ - if (vma) { - mutex_lock(&plane->dev->struct_mutex); - intel_unpin_fb_vma(vma); - mutex_unlock(&plane->dev->struct_mutex); - } + WARN_ON(to_intel_plane_state(state)->vma); drm_atomic_helper_plane_destroy_state(plane, state); } @@ -178,14 +162,14 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state, /* CHV ignores the mirror bit when the rotate bit is set :( */ if (IS_CHERRYVIEW(dev_priv) && - state->rotation & DRM_ROTATE_180 && - state->rotation & DRM_REFLECT_X) { + state->rotation & DRM_MODE_ROTATE_180 && + state->rotation & DRM_MODE_REFLECT_X) { DRM_DEBUG_KMS("Cannot rotate and reflect at the same time\n"); return -EINVAL; } intel_state->base.visible = false; - ret = intel_plane->check_plane(plane, crtc_state, intel_state); + ret = intel_plane->check_plane(intel_plane, crtc_state, intel_state); if (ret) return ret; @@ -235,14 +219,14 @@ static void intel_plane_atomic_update(struct drm_plane *plane, trace_intel_update_plane(plane, to_intel_crtc(crtc)); - intel_plane->update_plane(plane, + intel_plane->update_plane(intel_plane, to_intel_crtc_state(crtc->state), intel_state); } else { trace_intel_disable_plane(plane, to_intel_crtc(crtc)); - intel_plane->disable_plane(plane, crtc); + intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc)); } } diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 52c207e81f41..d805b6e6fe71 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -632,20 +632,9 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder, (int) port, (int) pipe); } - switch (intel_encoder->type) { - case INTEL_OUTPUT_HDMI: - intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe, - crtc_state->port_clock, - false, 0); - break; - case INTEL_OUTPUT_DP: - intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe, - 
adjusted_mode->crtc_clock, - true, crtc_state->port_clock); - break; - default: - break; - } + intel_lpe_audio_notify(dev_priv, pipe, port, connector->eld, + crtc_state->port_clock, + intel_encoder->type == INTEL_OUTPUT_DP); } /** @@ -680,7 +669,7 @@ void intel_audio_codec_disable(struct intel_encoder *intel_encoder) (int) port, (int) pipe); } - intel_lpe_audio_notify(dev_priv, NULL, port, pipe, 0, false, 0); + intel_lpe_audio_notify(dev_priv, pipe, port, NULL, 0, false); } /** diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 9ccbf26124c6..183afcb036aa 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -64,10 +64,12 @@ static unsigned long wait_timeout(void) static noinline void missed_breadcrumb(struct intel_engine_cs *engine) { - DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s\n", + DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s, current seqno=%x, last=%x\n", engine->name, __builtin_return_address(0), yesno(test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted))); + &engine->irq_posted)), + intel_engine_get_seqno(engine), + intel_engine_last_submit(engine)); set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } @@ -665,12 +667,13 @@ static int intel_breadcrumbs_signaler(void *arg) return 0; } -void intel_engine_enable_signaling(struct drm_i915_gem_request *request) +void intel_engine_enable_signaling(struct drm_i915_gem_request *request, + bool wakeup) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *parent, **p; - bool first, wakeup; + bool first; u32 seqno; /* Note that we may be called from an interrupt handler on another @@ -703,7 +706,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) * If we are the oldest waiter, enable the irq (after which we * must double check that the seqno did not complete). */ - wakeup = __intel_engine_add_wait(engine, &request->signaling.wait); + wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); /* Now insert ourselves into the retirement ordered list of signals * on this engine. We track the oldest seqno as that will be the diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index f29a226e24d8..29792972d55d 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1071,9 +1071,15 @@ static int bxt_calc_cdclk(int max_pixclk) static int glk_calc_cdclk(int max_pixclk) { - if (max_pixclk > 2 * 158400) + /* + * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk + * as a temporary workaround. Use a higher cdclk instead. (Note that + * intel_compute_max_dotclk() limits the max pixel clock to 99% of max + * cdclk.) + */ + if (max_pixclk > DIV_ROUND_UP(2 * 158400 * 99, 100)) return 316800; - else if (max_pixclk > 2 * 79200) + else if (max_pixclk > DIV_ROUND_UP(2 * 79200 * 99, 100)) return 158400; else return 79200; @@ -1664,7 +1670,11 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) int max_cdclk_freq = dev_priv->max_cdclk_freq; if (IS_GEMINILAKE(dev_priv)) - return 2 * max_cdclk_freq; + /* + * FIXME: Limiting to 99% as a temporary workaround. See + * glk_calc_cdclk() for details. 
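
/*
 * Worked numbers for the 99% derating introduced above (kernel-style
 * DIV_ROUND_UP; frequencies in kHz as used by glk_calc_cdclk()):
 */
#include <assert.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* 2 * 158400 kHz derated to 99% gives a 313632 kHz threshold. */
	assert(DIV_ROUND_UP(2 * 158400 * 99, 100) == 313632);

	/* A 313632 kHz pixel clock still fits the 158400 kHz cdclk bucket, */
	assert(!(313632 > DIV_ROUND_UP(2 * 158400 * 99, 100)));
	/* while anything above it must select the 316800 kHz cdclk. */
	assert(313700 > DIV_ROUND_UP(2 * 158400 * 99, 100));
	return 0;
}
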
+ */ + return 2 * max_cdclk_freq * 99 / 100; else if (INTEL_INFO(dev_priv)->gen >= 9 || IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) return max_cdclk_freq; diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 2797bf37c3ac..84a1f5e85153 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -777,13 +777,6 @@ out: return ret; } -static int intel_crt_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t value) -{ - return 0; -} - void intel_crt_reset(struct drm_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->dev); @@ -814,10 +807,9 @@ static const struct drm_connector_funcs intel_crt_connector_funcs = { .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_crt_destroy, - .set_property = intel_crt_set_property, + .set_property = drm_atomic_helper_connector_set_property, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_get_property = intel_connector_atomic_get_property, }; static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs = { diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 7d01dfe7faac..3718341662c2 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -337,7 +337,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { for_each_pipe(dev_priv, pipe) info->num_sprites[pipe] = 2; - } else if (INTEL_GEN(dev_priv) >= 5) { + } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { for_each_pipe(dev_priv, pipe) info->num_sprites[pipe] = 1; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3617927af269..7fa21df5bcd7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1277,7 +1277,7 @@ static void assert_sprites_disabled(struct drm_i915_private *dev_priv, I915_STATE_WARN(val & SPRITE_ENABLE, "sprite %c assertion failure, should be off on pipe %c but is still active\n", plane_name(pipe), pipe_name(pipe)); - } else if (INTEL_GEN(dev_priv) >= 5) { + } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { u32 val = I915_READ(DVSCNTR(pipe)); I915_STATE_WARN(val & DVS_ENABLE, "sprite %c assertion failure, should be off on pipe %c but is still active\n", @@ -2084,6 +2084,18 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, } } +static unsigned int intel_cursor_alignment(const struct drm_i915_private *dev_priv) +{ + if (IS_I830(dev_priv)) + return 16 * 1024; + else if (IS_I85X(dev_priv)) + return 256; + else if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) + return 32; + else + return 4 * 1024; +} + static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv) { if (INTEL_INFO(dev_priv)->gen >= 9) @@ -2386,11 +2398,17 @@ u32 intel_compute_tile_offset(int *x, int *y, const struct intel_plane_state *state, int plane) { - const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev); + struct intel_plane *intel_plane = to_intel_plane(state->base.plane); + struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev); const struct drm_framebuffer *fb = state->base.fb; unsigned int rotation = state->base.rotation; int pitch = intel_fb_pitch(fb, plane, rotation); - u32 alignment = 
intel_surf_alignment(fb, plane); + u32 alignment; + + if (intel_plane->id == PLANE_CURSOR) + alignment = intel_cursor_alignment(dev_priv); + else + alignment = intel_surf_alignment(fb, plane); return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch, rotation, alignment); @@ -2468,7 +2486,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, offset = _intel_compute_tile_offset(dev_priv, &x, &y, fb, i, fb->pitches[i], - DRM_ROTATE_0, tile_size); + DRM_MODE_ROTATE_0, tile_size); offset /= tile_size; if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { @@ -2503,7 +2521,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, drm_rect_rotate(&r, rot_info->plane[i].width * tile_width, rot_info->plane[i].height * tile_height, - DRM_ROTATE_270); + DRM_MODE_ROTATE_270); x = r.x1; y = r.y1; @@ -2750,7 +2768,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, false); intel_pre_disable_primary_noatomic(&intel_crtc->base); trace_intel_disable_plane(primary, intel_crtc); - intel_plane->disable_plane(primary, &intel_crtc->base); + intel_plane->disable_plane(intel_plane, intel_crtc); return; @@ -2939,7 +2957,7 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) if (drm_rotation_90_or_270(rotation)) drm_rect_rotate(&plane_state->base.src, fb->width << 16, fb->height << 16, - DRM_ROTATE_270); + DRM_MODE_ROTATE_270); /* * Handle the AUX surface first since @@ -2981,10 +2999,8 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; - if (INTEL_GEN(dev_priv) < 4) { - if (crtc->pipe == PIPE_B) - dspcntr |= DISPPLANE_SEL_PIPE_B; - } + if (INTEL_GEN(dev_priv) < 4) + dspcntr |= DISPPLANE_SEL_PIPE(crtc->pipe); switch (fb->format->format) { case DRM_FORMAT_C8: @@ -3017,10 +3033,10 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, fb->modifier == I915_FORMAT_MOD_X_TILED) dspcntr |= DISPPLANE_TILED; - if (rotation & DRM_ROTATE_180) + if (rotation & DRM_MODE_ROTATE_180) dspcntr |= DISPPLANE_ROTATE_180; - if (rotation & DRM_REFLECT_X) + if (rotation & DRM_MODE_REFLECT_X) dspcntr |= DISPPLANE_MIRROR; return dspcntr; @@ -3048,10 +3064,10 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) int src_w = drm_rect_width(&plane_state->base.src) >> 16; int src_h = drm_rect_height(&plane_state->base.src) >> 16; - if (rotation & DRM_ROTATE_180) { + if (rotation & DRM_MODE_ROTATE_180) { src_x += src_w - 1; src_y += src_h - 1; - } else if (rotation & DRM_REFLECT_X) { + } else if (rotation & DRM_MODE_REFLECT_X) { src_x += src_w - 1; } } @@ -3063,14 +3079,14 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) return 0; } -static void i9xx_update_primary_plane(struct drm_plane *primary, +static void i9xx_update_primary_plane(struct intel_plane *primary, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(primary->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_framebuffer *fb = plane_state->base.fb; - int plane = intel_crtc->plane; + struct drm_i915_private *dev_priv = to_i915(primary->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct drm_framebuffer *fb = plane_state->base.fb; + enum plane plane = primary->plane; u32 linear_offset; u32 dspcntr = plane_state->ctl; i915_reg_t reg = DSPCNTR(plane); @@ -3081,12 +3097,12 @@ static void i9xx_update_primary_plane(struct 
drm_plane *primary, linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); if (INTEL_GEN(dev_priv) >= 4) - intel_crtc->dspaddr_offset = plane_state->main.offset; + crtc->dspaddr_offset = plane_state->main.offset; else - intel_crtc->dspaddr_offset = linear_offset; + crtc->dspaddr_offset = linear_offset; - intel_crtc->adjusted_x = x; - intel_crtc->adjusted_y = y; + crtc->adjusted_x = x; + crtc->adjusted_y = y; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -3112,31 +3128,29 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { I915_WRITE_FW(DSPSURF(plane), intel_plane_ggtt_offset(plane_state) + - intel_crtc->dspaddr_offset); + crtc->dspaddr_offset); I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); } else if (INTEL_GEN(dev_priv) >= 4) { I915_WRITE_FW(DSPSURF(plane), intel_plane_ggtt_offset(plane_state) + - intel_crtc->dspaddr_offset); + crtc->dspaddr_offset); I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x); I915_WRITE_FW(DSPLINOFF(plane), linear_offset); } else { I915_WRITE_FW(DSPADDR(plane), intel_plane_ggtt_offset(plane_state) + - intel_crtc->dspaddr_offset); + crtc->dspaddr_offset); } POSTING_READ_FW(reg); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void i9xx_disable_primary_plane(struct drm_plane *primary, - struct drm_crtc *crtc) +static void i9xx_disable_primary_plane(struct intel_plane *primary, + struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int plane = intel_crtc->plane; + struct drm_i915_private *dev_priv = to_i915(primary->base.dev); + enum plane plane = primary->plane; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -3271,17 +3285,17 @@ static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) static u32 skl_plane_ctl_rotation(unsigned int rotation) { switch (rotation) { - case DRM_ROTATE_0: + case DRM_MODE_ROTATE_0: break; /* - * DRM_ROTATE_ is counter clockwise to stay compatible with Xrandr + * DRM_MODE_ROTATE_ is counter clockwise to stay compatible with Xrandr * while i915 HW rotation is clockwise, thats why this swapping. 
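
/*
 * A compact model of the CCW-to-CW conversion described in the comment above
 * (enum values invented; the driver maps DRM_MODE_ROTATE_90 to
 * PLANE_CTL_ROTATE_270 and vice versa, while 0 and 180 pass through):
 */
#include <assert.h>

enum rot { ROT_0, ROT_90, ROT_180, ROT_270 };

static enum rot ccw_to_cw(enum rot ccw)
{
	switch (ccw) {
	case ROT_90:	return ROT_270;
	case ROT_270:	return ROT_90;
	default:	return ccw;	/* 0 and 180 are self-inverse */
	}
}

int main(void)
{
	assert(ccw_to_cw(ROT_90) == ROT_270);
	assert(ccw_to_cw(ROT_180) == ROT_180);
	return 0;
}
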
@@ -3271,17 +3285,17 @@ static u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
 static u32 skl_plane_ctl_rotation(unsigned int rotation)
 {
     switch (rotation) {
-    case DRM_ROTATE_0:
+    case DRM_MODE_ROTATE_0:
         break;
     /*
-     * DRM_ROTATE_ is counter clockwise to stay compatible with Xrandr
+     * DRM_MODE_ROTATE_ is counter clockwise to stay compatible with Xrandr
      * while i915 HW rotation is clockwise, thats why this swapping.
      */
-    case DRM_ROTATE_90:
+    case DRM_MODE_ROTATE_90:
         return PLANE_CTL_ROTATE_270;
-    case DRM_ROTATE_180:
+    case DRM_MODE_ROTATE_180:
         return PLANE_CTL_ROTATE_180;
-    case DRM_ROTATE_270:
+    case DRM_MODE_ROTATE_270:
         return PLANE_CTL_ROTATE_90;
     default:
         MISSING_CASE(rotation);
@@ -3321,16 +3335,15 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
     return plane_ctl;
 }
 
-static void skylake_update_primary_plane(struct drm_plane *plane,
+static void skylake_update_primary_plane(struct intel_plane *plane,
                                          const struct intel_crtc_state *crtc_state,
                                          const struct intel_plane_state *plane_state)
 {
-    struct drm_device *dev = plane->dev;
-    struct drm_i915_private *dev_priv = to_i915(dev);
-    struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-    struct drm_framebuffer *fb = plane_state->base.fb;
-    enum plane_id plane_id = to_intel_plane(plane)->id;
-    enum pipe pipe = to_intel_plane(plane)->pipe;
+    struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+    struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+    const struct drm_framebuffer *fb = plane_state->base.fb;
+    enum plane_id plane_id = plane->id;
+    enum pipe pipe = plane->pipe;
     u32 plane_ctl = plane_state->ctl;
     unsigned int rotation = plane_state->base.rotation;
     u32 stride = skl_plane_stride(fb, 0, rotation);
@@ -3352,10 +3365,10 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
     dst_w--;
     dst_h--;
 
-    intel_crtc->dspaddr_offset = surf_addr;
+    crtc->dspaddr_offset = surf_addr;
 
-    intel_crtc->adjusted_x = src_x;
-    intel_crtc->adjusted_y = src_y;
+    crtc->adjusted_x = src_x;
+    crtc->adjusted_y = src_y;
 
     spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -3394,13 +3407,12 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
     spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
-static void skylake_disable_primary_plane(struct drm_plane *primary,
-                                          struct drm_crtc *crtc)
+static void skylake_disable_primary_plane(struct intel_plane *primary,
+                                          struct intel_crtc *crtc)
 {
-    struct drm_device *dev = crtc->dev;
-    struct drm_i915_private *dev_priv = to_i915(dev);
-    enum plane_id plane_id = to_intel_plane(primary)->id;
-    enum pipe pipe = to_intel_plane(primary)->pipe;
+    struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
+    enum plane_id plane_id = primary->id;
+    enum pipe pipe = primary->pipe;
     unsigned long irqflags;
 
     spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -3433,7 +3445,7 @@ static void intel_update_primary_planes(struct drm_device *dev)
 
         trace_intel_update_plane(&plane->base,
                                  to_intel_crtc(crtc));
-        plane->update_plane(&plane->base,
+        plane->update_plane(plane,
                             to_intel_crtc_state(crtc->state),
                             plane_state);
     }
@@ -4671,7 +4683,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state)
     const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode;
 
     return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
-        &state->scaler_state.scaler_id, DRM_ROTATE_0,
+        &state->scaler_state.scaler_id, DRM_MODE_ROTATE_0,
         state->pipe_src_w, state->pipe_src_h,
         adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
 }
@@ -4861,12 +4873,9 @@ static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc)
 {
     if (intel_crtc->overlay) {
         struct drm_device *dev = intel_crtc->base.dev;
-        struct drm_i915_private *dev_priv = to_i915(dev);
 
         mutex_lock(&dev->struct_mutex);
-        dev_priv->mm.interruptible = false;
         (void) intel_overlay_switch_off(intel_crtc->overlay);
-        dev_priv->mm.interruptible = true;
         mutex_unlock(&dev->struct_mutex);
     }
 
@@ -5086,7 +5095,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask
     intel_crtc_dpms_overlay_disable(intel_crtc);
 
     drm_for_each_plane_mask(p, dev, plane_mask)
-        to_intel_plane(p)->disable_plane(p, crtc);
+        to_intel_plane(p)->disable_plane(to_intel_plane(p), intel_crtc);
 
     /*
      * FIXME: Once we grow proper nuclear flip support out of this we need
@@ -5722,6 +5731,8 @@ static void i9xx_set_pll_dividers(struct intel_crtc *crtc)
 static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
                              struct drm_atomic_state *old_state)
 {
+    struct intel_atomic_state *old_intel_state =
+        to_intel_atomic_state(old_state);
     struct drm_crtc *crtc = pipe_config->base.crtc;
     struct drm_device *dev = crtc->dev;
     struct drm_i915_private *dev_priv = to_i915(dev);
@@ -5754,7 +5765,11 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
 
     intel_color_load_luts(&pipe_config->base);
 
-    intel_update_watermarks(intel_crtc);
+    if (dev_priv->display.initial_watermarks != NULL)
+        dev_priv->display.initial_watermarks(old_intel_state,
+                                             intel_crtc->config);
+    else
+        intel_update_watermarks(intel_crtc);
     intel_enable_pipe(intel_crtc);
 
     assert_vblank_disabled(crtc);
@@ -5920,9 +5935,10 @@ void intel_encoder_destroy(struct drm_encoder *encoder)
 
 /* Cross check the actual hw state with our own modeset state tracking (and it's
  * internal consistency). */
-static void intel_connector_verify_state(struct intel_connector *connector)
+static void intel_connector_verify_state(struct drm_crtc_state *crtc_state,
+                                         struct drm_connector_state *conn_state)
 {
-    struct drm_crtc *crtc = connector->base.state->crtc;
+    struct intel_connector *connector = to_intel_connector(conn_state->connector);
 
     DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
                   connector->base.base.id,
@@ -5930,15 +5946,14 @@ static void intel_connector_verify_state(struct intel_connector *connector)
 
     if (connector->get_hw_state(connector)) {
         struct intel_encoder *encoder = connector->encoder;
-        struct drm_connector_state *conn_state = connector->base.state;
 
-        I915_STATE_WARN(!crtc,
+        I915_STATE_WARN(!crtc_state,
                         "connector enabled without attached crtc\n");
 
-        if (!crtc)
+        if (!crtc_state)
             return;
 
-        I915_STATE_WARN(!crtc->state->active,
+        I915_STATE_WARN(!crtc_state->active,
                         "connector is active, but attached crtc isn't\n");
 
         if (!encoder || encoder->type == INTEL_OUTPUT_DP_MST)
@@ -5950,9 +5965,9 @@ static void intel_connector_verify_state(struct intel_connector *connector)
         I915_STATE_WARN(conn_state->crtc != encoder->base.crtc,
                         "attached encoder crtc differs from connector crtc\n");
     } else {
-        I915_STATE_WARN(crtc && crtc->state->active,
+        I915_STATE_WARN(crtc_state && crtc_state->active,
                         "attached crtc is active, but connector isn't\n");
-        I915_STATE_WARN(!crtc && connector->base.state->best_encoder,
+        I915_STATE_WARN(!crtc_state && conn_state->best_encoder,
                         "best encoder set without crtc!\n");
     }
 }
@@ -6372,8 +6387,8 @@ static void vlv_pllb_recal_opamp(struct drm_i915_private *dev_priv, enum pipe
     vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW9(1), reg_val);
 
     reg_val = vlv_dpio_read(dev_priv, pipe, VLV_REF_DW13);
-    reg_val &= 0x8cffffff;
-    reg_val = 0x8c000000;
+    reg_val &= 0x00ffffff;
+    reg_val |= 0x8c000000;
     vlv_dpio_write(dev_priv, pipe, VLV_REF_DW13, reg_val);
 
     reg_val = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW9(1));
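/*
 * The VLV_REF_DW13 hunk above fixes a classic read-modify-write slip: the
 * old code masked reg_val and then clobbered it with a plain assignment,
 * throwing away the bits it had just preserved.  The corrected shape, shown
 * with hypothetical read_reg()/write_reg() helpers rather than the real
 * vlv_dpio accessors:
 */
u32 val = read_reg(VLV_REF_DW13);   /* read */
val &= 0x00ffffff;                  /* clear only the field being updated */
val |= 0x8c000000;                  /* OR in the new field value */
write_reg(VLV_REF_DW13, val);       /* write back, low 24 bits intact */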
@@ -8177,9 +8192,6 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
 {
     struct drm_device *dev = crtc->base.dev;
     struct drm_i915_private *dev_priv = to_i915(dev);
-    struct dpll reduced_clock;
-    bool has_reduced_clock = false;
-    struct intel_shared_dpll *pll;
     const struct intel_limit *limit;
     int refclk = 120000;
 
@@ -8221,20 +8233,14 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
         return -EINVAL;
     }
 
-    ironlake_compute_dpll(crtc, crtc_state,
-                          has_reduced_clock ? &reduced_clock : NULL);
+    ironlake_compute_dpll(crtc, crtc_state, NULL);
 
-    pll = intel_get_shared_dpll(crtc, crtc_state, NULL);
-    if (pll == NULL) {
+    if (!intel_get_shared_dpll(crtc, crtc_state, NULL)) {
         DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
                          pipe_name(crtc->pipe));
         return -EINVAL;
     }
 
-    if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_LVDS) &&
-        has_reduced_clock)
-        crtc->lowfreq_avail = true;
-
     return 0;
 }
 
@@ -9138,38 +9144,171 @@ out:
     return active;
 }
 
+static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
+{
+    struct drm_i915_private *dev_priv =
+        to_i915(plane_state->base.plane->dev);
+    const struct drm_framebuffer *fb = plane_state->base.fb;
+    const struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+    u32 base;
+
+    if (INTEL_INFO(dev_priv)->cursor_needs_physical)
+        base = obj->phys_handle->busaddr;
+    else
+        base = intel_plane_ggtt_offset(plane_state);
+
+    base += plane_state->main.offset;
+
+    /* ILK+ do this automagically */
+    if (HAS_GMCH_DISPLAY(dev_priv) &&
+        plane_state->base.rotation & DRM_MODE_ROTATE_180)
+        base += (plane_state->base.crtc_h *
+                 plane_state->base.crtc_w - 1) * fb->format->cpp[0];
+
+    return base;
+}
+
+static u32 intel_cursor_position(const struct intel_plane_state *plane_state)
+{
+    int x = plane_state->base.crtc_x;
+    int y = plane_state->base.crtc_y;
+    u32 pos = 0;
+
+    if (x < 0) {
+        pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;
+        x = -x;
+    }
+    pos |= x << CURSOR_X_SHIFT;
+
+    if (y < 0) {
+        pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT;
+        y = -y;
+    }
+    pos |= y << CURSOR_Y_SHIFT;
+
+    return pos;
+}
+
+static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state)
+{
+    const struct drm_mode_config *config =
+        &plane_state->base.plane->dev->mode_config;
+    int width = plane_state->base.crtc_w;
+    int height = plane_state->base.crtc_h;
+
+    return width > 0 && width <= config->cursor_width &&
+           height > 0 && height <= config->cursor_height;
+}
+
+static int intel_check_cursor(struct intel_crtc_state *crtc_state,
+                              struct intel_plane_state *plane_state)
+{
+    const struct drm_framebuffer *fb = plane_state->base.fb;
+    int src_x, src_y;
+    u32 offset;
+    int ret;
+
+    ret = drm_plane_helper_check_state(&plane_state->base,
+                                       &plane_state->clip,
+                                       DRM_PLANE_HELPER_NO_SCALING,
+                                       DRM_PLANE_HELPER_NO_SCALING,
+                                       true, true);
+    if (ret)
+        return ret;
+
+    if (!fb)
+        return 0;
+
+    if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
+        DRM_DEBUG_KMS("cursor cannot be tiled\n");
+        return -EINVAL;
+    }
+
+    src_x = plane_state->base.src_x >> 16;
+    src_y = plane_state->base.src_y >> 16;
+
+    intel_add_fb_offsets(&src_x, &src_y, plane_state, 0);
+    offset = intel_compute_tile_offset(&src_x, &src_y, plane_state, 0);
+
+    if (src_x != 0 || src_y != 0) {
+        DRM_DEBUG_KMS("Arbitrary cursor panning not supported\n");
+        return -EINVAL;
+    }
+
+    plane_state->main.offset = offset;
+
+    return 0;
+}
+
 static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state,
                            const struct intel_plane_state *plane_state)
 {
-    unsigned int width = plane_state->base.crtc_w;
-    unsigned int stride = roundup_pow_of_two(width) * 4;
+    const struct drm_framebuffer *fb = plane_state->base.fb;
 
-    switch (stride) {
-    default:
-        WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n",
-                  width, stride);
-        stride = 256;
-        /* fallthrough */
+    return CURSOR_ENABLE |
+        CURSOR_GAMMA_ENABLE |
+        CURSOR_FORMAT_ARGB |
+        CURSOR_STRIDE(fb->pitches[0]);
+}
+
+static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state)
+{
+    int width = plane_state->base.crtc_w;
+
+    /*
+     * 845g/865g are only limited by the width of their cursors,
+     * the height is arbitrary up to the precision of the register.
+     */
+    return intel_cursor_size_ok(plane_state) && IS_ALIGNED(width, 64);
+}
+
+static int i845_check_cursor(struct intel_plane *plane,
+                             struct intel_crtc_state *crtc_state,
+                             struct intel_plane_state *plane_state)
+{
+    const struct drm_framebuffer *fb = plane_state->base.fb;
+    int ret;
+
+    ret = intel_check_cursor(crtc_state, plane_state);
+    if (ret)
+        return ret;
+
+    /* if we want to turn off the cursor ignore width and height */
+    if (!fb)
+        return 0;
+
+    /* Check for which cursor types we support */
+    if (!i845_cursor_size_ok(plane_state)) {
+        DRM_DEBUG("Cursor dimension %dx%d not supported\n",
+                  plane_state->base.crtc_w,
+                  plane_state->base.crtc_h);
+        return -EINVAL;
+    }
+
+    switch (fb->pitches[0]) {
     case 256:
     case 512:
     case 1024:
     case 2048:
         break;
+    default:
+        DRM_DEBUG_KMS("Invalid cursor stride (%u)\n",
+                      fb->pitches[0]);
+        return -EINVAL;
     }
 
-    return CURSOR_ENABLE |
-        CURSOR_GAMMA_ENABLE |
-        CURSOR_FORMAT_ARGB |
-        CURSOR_STRIDE(stride);
+    plane_state->ctl = i845_cursor_ctl(crtc_state, plane_state);
+
+    return 0;
 }
 
-static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
+static void i845_update_cursor(struct intel_plane *plane,
+                               const struct intel_crtc_state *crtc_state,
                                const struct intel_plane_state *plane_state)
 {
-    struct drm_device *dev = crtc->dev;
-    struct drm_i915_private *dev_priv = to_i915(dev);
-    struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-    uint32_t cntl = 0, size = 0;
+    struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+    u32 cntl = 0, base = 0, pos = 0, size = 0;
+    unsigned long irqflags;
 
     if (plane_state && plane_state->base.visible) {
         unsigned int width = plane_state->base.crtc_w;
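/*
 * intel_cursor_position() above packs the cursor location into the
 * sign/magnitude format the CURPOS registers expect.  A compressed
 * restatement of that encoding; the helper name is made up, the CURSOR_*
 * macros are the ones from i915_reg.h:
 */
static u32 encode_cursor_axis(int v, unsigned int shift)
{
    u32 pos = 0;

    if (v < 0) {
        pos |= CURSOR_POS_SIGN << shift;    /* sign bit */
        v = -v;                             /* magnitude */
    }
    return pos | (v << shift);
}
/* pos = encode_cursor_axis(x, CURSOR_X_SHIFT) |
 *       encode_cursor_axis(y, CURSOR_Y_SHIFT); */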
@@ -9177,35 +9316,41 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
 
         cntl = plane_state->ctl;
         size = (height << 12) | width;
-    }
 
-    if (intel_crtc->cursor_cntl != 0 &&
-        (intel_crtc->cursor_base != base ||
-         intel_crtc->cursor_size != size ||
-         intel_crtc->cursor_cntl != cntl)) {
-        /* On these chipsets we can only modify the base/size/stride
-         * whilst the cursor is disabled.
-         */
-        I915_WRITE_FW(CURCNTR(PIPE_A), 0);
-        POSTING_READ_FW(CURCNTR(PIPE_A));
-        intel_crtc->cursor_cntl = 0;
+        base = intel_cursor_base(plane_state);
+        pos = intel_cursor_position(plane_state);
     }
 
-    if (intel_crtc->cursor_base != base) {
-        I915_WRITE_FW(CURBASE(PIPE_A), base);
-        intel_crtc->cursor_base = base;
-    }
+    spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 
-    if (intel_crtc->cursor_size != size) {
+    /* On these chipsets we can only modify the base/size/stride
+     * whilst the cursor is disabled.
+     */
+    if (plane->cursor.base != base ||
+        plane->cursor.size != size ||
+        plane->cursor.cntl != cntl) {
+        I915_WRITE_FW(CURCNTR(PIPE_A), 0);
+        I915_WRITE_FW(CURBASE(PIPE_A), base);
         I915_WRITE_FW(CURSIZE, size);
-        intel_crtc->cursor_size = size;
-    }
-
-    if (intel_crtc->cursor_cntl != cntl) {
+        I915_WRITE_FW(CURPOS(PIPE_A), pos);
         I915_WRITE_FW(CURCNTR(PIPE_A), cntl);
-        POSTING_READ_FW(CURCNTR(PIPE_A));
-        intel_crtc->cursor_cntl = cntl;
+
+        plane->cursor.base = base;
+        plane->cursor.size = size;
+        plane->cursor.cntl = cntl;
+    } else {
+        I915_WRITE_FW(CURPOS(PIPE_A), pos);
     }
+
+    POSTING_READ_FW(CURCNTR(PIPE_A));
+
+    spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+}
+
+static void i845_disable_cursor(struct intel_plane *plane,
+                                struct intel_crtc *crtc)
+{
+    i845_update_cursor(plane, NULL, NULL);
 }
 
 static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
@@ -9214,7 +9359,6 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
     struct drm_i915_private *dev_priv =
         to_i915(plane_state->base.plane->dev);
     struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-    enum pipe pipe = crtc->pipe;
     u32 cntl;
 
     cntl = MCURSOR_GAMMA_ENABLE;
@@ -9222,7 +9366,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
     if (HAS_DDI(dev_priv))
         cntl |= CURSOR_PIPE_CSC_ENABLE;
 
-    cntl |= pipe << 28; /* Connect to correct pipe */
+    cntl |= MCURSOR_PIPE_SELECT(crtc->pipe);
 
     switch (plane_state->base.crtc_w) {
     case 64:
@@ -9239,122 +9383,160 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
         return 0;
     }
 
-    if (plane_state->base.rotation & DRM_ROTATE_180)
+    if (plane_state->base.rotation & DRM_MODE_ROTATE_180)
         cntl |= CURSOR_ROTATE_180;
 
     return cntl;
 }
 
-static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base,
-                               const struct intel_plane_state *plane_state)
+static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state)
 {
-    struct drm_device *dev = crtc->dev;
-    struct drm_i915_private *dev_priv = to_i915(dev);
-    struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-    int pipe = intel_crtc->pipe;
-    uint32_t cntl = 0;
+    struct drm_i915_private *dev_priv =
+        to_i915(plane_state->base.plane->dev);
+    int width = plane_state->base.crtc_w;
+    int height = plane_state->base.crtc_h;
 
-    if (plane_state && plane_state->base.visible)
-        cntl = plane_state->ctl;
+    if (!intel_cursor_size_ok(plane_state))
+        return false;
 
-    if (intel_crtc->cursor_cntl != cntl) {
-        I915_WRITE_FW(CURCNTR(pipe), cntl);
-        POSTING_READ_FW(CURCNTR(pipe));
-        intel_crtc->cursor_cntl = cntl;
+    /* Cursor width is limited to a few power-of-two sizes */
+    switch (width) {
+    case 256:
+    case 128:
+    case 64:
+        break;
+    default:
+        return false;
     }
 
-    /* and commit changes on next vblank */
-    I915_WRITE_FW(CURBASE(pipe), base);
-    POSTING_READ_FW(CURBASE(pipe));
+    /*
+     * IVB+ have CUR_FBC_CTL which allows an arbitrary cursor
+     * height from 8 lines up to the cursor width, when the
+     * cursor is not rotated. Everything else requires square
+     * cursors.
+     */
+    if (HAS_CUR_FBC(dev_priv) &&
+        plane_state->base.rotation & DRM_MODE_ROTATE_0) {
+        if (height < 8 || height > width)
+            return false;
+    } else {
+        if (height != width)
+            return false;
+    }
 
-    intel_crtc->cursor_base = base;
+    return true;
 }
 
-/* If no-part of the cursor is visible on the framebuffer, then the GPU may hang... */
-static void intel_crtc_update_cursor(struct drm_crtc *crtc,
-                                     const struct intel_plane_state *plane_state)
+static int i9xx_check_cursor(struct intel_plane *plane,
+                             struct intel_crtc_state *crtc_state,
+                             struct intel_plane_state *plane_state)
 {
-    struct drm_device *dev = crtc->dev;
-    struct drm_i915_private *dev_priv = to_i915(dev);
-    struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-    int pipe = intel_crtc->pipe;
-    u32 base = intel_crtc->cursor_addr;
-    unsigned long irqflags;
-    u32 pos = 0;
-
-    if (plane_state) {
-        int x = plane_state->base.crtc_x;
-        int y = plane_state->base.crtc_y;
+    struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+    const struct drm_framebuffer *fb = plane_state->base.fb;
+    enum pipe pipe = plane->pipe;
+    int ret;
 
-        if (x < 0) {
-            pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;
-            x = -x;
-        }
-        pos |= x << CURSOR_X_SHIFT;
+    ret = intel_check_cursor(crtc_state, plane_state);
+    if (ret)
+        return ret;
 
-        if (y < 0) {
-            pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT;
-            y = -y;
-        }
-        pos |= y << CURSOR_Y_SHIFT;
+    /* if we want to turn off the cursor ignore width and height */
+    if (!fb)
+        return 0;
 
-        /* ILK+ do this automagically */
-        if (HAS_GMCH_DISPLAY(dev_priv) &&
-            plane_state->base.rotation & DRM_ROTATE_180) {
-            base += (plane_state->base.crtc_h *
-                     plane_state->base.crtc_w - 1) * 4;
-        }
+    /* Check for which cursor types we support */
+    if (!i9xx_cursor_size_ok(plane_state)) {
+        DRM_DEBUG("Cursor dimension %dx%d not supported\n",
+                  plane_state->base.crtc_w,
+                  plane_state->base.crtc_h);
+        return -EINVAL;
     }
 
-    spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+    if (fb->pitches[0] != plane_state->base.crtc_w * fb->format->cpp[0]) {
+        DRM_DEBUG_KMS("Invalid cursor stride (%u) (cursor width %d)\n",
+                      fb->pitches[0], plane_state->base.crtc_w);
+        return -EINVAL;
    }
 
-    I915_WRITE_FW(CURPOS(pipe), pos);
+    /*
+     * There's something wrong with the cursor on CHV pipe C.
+     * If it straddles the left edge of the screen then
+     * moving it away from the edge or disabling it often
+     * results in a pipe underrun, and often that can lead to
+     * dead pipe (constant underrun reported, and it scans
+     * out just a solid color). To recover from that, the
+     * display power well must be turned off and on again.
+     * Refuse the put the cursor into that compromised position.
+     */
+    if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C &&
+        plane_state->base.visible && plane_state->base.crtc_x < 0) {
+        DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
+        return -EINVAL;
+    }
 
-    if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
-        i845_update_cursor(crtc, base, plane_state);
-    else
-        i9xx_update_cursor(crtc, base, plane_state);
+    plane_state->ctl = i9xx_cursor_ctl(crtc_state, plane_state);
 
-    spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+    return 0;
 }
 
-static bool cursor_size_ok(struct drm_i915_private *dev_priv,
-                           uint32_t width, uint32_t height)
+static void i9xx_update_cursor(struct intel_plane *plane,
                               const struct intel_crtc_state *crtc_state,
                               const struct intel_plane_state *plane_state)
 {
-    if (width == 0 || height == 0)
-        return false;
+    struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+    enum pipe pipe = plane->pipe;
+    u32 cntl = 0, base = 0, pos = 0, fbc_ctl = 0;
+    unsigned long irqflags;
 
-    /*
-     * 845g/865g are special in that they are only limited by
-     * the width of their cursors, the height is arbitrary up to
-     * the precision of the register. Everything else requires
-     * square cursors, limited to a few power-of-two sizes.
-     */
-    if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
-        if ((width & 63) != 0)
-            return false;
+    if (plane_state && plane_state->base.visible) {
+        cntl = plane_state->ctl;
 
-        if (width > (IS_I845G(dev_priv) ? 64 : 512))
-            return false;
+        if (plane_state->base.crtc_h != plane_state->base.crtc_w)
+            fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1);
 
-        if (height > 1023)
-            return false;
+        base = intel_cursor_base(plane_state);
+        pos = intel_cursor_position(plane_state);
+    }
+
+    spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+
+    /*
+     * On some platforms writing CURCNTR first will also
+     * cause CURPOS to be armed by the CURBASE write.
+     * Without the CURCNTR write the CURPOS write would
+     * arm itself.
+     *
+     * CURCNTR and CUR_FBC_CTL are always
+     * armed by the CURBASE write only.
+     */
+    if (plane->cursor.base != base ||
+        plane->cursor.size != fbc_ctl ||
+        plane->cursor.cntl != cntl) {
+        I915_WRITE_FW(CURCNTR(pipe), cntl);
+        if (HAS_CUR_FBC(dev_priv))
+            I915_WRITE_FW(CUR_FBC_CTL(pipe), fbc_ctl);
+        I915_WRITE_FW(CURPOS(pipe), pos);
+        I915_WRITE_FW(CURBASE(pipe), base);
+
+        plane->cursor.base = base;
+        plane->cursor.size = fbc_ctl;
+        plane->cursor.cntl = cntl;
     } else {
-        switch (width | height) {
-        case 256:
-        case 128:
-            if (IS_GEN2(dev_priv))
-                return false;
-        case 64:
-            break;
-        default:
-            return false;
-        }
+        I915_WRITE_FW(CURPOS(pipe), pos);
     }
 
-    return true;
+    POSTING_READ_FW(CURBASE(pipe));
+
+    spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+static void i9xx_disable_cursor(struct intel_plane *plane,
+                                struct intel_crtc *crtc)
+{
+    i9xx_update_cursor(plane, NULL, NULL);
+}
+
+
 /* VESA 640x480x72Hz mode to set on the pipe */
 static struct drm_display_mode load_detect_mode = {
     DRM_MODE("640x480", DRM_MODE_TYPE_DEFAULT, 31500, 640, 664,
@@ -9566,6 +9748,7 @@ int intel_get_load_detect_pipe(struct drm_connector *connector,
      */
     if (!crtc) {
         DRM_DEBUG_KMS("no pipe available for load-detect\n");
+        ret = -ENODEV;
         goto fail;
     }
 
@@ -9622,6 +9805,7 @@ found:
         DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n");
     if (IS_ERR(fb)) {
         DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n");
+        ret = PTR_ERR(fb);
         goto fail;
     }
 
@@ -10853,21 +11037,21 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
              turn_off, turn_on, mode_changed);
 
     if (turn_on) {
-        if (INTEL_GEN(dev_priv) < 5)
+        if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv))
             pipe_config->update_wm_pre = true;
 
         /* must disable cxsr around plane enable/disable */
         if (plane->id != PLANE_CURSOR)
             pipe_config->disable_cxsr = true;
     } else if (turn_off) {
-        if (INTEL_GEN(dev_priv) < 5)
+        if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv))
             pipe_config->update_wm_post = true;
 
         /* must disable cxsr around plane enable/disable */
         if (plane->id != PLANE_CURSOR)
             pipe_config->disable_cxsr = true;
     } else if (intel_wm_need_update(&plane->base, plane_state)) {
-        if (INTEL_GEN(dev_priv) < 5) {
+        if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) {
             /* FIXME bollocks */
             pipe_config->update_wm_pre = true;
             pipe_config->update_wm_post = true;
@@ -11291,7 +11475,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
     shared_dpll = crtc_state->shared_dpll;
     dpll_hw_state = crtc_state->dpll_hw_state;
     force_thru = crtc_state->pch_pfit.force_thru;
-    if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+    if (IS_G4X(dev_priv) ||
+        IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
         wm_state = crtc_state->wm;
 
     /* Keep base drm_crtc_state intact, only clear our extended struct */
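/*
 * The comment in i9xx_update_cursor() above is the key to the write order:
 * CURBASE is the arming write, so every parameter is staged first and a
 * position-only move degenerates to a lone self-arming CURPOS write.  In
 * outline (write() standing in for I915_WRITE_FW):
 */
if (base_size_or_cntl_changed) {
    write(CURCNTR, cntl);               /* staged */
    if (HAS_CUR_FBC(dev_priv))
        write(CUR_FBC_CTL, fbc_ctl);    /* staged */
    write(CURPOS, pos);                 /* staged */
    write(CURBASE, base);               /* arming write: commits the above */
} else {
    write(CURPOS, pos);                 /* pure movement arms itself */
}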
@@ -11303,7 +11488,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
     crtc_state->shared_dpll = shared_dpll;
     crtc_state->dpll_hw_state = dpll_hw_state;
     crtc_state->pch_pfit.force_thru = force_thru;
-    if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+    if (IS_G4X(dev_priv) ||
+        IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
         crtc_state->wm = wm_state;
 }
 
@@ -11444,12 +11630,6 @@ intel_modeset_update_crtc_state(struct drm_atomic_state *state)
     for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
         to_intel_crtc(crtc)->config = to_intel_crtc_state(new_crtc_state);
 
-        /* Update hwmode for vblank functions */
-        if (new_crtc_state->active)
-            crtc->hwmode = new_crtc_state->adjusted_mode;
-        else
-            crtc->hwmode.crtc_clock = 0;
-
         /*
          * Update legacy state to satisfy fbc code. This can
          * be removed when fbc uses the atomic state.
@@ -11871,7 +12051,7 @@ static void verify_wm_state(struct drm_crtc *crtc,
      * allocation. In that case since the ddb allocation will be updated
      * once the plane becomes visible, we can skip this check
      */
-    if (intel_crtc->cursor_addr) {
+    if (1) {
         hw_plane_wm = &hw_wm.planes[PLANE_CURSOR];
         sw_plane_wm = &sw_wm->planes[PLANE_CURSOR];
 
@@ -11927,11 +12107,15 @@ verify_connector_state(struct drm_device *dev,
 
     for_each_new_connector_in_state(state, connector, new_conn_state, i) {
         struct drm_encoder *encoder = connector->encoder;
+        struct drm_crtc_state *crtc_state = NULL;
 
         if (new_conn_state->crtc != crtc)
             continue;
 
-        intel_connector_verify_state(to_intel_connector(connector));
+        if (crtc)
+            crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc);
+
+        intel_connector_verify_state(crtc_state, new_conn_state);
 
         I915_STATE_WARN(new_conn_state->best_encoder != encoder,
                         "connector's atomic encoder doesn't match legacy encoder\n");
@@ -12049,7 +12233,7 @@ verify_crtc_state(struct drm_crtc *crtc,
 
     intel_pipe_config_sanity_check(dev_priv, pipe_config);
 
-    sw_config = to_intel_crtc_state(crtc->state);
+    sw_config = to_intel_crtc_state(new_crtc_state);
     if (!intel_pipe_config_compare(dev_priv, sw_config,
                                    pipe_config, false)) {
         I915_STATE_WARN(1, "pipe state doesn't match!\n");
@@ -13145,7 +13329,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
     if (obj) {
         if (plane->type == DRM_PLANE_TYPE_CURSOR &&
             INTEL_INFO(dev_priv)->cursor_needs_physical) {
-            const int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
+            const int align = intel_cursor_alignment(dev_priv);
 
             ret = i915_gem_object_attach_phys(obj, align);
             if (ret) {
@@ -13275,11 +13459,11 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state
 }
 
 static int
-intel_check_primary_plane(struct drm_plane *plane,
+intel_check_primary_plane(struct intel_plane *plane,
                           struct intel_crtc_state *crtc_state,
                           struct intel_plane_state *state)
 {
-    struct drm_i915_private *dev_priv = to_i915(plane->dev);
+    struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
     struct drm_crtc *crtc = state->base.crtc;
     int min_scale = DRM_PLANE_HELPER_NO_SCALING;
     int max_scale = DRM_PLANE_HELPER_NO_SCALING;
@@ -13458,7 +13642,7 @@ intel_legacy_cursor_update(struct drm_plane *plane,
         goto out_free;
 
     if (INTEL_INFO(dev_priv)->cursor_needs_physical) {
-        int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
+        int align = intel_cursor_alignment(dev_priv);
 
         ret = i915_gem_object_attach_phys(intel_fb_obj(fb), align);
         if (ret) {
@@ -13494,12 +13678,12 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 
     if (plane->state->visible) {
         trace_intel_update_plane(plane, to_intel_crtc(crtc));
-        intel_plane->update_plane(plane,
+        intel_plane->update_plane(intel_plane,
                                   to_intel_crtc_state(crtc->state),
                                   to_intel_plane_state(plane->state));
     } else {
         trace_intel_disable_plane(plane, to_intel_crtc(crtc));
-        intel_plane->disable_plane(plane, crtc);
+        intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc));
     }
 
     intel_cleanup_plane_fb(plane, new_plane_state);
@@ -13613,22 +13797,22 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 
     if (INTEL_GEN(dev_priv) >= 9) {
         supported_rotations =
-            DRM_ROTATE_0 | DRM_ROTATE_90 |
-            DRM_ROTATE_180 | DRM_ROTATE_270;
+            DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 |
+            DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270;
     } else if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) {
         supported_rotations =
-            DRM_ROTATE_0 | DRM_ROTATE_180 |
-            DRM_REFLECT_X;
+            DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180 |
+            DRM_MODE_REFLECT_X;
     } else if (INTEL_GEN(dev_priv) >= 4) {
         supported_rotations =
-            DRM_ROTATE_0 | DRM_ROTATE_180;
+            DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180;
     } else {
-        supported_rotations = DRM_ROTATE_0;
+        supported_rotations = DRM_MODE_ROTATE_0;
     }
 
     if (INTEL_GEN(dev_priv) >= 4)
         drm_plane_create_rotation_property(&primary->base,
-                                           DRM_ROTATE_0,
+                                           DRM_MODE_ROTATE_0,
                                            supported_rotations);
 
     drm_plane_helper_add(&primary->base, &intel_plane_helper_funcs);
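/*
 * The rotation-property hunk above picks the advertised rotations per
 * platform.  Roughly, eliding the CHV pipe B case, which additionally
 * advertises DRM_MODE_REFLECT_X:
 */
unsigned int rotations = DRM_MODE_ROTATE_0;

if (INTEL_GEN(dev_priv) >= 9)
    rotations |= DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 |
                 DRM_MODE_ROTATE_270;
else if (INTEL_GEN(dev_priv) >= 4)
    rotations |= DRM_MODE_ROTATE_180;

if (INTEL_GEN(dev_priv) >= 4)
    drm_plane_create_rotation_property(&primary->base,
                                       DRM_MODE_ROTATE_0, rotations);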
@@ -13642,107 +13826,9 @@ fail:
     kfree(state);
     kfree(primary);
 
     return ERR_PTR(ret);
 }
 
-static int
-intel_check_cursor_plane(struct drm_plane *plane,
-                         struct intel_crtc_state *crtc_state,
-                         struct intel_plane_state *state)
-{
-    struct drm_i915_private *dev_priv = to_i915(plane->dev);
-    struct drm_framebuffer *fb = state->base.fb;
-    struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-    enum pipe pipe = to_intel_plane(plane)->pipe;
-    unsigned stride;
-    int ret;
-
-    ret = drm_plane_helper_check_state(&state->base,
-                                       &state->clip,
-                                       DRM_PLANE_HELPER_NO_SCALING,
-                                       DRM_PLANE_HELPER_NO_SCALING,
-                                       true, true);
-    if (ret)
-        return ret;
-
-    /* if we want to turn off the cursor ignore width and height */
-    if (!obj)
-        return 0;
-
-    /* Check for which cursor types we support */
-    if (!cursor_size_ok(dev_priv, state->base.crtc_w,
-                        state->base.crtc_h)) {
-        DRM_DEBUG("Cursor dimension %dx%d not supported\n",
-                  state->base.crtc_w, state->base.crtc_h);
-        return -EINVAL;
-    }
-
-    stride = roundup_pow_of_two(state->base.crtc_w) * 4;
-    if (obj->base.size < stride * state->base.crtc_h) {
-        DRM_DEBUG_KMS("buffer is too small\n");
-        return -ENOMEM;
-    }
-
-    if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
-        DRM_DEBUG_KMS("cursor cannot be tiled\n");
-        return -EINVAL;
-    }
-
-    /*
-     * There's something wrong with the cursor on CHV pipe C.
-     * If it straddles the left edge of the screen then
-     * moving it away from the edge or disabling it often
-     * results in a pipe underrun, and often that can lead to
-     * dead pipe (constant underrun reported, and it scans
-     * out just a solid color). To recover from that, the
-     * display power well must be turned off and on again.
-     * Refuse the put the cursor into that compromised position.
-     */
-    if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C &&
-        state->base.visible && state->base.crtc_x < 0) {
-        DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
-        return -EINVAL;
-    }
-
-    if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
-        state->ctl = i845_cursor_ctl(crtc_state, state);
-    else
-        state->ctl = i9xx_cursor_ctl(crtc_state, state);
-
-    return 0;
-}
-
-static void
-intel_disable_cursor_plane(struct drm_plane *plane,
-                           struct drm_crtc *crtc)
-{
-    struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-
-    intel_crtc->cursor_addr = 0;
-    intel_crtc_update_cursor(crtc, NULL);
-}
-
-static void
-intel_update_cursor_plane(struct drm_plane *plane,
-                          const struct intel_crtc_state *crtc_state,
-                          const struct intel_plane_state *state)
-{
-    struct drm_crtc *crtc = crtc_state->base.crtc;
-    struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-    struct drm_i915_private *dev_priv = to_i915(plane->dev);
-    struct drm_i915_gem_object *obj = intel_fb_obj(state->base.fb);
-    uint32_t addr;
-
-    if (!obj)
-        addr = 0;
-    else if (!INTEL_INFO(dev_priv)->cursor_needs_physical)
-        addr = intel_plane_ggtt_offset(state);
-    else
-        addr = obj->phys_handle->busaddr;
-
-    intel_crtc->cursor_addr = addr;
-    intel_crtc_update_cursor(crtc, state);
-}
-
 static struct intel_plane *
-intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
+intel_cursor_plane_create(struct drm_i915_private *dev_priv,
+                          enum pipe pipe)
 {
     struct intel_plane *cursor = NULL;
     struct intel_plane_state *state = NULL;
@@ -13768,9 +13854,22 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
     cursor->plane = pipe;
     cursor->id = PLANE_CURSOR;
     cursor->frontbuffer_bit = INTEL_FRONTBUFFER_CURSOR(pipe);
-    cursor->check_plane = intel_check_cursor_plane;
-    cursor->update_plane = intel_update_cursor_plane;
-    cursor->disable_plane = intel_disable_cursor_plane;
+
+    if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
+        cursor->update_plane = i845_update_cursor;
+        cursor->disable_plane = i845_disable_cursor;
+        cursor->check_plane = i845_check_cursor;
+    } else {
+        cursor->update_plane = i9xx_update_cursor;
+        cursor->disable_plane = i9xx_disable_cursor;
+        cursor->check_plane = i9xx_check_cursor;
+    }
+
+    cursor->cursor.base = ~0;
+    cursor->cursor.cntl = ~0;
+
+    if (IS_I845G(dev_priv) || IS_I865G(dev_priv) || HAS_CUR_FBC(dev_priv))
+        cursor->cursor.size = ~0;
 
     ret = drm_universal_plane_init(&dev_priv->drm, &cursor->base,
                                    0, &intel_cursor_plane_funcs,
@@ -13783,9 +13882,9 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 
     if (INTEL_GEN(dev_priv) >= 4)
         drm_plane_create_rotation_property(&cursor->base,
-                                           DRM_ROTATE_0,
-                                           DRM_ROTATE_0 |
-                                           DRM_ROTATE_180);
+                                           DRM_MODE_ROTATE_0,
+                                           DRM_MODE_ROTATE_0 |
+                                           DRM_MODE_ROTATE_180);
 
     if (INTEL_GEN(dev_priv) >= 9)
         state->scaler_id = -1;
@@ -13879,10 +13978,6 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
     intel_crtc->pipe = pipe;
     intel_crtc->plane = primary->plane;
 
-    intel_crtc->cursor_base = ~0;
-    intel_crtc->cursor_cntl = ~0;
-    intel_crtc->cursor_size = ~0;
-
     /* initialize shared scalers */
     intel_crtc_init_scalers(intel_crtc, crtc_state);
 
@@ -14422,7 +14517,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
     case DRM_FORMAT_UYVY:
     case DRM_FORMAT_YVYU:
     case DRM_FORMAT_VYUY:
-        if (INTEL_GEN(dev_priv) < 5) {
+        if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) {
             DRM_DEBUG_KMS("unsupported pixel format: %s\n",
                           drm_get_format_name(mode_cmd->pixel_format, &format_name));
             goto err;
@@ -14934,6 +15029,7 @@ int intel_modeset_init(struct drm_device *dev)
 
     dev->mode_config.funcs = &intel_mode_funcs;
 
+    init_llist_head(&dev_priv->atomic_helper.free_list);
     INIT_WORK(&dev_priv->atomic_helper.free_work,
               intel_atomic_helper_free_state_worker);
 
@@ -15155,7 +15251,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
                 continue;
 
             trace_intel_disable_plane(&plane->base, crtc);
-            plane->disable_plane(&plane->base, &crtc->base);
+            plane->disable_plane(plane, crtc);
         }
     }
 
@@ -15425,8 +15521,6 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
             to_intel_crtc_state(crtc->base.state);
         int pixclk = 0;
 
-        crtc->base.hwmode = crtc_state->base.adjusted_mode;
-
         memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
         if (crtc_state->base.active) {
             intel_mode_from_pipe_config(&crtc->base.mode, crtc_state);
@@ -15456,7 +15550,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
             if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled)
                 pixclk = DIV_ROUND_UP(pixclk * 100, 95);
 
-            drm_calc_timestamping_constants(&crtc->base, &crtc->base.hwmode);
+            drm_calc_timestamping_constants(&crtc->base,
+                                            &crtc_state->base.adjusted_mode);
             update_scanline_offset(crtc);
         }
 
@@ -15527,7 +15622,10 @@ intel_modeset_setup_hw_state(struct drm_device *dev)
         pll->on = false;
     }
 
-    if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+    if (IS_G4X(dev_priv)) {
+        g4x_wm_get_hw_state(dev);
+        g4x_wm_sanitize(dev_priv);
+    } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
         vlv_wm_get_hw_state(dev);
         vlv_wm_sanitize(dev_priv);
     } else if (IS_GEN9(dev_priv)) {
@@ -15561,13 +15659,6 @@ void intel_display_resume(struct drm_device *dev)
     if (state)
         state->acquire_ctx = &ctx;
 
-    /*
-     * This is a cludge because with real atomic modeset mode_config.mutex
-     * won't be taken. Unfortunately some probed state like
-     * audio_codec_enable is still protected by mode_config.mutex, so lock
-     * it here for now.
-     */
-    mutex_lock(&dev->mode_config.mutex);
     drm_modeset_acquire_init(&ctx, 0);
 
     while (1) {
@@ -15583,7 +15674,6 @@ void intel_display_resume(struct drm_device *dev)
 
     drm_modeset_drop_locks(&ctx);
     drm_modeset_acquire_fini(&ctx);
-    mutex_unlock(&dev->mode_config.mutex);
 
     if (ret)
         DRM_ERROR("Restoring old state failed with %i\n", ret);
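/*
 * Also visible across the intel_display.c changes as a whole: the cursor
 * register cache moved from intel_crtc into intel_plane, and
 * cursor.base/cntl (and size, where the platform has one) are seeded with
 * ~0 so the first update can never match the cache and always reaches the
 * hardware.  The pattern, with hypothetical names:
 */
struct reg_cache { u32 last; };

static void reg_cache_init(struct reg_cache *c)
{
    c->last = ~0u;          /* impossible value: forces the first write */
}

static void reg_cache_write(struct reg_cache *c, u32 val)
{
    if (c->last != val) {   /* skip redundant MMIO */
        hw_write(val);      /* hypothetical MMIO helper */
        c->last = val;
    }
}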
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index ee77b519835c..4a6feb6a69bd 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -133,36 +133,55 @@ static void vlv_steal_power_sequencer(struct drm_device *dev,
                                       enum pipe pipe);
 static void intel_dp_unset_edid(struct intel_dp *intel_dp);
 
-static int
-intel_dp_max_link_bw(struct intel_dp *intel_dp)
+static int intel_dp_num_rates(u8 link_bw_code)
 {
-    int max_link_bw = intel_dp->dpcd[DP_MAX_LINK_RATE];
-
-    switch (max_link_bw) {
+    switch (link_bw_code) {
+    default:
+        WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n",
+             link_bw_code);
     case DP_LINK_BW_1_62:
+        return 1;
     case DP_LINK_BW_2_7:
+        return 2;
     case DP_LINK_BW_5_4:
-        break;
-    default:
-        WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n",
-             max_link_bw);
-        max_link_bw = DP_LINK_BW_1_62;
-        break;
+        return 3;
     }
-    return max_link_bw;
 }
 
-static u8 intel_dp_max_lane_count(struct intel_dp *intel_dp)
+/* update sink rates from dpcd */
+static void intel_dp_set_sink_rates(struct intel_dp *intel_dp)
 {
-    struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-    u8 source_max, sink_max;
+    int i, num_rates;
+
+    num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]);
+
+    for (i = 0; i < num_rates; i++)
+        intel_dp->sink_rates[i] = default_rates[i];
 
-    source_max = intel_dig_port->max_lanes;
-    sink_max = intel_dp->max_sink_lane_count;
+    intel_dp->num_sink_rates = num_rates;
+}
+
+/* Theoretical max between source and sink */
+static int intel_dp_max_common_rate(struct intel_dp *intel_dp)
+{
+    return intel_dp->common_rates[intel_dp->num_common_rates - 1];
+}
+
+/* Theoretical max between source and sink */
+static int intel_dp_max_common_lane_count(struct intel_dp *intel_dp)
+{
+    struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+    int source_max = intel_dig_port->max_lanes;
+    int sink_max = drm_dp_max_lane_count(intel_dp->dpcd);
 
     return min(source_max, sink_max);
 }
 
+int intel_dp_max_lane_count(struct intel_dp *intel_dp)
+{
+    return intel_dp->max_link_lane_count;
+}
+
 int
 intel_dp_link_required(int pixel_clock, int bpp)
 {
@@ -205,34 +224,25 @@ intel_dp_downstream_max_dotclock(struct intel_dp *intel_dp)
     return max_dotclk;
 }
 
-static int
-intel_dp_sink_rates(struct intel_dp *intel_dp, const int **sink_rates)
-{
-    if (intel_dp->num_sink_rates) {
-        *sink_rates = intel_dp->sink_rates;
-        return intel_dp->num_sink_rates;
-    }
-
-    *sink_rates = default_rates;
-
-    return (intel_dp->max_sink_link_bw >> 3) + 1;
-}
-
-static int
-intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates)
+static void
+intel_dp_set_source_rates(struct intel_dp *intel_dp)
 {
     struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
     struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
+    const int *source_rates;
     int size;
 
+    /* This should only be done once */
+    WARN_ON(intel_dp->source_rates || intel_dp->num_source_rates);
+
     if (IS_GEN9_LP(dev_priv)) {
-        *source_rates = bxt_rates;
+        source_rates = bxt_rates;
         size = ARRAY_SIZE(bxt_rates);
     } else if (IS_GEN9_BC(dev_priv)) {
-        *source_rates = skl_rates;
+        source_rates = skl_rates;
         size = ARRAY_SIZE(skl_rates);
     } else {
-        *source_rates = default_rates;
+        source_rates = default_rates;
         size = ARRAY_SIZE(default_rates);
     }
 
@@ -240,7 +250,8 @@ intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates)
     if (!intel_dp_source_supports_hbr2(intel_dp))
         size--;
 
-    return size;
+    intel_dp->source_rates = source_rates;
+    intel_dp->num_source_rates = size;
 }
 
 static int intersect_rates(const int *source_rates, int source_len,
@@ -266,50 +277,83 @@ static int intersect_rates(const int *source_rates, int source_len,
     return k;
 }
 
-static int intel_dp_common_rates(struct intel_dp *intel_dp,
-                                 int *common_rates)
+/* return index of rate in rates array, or -1 if not found */
+static int intel_dp_rate_index(const int *rates, int len, int rate)
 {
-    const int *source_rates, *sink_rates;
-    int source_len, sink_len;
+    int i;
 
-    sink_len = intel_dp_sink_rates(intel_dp, &sink_rates);
-    source_len = intel_dp_source_rates(intel_dp, &source_rates);
+    for (i = 0; i < len; i++)
+        if (rate == rates[i])
+            return i;
 
-    return intersect_rates(source_rates, source_len,
-                           sink_rates, sink_len,
-                           common_rates);
+    return -1;
 }
 
-static int intel_dp_link_rate_index(struct intel_dp *intel_dp,
-                                    int *common_rates, int link_rate)
+static void intel_dp_set_common_rates(struct intel_dp *intel_dp)
 {
-    int common_len;
-    int index;
+    WARN_ON(!intel_dp->num_source_rates || !intel_dp->num_sink_rates);
+
+    intel_dp->num_common_rates = intersect_rates(intel_dp->source_rates,
+                                                 intel_dp->num_source_rates,
+                                                 intel_dp->sink_rates,
+                                                 intel_dp->num_sink_rates,
+                                                 intel_dp->common_rates);
 
-    common_len = intel_dp_common_rates(intel_dp, common_rates);
-    for (index = 0; index < common_len; index++) {
-        if (link_rate == common_rates[common_len - index - 1])
-            return common_len - index - 1;
+    /* Paranoia, there should always be something in common. */
+    if (WARN_ON(intel_dp->num_common_rates == 0)) {
+        intel_dp->common_rates[0] = default_rates[0];
+        intel_dp->num_common_rates = 1;
     }
+}
 
-    return -1;
+/* get length of common rates potentially limited by max_rate */
+static int intel_dp_common_len_rate_limit(struct intel_dp *intel_dp,
+                                          int max_rate)
+{
+    const int *common_rates = intel_dp->common_rates;
+    int i, common_len = intel_dp->num_common_rates;
+
+    /* Limit results by potentially reduced max rate */
+    for (i = 0; i < common_len; i++) {
+        if (common_rates[common_len - i - 1] <= max_rate)
+            return common_len - i;
    }
+
+    return 0;
+}
+
+static bool intel_dp_link_params_valid(struct intel_dp *intel_dp)
+{
+    /*
+     * FIXME: we need to synchronize the current link parameters with
+     * hardware readout. Currently fast link training doesn't work on
+     * boot-up.
+     */
+    if (intel_dp->link_rate == 0 ||
+        intel_dp->link_rate > intel_dp->max_link_rate)
+        return false;
+
+    if (intel_dp->lane_count == 0 ||
+        intel_dp->lane_count > intel_dp_max_lane_count(intel_dp))
+        return false;
+
+    return true;
 }
 
 int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
                                             int link_rate, uint8_t lane_count)
 {
-    int common_rates[DP_MAX_SUPPORTED_RATES];
-    int link_rate_index;
+    int index;
 
-    link_rate_index = intel_dp_link_rate_index(intel_dp,
-                                               common_rates,
                                               link_rate);
-    if (link_rate_index > 0) {
-        intel_dp->max_sink_link_bw = drm_dp_link_rate_to_bw_code(common_rates[link_rate_index - 1]);
-        intel_dp->max_sink_lane_count = lane_count;
+    index = intel_dp_rate_index(intel_dp->common_rates,
+                                intel_dp->num_common_rates,
+                                link_rate);
+    if (index > 0) {
+        intel_dp->max_link_rate = intel_dp->common_rates[index - 1];
+        intel_dp->max_link_lane_count = lane_count;
     } else if (lane_count > 1) {
-        intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp);
-        intel_dp->max_sink_lane_count = lane_count >> 1;
+        intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
+        intel_dp->max_link_lane_count = lane_count >> 1;
     } else {
        DRM_ERROR("Link Training Unsuccessful\n");
        return -1;
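/*
 * The new bookkeeping above computes the source, sink and common rate
 * arrays once and caches them in intel_dp, replacing per-call
 * recomputation.  Both inputs are kept sorted ascending, so the
 * intersection is a linear merge; a condensed restatement of what
 * intersect_rates() does, under that sortedness assumption:
 */
static int intersect(const int *a, int alen, const int *b, int blen, int *out)
{
    int i = 0, j = 0, k = 0;

    while (i < alen && j < blen) {
        if (a[i] == b[j]) {
            out[k++] = a[i];    /* rate supported by both ends */
            i++;
            j++;
        } else if (a[i] < b[j]) {
            i++;
        } else {
            j++;
        }
    }
    return k;                   /* number of common rates */
}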
@@ -1486,24 +1530,21 @@ static void snprintf_int_array(char *str, size_t len,
 
 static void intel_dp_print_rates(struct intel_dp *intel_dp)
 {
-    const int *source_rates, *sink_rates;
-    int source_len, sink_len, common_len;
-    int common_rates[DP_MAX_SUPPORTED_RATES];
     char str[128]; /* FIXME: too big for stack? */
 
     if ((drm_debug & DRM_UT_KMS) == 0)
         return;
 
-    source_len = intel_dp_source_rates(intel_dp, &source_rates);
-    snprintf_int_array(str, sizeof(str), source_rates, source_len);
+    snprintf_int_array(str, sizeof(str),
+                       intel_dp->source_rates, intel_dp->num_source_rates);
     DRM_DEBUG_KMS("source rates: %s\n", str);
 
-    sink_len = intel_dp_sink_rates(intel_dp, &sink_rates);
-    snprintf_int_array(str, sizeof(str), sink_rates, sink_len);
+    snprintf_int_array(str, sizeof(str),
+                       intel_dp->sink_rates, intel_dp->num_sink_rates);
     DRM_DEBUG_KMS("sink rates: %s\n", str);
 
-    common_len = intel_dp_common_rates(intel_dp, common_rates);
-    snprintf_int_array(str, sizeof(str), common_rates, common_len);
+    snprintf_int_array(str, sizeof(str),
                       intel_dp->common_rates, intel_dp->num_common_rates);
     DRM_DEBUG_KMS("common rates: %s\n", str);
 }
 
@@ -1538,39 +1579,34 @@ bool intel_dp_read_desc(struct intel_dp *intel_dp)
     return true;
 }
 
-static int rate_to_index(int find, const int *rates)
-{
-    int i = 0;
-
-    for (i = 0; i < DP_MAX_SUPPORTED_RATES; ++i)
-        if (find == rates[i])
-            break;
-
-    return i;
-}
-
 int
 intel_dp_max_link_rate(struct intel_dp *intel_dp)
 {
-    int rates[DP_MAX_SUPPORTED_RATES] = {};
     int len;
 
-    len = intel_dp_common_rates(intel_dp, rates);
+    len = intel_dp_common_len_rate_limit(intel_dp, intel_dp->max_link_rate);
     if (WARN_ON(len <= 0))
         return 162000;
 
-    return rates[len - 1];
+    return intel_dp->common_rates[len - 1];
 }
 
 int intel_dp_rate_select(struct intel_dp *intel_dp, int rate)
 {
-    return rate_to_index(rate, intel_dp->sink_rates);
+    int i = intel_dp_rate_index(intel_dp->sink_rates,
                                intel_dp->num_sink_rates, rate);
+
+    if (WARN_ON(i < 0))
+        i = 0;
+
+    return i;
 }
 
 void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
                            uint8_t *link_bw, uint8_t *rate_select)
 {
-    if (intel_dp->num_sink_rates) {
+    /* eDP 1.4 rate select method. */
+    if (intel_dp->use_rate_select) {
         *link_bw = 0;
         *rate_select =
             intel_dp_rate_select(intel_dp, port_clock);
@@ -1618,14 +1654,13 @@ intel_dp_compute_config(struct intel_encoder *encoder,
     /* Conveniently, the link BW constants become indices with a shift...*/
     int min_clock = 0;
     int max_clock;
-    int link_rate_index;
     int bpp, mode_rate;
     int link_avail, link_clock;
-    int common_rates[DP_MAX_SUPPORTED_RATES] = {};
     int common_len;
     uint8_t link_bw, rate_select;
 
-    common_len = intel_dp_common_rates(intel_dp, common_rates);
+    common_len = intel_dp_common_len_rate_limit(intel_dp,
                                                intel_dp->max_link_rate);
 
     /* No common link rates between source and sink */
     WARN_ON(common_len <= 0);
@@ -1662,16 +1697,18 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 
     /* Use values requested by Compliance Test Request */
     if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
-        link_rate_index = intel_dp_link_rate_index(intel_dp,
-                                                   common_rates,
-                                                   intel_dp->compliance.test_link_rate);
-        if (link_rate_index >= 0)
-            min_clock = max_clock = link_rate_index;
+        int index;
+
+        index = intel_dp_rate_index(intel_dp->common_rates,
+                                    intel_dp->num_common_rates,
+                                    intel_dp->compliance.test_link_rate);
+        if (index >= 0)
+            min_clock = max_clock = index;
         min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count;
     }
     DRM_DEBUG_KMS("DP link computation with max lane count %i "
                   "max bw %d pixel clock %iKHz\n",
-                  max_lane_count, common_rates[max_clock],
+                  max_lane_count, intel_dp->common_rates[max_clock],
                   adjusted_mode->crtc_clock);
 
     /* Walk through all bpp values. Luckily they're all nicely spaced with 2
@@ -1707,7 +1744,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
                  lane_count <= max_lane_count;
                  lane_count <<= 1) {
 
-                link_clock = common_rates[clock];
+                link_clock = intel_dp->common_rates[clock];
                 link_avail = intel_dp_max_data_rate(link_clock,
                                                     lane_count);
 
@@ -1739,7 +1776,7 @@ found:
     pipe_config->lane_count = lane_count;
 
     pipe_config->pipe_bpp = bpp;
-    pipe_config->port_clock = common_rates[clock];
+    pipe_config->port_clock = intel_dp->common_rates[clock];
 
     intel_dp_compute_rate(intel_dp, pipe_config->port_clock,
                           &link_bw, &rate_select);
@@ -3051,7 +3088,8 @@ static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp)
 {
     uint8_t psr_caps = 0;
 
-    drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps);
+    if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps) != 1)
+        return false;
     return psr_caps & DP_PSR2_SU_Y_COORDINATE_REQUIRED;
 }
 
@@ -3059,9 +3097,9 @@ static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp)
 {
     uint8_t dprx = 0;
 
-    drm_dp_dpcd_readb(&intel_dp->aux,
-                      DP_DPRX_FEATURE_ENUMERATION_LIST,
-                      &dprx);
+    if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST,
                          &dprx) != 1)
+        return false;
     return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED;
 }
 
@@ -3069,7 +3107,9 @@ static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp)
 {
     uint8_t alpm_caps = 0;
 
-    drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, &alpm_caps);
+    if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP,
                          &alpm_caps) != 1)
+        return false;
     return alpm_caps & DP_ALPM_CAP;
 }
 
@@ -3642,9 +3682,10 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
         uint8_t frame_sync_cap;
 
         dev_priv->psr.sink_support = true;
-        drm_dp_dpcd_read(&intel_dp->aux,
-                         DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
-                         &frame_sync_cap, 1);
+        if (drm_dp_dpcd_readb(&intel_dp->aux,
                              DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
                              &frame_sync_cap) != 1)
+            frame_sync_cap = 0;
         dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false;
         /* PSR2 needs frame sync as well */
         dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync;
@@ -3695,6 +3736,13 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
         intel_dp->num_sink_rates = i;
     }
 
+    if (intel_dp->num_sink_rates)
+        intel_dp->use_rate_select = true;
+    else
+        intel_dp_set_sink_rates(intel_dp);
+
+    intel_dp_set_common_rates(intel_dp);
+
     return true;
 }
 
@@ -3702,11 +3750,18 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
 static bool
 intel_dp_get_dpcd(struct intel_dp *intel_dp)
 {
+    u8 sink_count;
+
     if (!intel_dp_read_dpcd(intel_dp))
         return false;
 
-    if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT,
-                         &intel_dp->sink_count, 1) < 0)
+    /* Don't clobber cached eDP rates. */
+    if (!is_edp(intel_dp)) {
+        intel_dp_set_sink_rates(intel_dp);
+        intel_dp_set_common_rates(intel_dp);
+    }
+
+    if (drm_dp_dpcd_readb(&intel_dp->aux, DP_SINK_COUNT, &sink_count) <= 0)
         return false;
 
     /*
@@ -3714,7 +3769,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
      * a member variable in intel_dp will track any changes
      * between short pulse interrupts.
      */
-    intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count);
+    intel_dp->sink_count = DP_GET_SINK_COUNT(sink_count);
 
     /*
      * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that
@@ -3743,7 +3798,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
 static bool
 intel_dp_can_mst(struct intel_dp *intel_dp)
 {
-    u8 buf[1];
+    u8 mstm_cap;
 
     if (!i915.enable_dp_mst)
         return false;
@@ -3754,10 +3809,10 @@ intel_dp_can_mst(struct intel_dp *intel_dp)
     if (intel_dp->dpcd[DP_DPCD_REV] < 0x12)
         return false;
 
-    if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1) != 1)
+    if (drm_dp_dpcd_readb(&intel_dp->aux, DP_MSTM_CAP, &mstm_cap) != 1)
         return false;
 
-    return buf[0] & DP_MST_CAP;
+    return mstm_cap & DP_MST_CAP;
 }
 
 static void
@@ -3903,9 +3958,8 @@ stop:
 static bool
 intel_dp_get_sink_irq(struct intel_dp *intel_dp, u8 *sink_irq_vector)
 {
-    return drm_dp_dpcd_read(&intel_dp->aux,
-                            DP_DEVICE_SERVICE_IRQ_VECTOR,
-                            sink_irq_vector, 1) == 1;
+    return drm_dp_dpcd_readb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR,
                             sink_irq_vector) == 1;
 }
 
 static bool
@@ -3926,7 +3980,6 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
 {
     int status = 0;
     int min_lane_count = 1;
-    int common_rates[DP_MAX_SUPPORTED_RATES] = {};
     int link_rate_index, test_link_rate;
     uint8_t test_lane_count, test_link_bw;
     /* (DP CTS 1.2)
@@ -3943,7 +3996,7 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
     test_lane_count &= DP_MAX_LANE_COUNT_MASK;
     /* Validate the requested lane count */
     if (test_lane_count < min_lane_count ||
-        test_lane_count > intel_dp->max_sink_lane_count)
+        test_lane_count > intel_dp->max_link_lane_count)
         return DP_TEST_NAK;
 
     status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE,
@@ -3954,9 +4007,9 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
     }
     /* Validate the requested link rate */
     test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw);
-    link_rate_index = intel_dp_link_rate_index(intel_dp,
-                                               common_rates,
-                                               test_link_rate);
+    link_rate_index = intel_dp_rate_index(intel_dp->common_rates,
                                          intel_dp->num_common_rates,
                                          test_link_rate);
     if (link_rate_index < 0)
         return DP_TEST_NAK;
 
@@ -3969,13 +4022,13 @@ static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
 {
     uint8_t test_pattern;
-    uint16_t test_misc;
+    uint8_t test_misc;
     __be16 h_width, v_height;
     int status = 0;
 
     /* Read the TEST_PATTERN (DP CTS 3.1.5) */
-    status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_PATTERN,
-                              &test_pattern, 1);
+    status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_PATTERN,
                               &test_pattern);
     if (status <= 0) {
         DRM_DEBUG_KMS("Test pattern read failed\n");
         return DP_TEST_NAK;
@@ -3997,8 +4050,8 @@ static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
         return DP_TEST_NAK;
     }
 
-    status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_MISC0,
-                              &test_misc, 1);
+    status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_MISC0,
                               &test_misc);
     if (status <= 0) {
         DRM_DEBUG_KMS("TEST MISC read failed\n");
         return DP_TEST_NAK;
@@ -4057,10 +4110,8 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp)
          */
         block += intel_connector->detect_edid->extensions;
 
-        if (!drm_dp_dpcd_write(&intel_dp->aux,
-                               DP_TEST_EDID_CHECKSUM,
-                               &block->checksum,
-                               1))
+        if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_EDID_CHECKSUM,
                               block->checksum) <= 0)
             DRM_DEBUG_KMS("Failed to write EDID checksum\n");
 
         test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE;
@@ -4224,9 +4275,11 @@ intel_dp_check_link_status(struct intel_dp *intel_dp)
     if (!to_intel_crtc(intel_encoder->base.crtc)->active)
         return;
 
-    /* FIXME: we need to synchronize this sort of stuff with hardware
-     * readout. Currently fast link training doesn't work on boot-up. */
-    if (!intel_dp->lane_count)
+    /*
+     * Validate the cached values of intel_dp->link_rate and
+     * intel_dp->lane_count before attempting to retrain.
+     */
+    if (!intel_dp_link_params_valid(intel_dp))
         return;
 
     /* Retrain if Channel EQ or CR not ok */
@@ -4613,11 +4666,11 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
               yesno(drm_dp_tps3_supported(intel_dp->dpcd)));
 
     if (intel_dp->reset_link_params) {
-        /* Set the max lane count for sink */
-        intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd);
+        /* Initial max link lane count */
+        intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
 
-        /* Set the max link BW for sink */
-        intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp);
+        /* Initial max link rate */
+        intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
 
         intel_dp->reset_link_params = false;
     }
@@ -5127,7 +5180,7 @@ bool intel_dp_is_edp(struct drm_i915_private *dev_priv, enum port port)
     return intel_bios_is_port_edp(dev_priv, port);
 }
 
-void
+static void
 intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector)
 {
     struct intel_connector *intel_connector = to_intel_connector(connector);
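/*
 * Many hunks above convert single-byte DPCD accesses to drm_dp_dpcd_readb()
 * and, crucially, check the result: the helper returns the number of bytes
 * transferred, so anything other than 1 is an AUX failure and the
 * capability must be treated as absent.  The recurring shape, distilled
 * into a hypothetical helper:
 */
static bool sink_has_cap(struct drm_dp_aux *aux, unsigned int offset, u8 mask)
{
    u8 val = 0;

    if (drm_dp_dpcd_readb(aux, offset, &val) != 1)
        return false;   /* AUX read failed: assume no capability */

    return val & mask;
}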
@@ -5932,6 +5985,29 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port)
     }
 }
 
+static void intel_dp_modeset_retry_work_fn(struct work_struct *work)
+{
+    struct intel_connector *intel_connector;
+    struct drm_connector *connector;
+
+    intel_connector = container_of(work, typeof(*intel_connector),
                                   modeset_retry_work);
+    connector = &intel_connector->base;
+    DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id,
                  connector->name);
+
+    /* Grab the locks before changing connector property*/
+    mutex_lock(&connector->dev->mode_config.mutex);
+    /* Set connector link status to BAD and send a Uevent to notify
+     * userspace to do a modeset.
+     */
+    drm_mode_connector_set_link_status_property(connector,
                                                DRM_MODE_LINK_STATUS_BAD);
+    mutex_unlock(&connector->dev->mode_config.mutex);
+    /* Send Hotplug uevent so userspace can reprobe */
+    drm_kms_helper_hotplug_event(connector->dev);
+}
+
 bool
 intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
                         struct intel_connector *intel_connector)
@@ -5944,11 +6020,17 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
     enum port port = intel_dig_port->port;
     int type;
 
+    /* Initialize the work for modeset in case of link train failure */
+    INIT_WORK(&intel_connector->modeset_retry_work,
              intel_dp_modeset_retry_work_fn);
+
     if (WARN(intel_dig_port->max_lanes < 1,
              "Not enough lanes (%d) for DP on port %c\n",
              intel_dig_port->max_lanes, port_name(port)))
         return false;
 
+    intel_dp_set_source_rates(intel_dp);
+
     intel_dp->reset_link_params = true;
     intel_dp->pps_pipe = INVALID_PIPE;
     intel_dp->active_pipe = INVALID_PIPE;
diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
index 6532e226db29..a0995c00fc84 100644
--- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
@@ -28,6 +28,10 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
 {
     uint8_t reg_val = 0;
 
+    /* Early return when display use other mechanism to enable backlight. */
+    if (!(intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP))
+        return;
+
     if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER,
                           &reg_val) < 0) {
         DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n",
@@ -97,15 +101,37 @@ static void intel_dp_aux_enable_backlight(struct intel_connector *connector)
 {
     struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
     uint8_t dpcd_buf = 0;
+    uint8_t edp_backlight_mode = 0;
 
-    set_aux_backlight_enable(intel_dp, true);
+    if (drm_dp_dpcd_readb(&intel_dp->aux,
                          DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) != 1) {
+        DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n",
                      DP_EDP_BACKLIGHT_MODE_SET_REGISTER);
+        return;
+    }
+
+    edp_backlight_mode = dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK;
+
+    switch (edp_backlight_mode) {
+    case DP_EDP_BACKLIGHT_CONTROL_MODE_PWM:
+    case DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET:
+    case DP_EDP_BACKLIGHT_CONTROL_MODE_PRODUCT:
+        dpcd_buf &= ~DP_EDP_BACKLIGHT_CONTROL_MODE_MASK;
+        dpcd_buf |= DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD;
+        if (drm_dp_dpcd_writeb(&intel_dp->aux,
                               DP_EDP_BACKLIGHT_MODE_SET_REGISTER, dpcd_buf) < 0) {
+            DRM_DEBUG_KMS("Failed to write aux backlight mode\n");
+        }
+        break;
+
+    /* Do nothing when it is already DPCD mode */
+    case DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD:
+    default:
+        break;
+    }
 
-    if ((drm_dp_dpcd_readb(&intel_dp->aux,
-                           DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) == 1) &&
-        ((dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) ==
-         DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET))
-        drm_dp_dpcd_writeb(&intel_dp->aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
-                           (dpcd_buf | DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD));
+    set_aux_backlight_enable(intel_dp, true);
+    intel_dp_aux_set_backlight(connector, connector->panel.backlight.level);
 }
 
 static void intel_dp_aux_disable_backlight(struct intel_connector *connector)
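/*
 * intel_dp_aux_enable_backlight() above now reads the current backlight
 * control mode first and only rewrites DP_EDP_BACKLIGHT_MODE_SET_REGISTER
 * when the TCON is not already in DPCD mode.  The decision, condensed:
 */
u8 mode = dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK;

if (mode != DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) {
    dpcd_buf &= ~DP_EDP_BACKLIGHT_CONTROL_MODE_MASK;
    dpcd_buf |= DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD;
    /* ...then write dpcd_buf back with drm_dp_dpcd_writeb() */
}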
DP_EDP_BACKLIGHT_PIN_ENABLE_CAP) || - (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP))) { + (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP) && + !(intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP)) { DRM_DEBUG_KMS("AUX Backlight Control Supported!\n"); return true; } diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index 0048b520baf7..b79c1c0e404c 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -146,7 +146,8 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); - if (intel_dp->num_sink_rates) + /* eDP 1.4 rate select method. */ + if (!link_bw) drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET, &rate_select, 1); @@ -313,6 +314,24 @@ void intel_dp_stop_link_train(struct intel_dp *intel_dp) void intel_dp_start_link_train(struct intel_dp *intel_dp) { - intel_dp_link_training_clock_recovery(intel_dp); - intel_dp_link_training_channel_equalization(intel_dp); + struct intel_connector *intel_connector = intel_dp->attached_connector; + + if (!intel_dp_link_training_clock_recovery(intel_dp)) + goto failure_handling; + if (!intel_dp_link_training_channel_equalization(intel_dp)) + goto failure_handling; + + DRM_DEBUG_KMS("Link Training Passed at Link Rate = %d, Lane count = %d", + intel_dp->link_rate, intel_dp->lane_count); + return; + + failure_handling: + DRM_DEBUG_KMS("Link Training failed at link rate = %d, lane count = %d", + intel_dp->link_rate, intel_dp->lane_count); + if (!intel_dp_get_link_train_fallback_values(intel_dp, + intel_dp->link_rate, + intel_dp->lane_count)) + /* Schedule a Hotplug Uevent to userspace to start modeset */ + schedule_work(&intel_connector->modeset_retry_work); + return; } diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index c1f62eb07c07..3715386e4272 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -39,7 +39,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, struct intel_dp *intel_dp = &intel_dig_port->dp; struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct drm_atomic_state *state; + struct drm_atomic_state *state = pipe_config->base.state; int bpp; int lane_count, slots; const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; @@ -56,21 +56,26 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, * for MST we always configure max link bw - the spec doesn't * seem to suggest we should do otherwise. 
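 * The link is therefore trained once at the maximum common parameters, and each stream is then carved out of that fixed bandwidth via the VCPI time-slot allocation below.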
*/ - lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + lane_count = intel_dp_max_lane_count(intel_dp); pipe_config->lane_count = lane_count; pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = intel_dp_max_link_rate(intel_dp); - state = pipe_config->base.state; + pipe_config->port_clock = intel_dp_max_link_rate(intel_dp); if (drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, connector->port)) pipe_config->has_audio = true; - mst_pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock, bpp); + mst_pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock, bpp); pipe_config->pbn = mst_pbn; - slots = drm_dp_find_vcpi_slots(&intel_dp->mst_mgr, mst_pbn); + + slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr, + connector->port, mst_pbn); + if (slots < 0) { + DRM_DEBUG_KMS("failed finding vcpi slots:%d\n", slots); + return false; + } intel_link_compute_m_n(bpp, lane_count, adjusted_mode->crtc_clock, @@ -80,7 +85,38 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->dp_m_n.tu = slots; return true; +} + +static int intel_dp_mst_atomic_check(struct drm_connector *connector, + struct drm_connector_state *new_conn_state) +{ + struct drm_atomic_state *state = new_conn_state->state; + struct drm_connector_state *old_conn_state; + struct drm_crtc *old_crtc; + struct drm_crtc_state *crtc_state; + int slots, ret = 0; + + old_conn_state = drm_atomic_get_old_connector_state(state, connector); + old_crtc = old_conn_state->crtc; + if (!old_crtc) + return ret; + + crtc_state = drm_atomic_get_new_crtc_state(state, old_crtc); + slots = to_intel_crtc_state(crtc_state)->dp_m_n.tu; + if (drm_atomic_crtc_needs_modeset(crtc_state) && slots > 0) { + struct drm_dp_mst_topology_mgr *mgr; + struct drm_encoder *old_encoder; + + old_encoder = old_conn_state->best_encoder; + mgr = &enc_to_mst(old_encoder)->primary->dp.mst_mgr; + ret = drm_dp_atomic_release_vcpi_slots(state, mgr, slots); + if (ret) + DRM_DEBUG_KMS("failed releasing %d vcpi slots:%d\n", slots, ret); + else + to_intel_crtc_state(crtc_state)->dp_m_n.tu = 0; + } + return ret; } static void intel_mst_disable_dp(struct intel_encoder *encoder, @@ -294,14 +330,6 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force) return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port); } -static int -intel_dp_mst_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t val) -{ - return 0; -} - static void intel_dp_mst_connector_destroy(struct drm_connector *connector) { @@ -318,8 +346,7 @@ static const struct drm_connector_funcs intel_dp_mst_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .detect = intel_dp_mst_detect, .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = intel_dp_mst_set_property, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_dp_mst_connector_destroy, @@ -343,7 +370,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, int max_rate, mode_rate, max_lanes, max_link_clock; max_link_clock = intel_dp_max_link_rate(intel_dp); - max_lanes = drm_dp_max_lane_count(intel_dp->dpcd); + max_lanes = intel_dp_max_lane_count(intel_dp); max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes); mode_rate = intel_dp_link_required(mode->clock, bpp); @@ -387,6 +414,7 @@ static const struct drm_connector_helper_funcs 
intel_dp_mst_connector_helper_fun .mode_valid = intel_dp_mst_mode_valid, .atomic_best_encoder = intel_mst_atomic_best_encoder, .best_encoder = intel_mst_best_encoder, + .atomic_check = intel_dp_mst_atomic_check, }; static void intel_dp_mst_encoder_destroy(struct drm_encoder *encoder) @@ -459,7 +487,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo drm_mode_connector_attach_encoder(&intel_connector->base, &intel_dp->mst_encoders[i]->base.base); } - intel_dp_add_properties(intel_dp, connector); drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0); drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index aaee3949a422..bd500977b3fc 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -88,7 +88,6 @@ int cpu, ret, timeout = (US) * 1000; \ u64 base; \ _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ - BUILD_BUG_ON((US) > 50000); \ if (!(ATOMIC)) { \ preempt_disable(); \ cpu = smp_processor_id(); \ @@ -130,8 +129,14 @@ ret__; \ }) -#define wait_for_atomic(COND, MS) _wait_for_atomic((COND), (MS) * 1000, 1) -#define wait_for_atomic_us(COND, US) _wait_for_atomic((COND), (US), 1) +#define wait_for_atomic_us(COND, US) \ +({ \ + BUILD_BUG_ON(!__builtin_constant_p(US)); \ + BUILD_BUG_ON((US) > 50000); \ + _wait_for_atomic((COND), (US), 1); \ +}) + +#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000) #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) @@ -321,6 +326,9 @@ struct intel_connector { void *port; /* store this opaque as its illegal to dereference it */ struct intel_dp *mst_port; + + /* Work struct to schedule a uevent on link train failure */ + struct work_struct modeset_retry_work; }; struct dpll { @@ -504,8 +512,8 @@ enum vlv_wm_level { }; struct vlv_wm_state { - struct vlv_pipe_wm wm[NUM_VLV_WM_LEVELS]; - struct vlv_sr_wm sr[NUM_VLV_WM_LEVELS]; + struct g4x_pipe_wm wm[NUM_VLV_WM_LEVELS]; + struct g4x_sr_wm sr[NUM_VLV_WM_LEVELS]; uint8_t num_levels; bool cxsr; }; @@ -514,6 +522,22 @@ struct vlv_fifo_state { u16 plane[I915_MAX_PLANES]; }; +enum g4x_wm_level { + G4X_WM_LEVEL_NORMAL, + G4X_WM_LEVEL_SR, + G4X_WM_LEVEL_HPLL, + NUM_G4X_WM_LEVELS, +}; + +struct g4x_wm_state { + struct g4x_pipe_wm wm; + struct g4x_sr_wm sr; + struct g4x_sr_wm hpll; + bool cxsr; + bool hpll_en; + bool fbc_en; +}; + struct intel_crtc_wm_state { union { struct { @@ -541,7 +565,7 @@ struct intel_crtc_wm_state { struct { /* "raw" watermarks (not inverted) */ - struct vlv_pipe_wm raw[NUM_VLV_WM_LEVELS]; + struct g4x_pipe_wm raw[NUM_VLV_WM_LEVELS]; /* intermediate watermarks (inverted) */ struct vlv_wm_state intermediate; /* optimal watermarks (inverted) */ @@ -549,6 +573,15 @@ struct intel_crtc_wm_state { /* display FIFO split */ struct vlv_fifo_state fifo_state; } vlv; + + struct { + /* "raw" watermarks */ + struct g4x_pipe_wm raw[NUM_G4X_WM_LEVELS]; + /* intermediate watermarks */ + struct g4x_wm_state intermediate; + /* optimal watermarks */ + struct g4x_wm_state optimal; + } g4x; }; /* @@ -766,11 +799,6 @@ struct intel_crtc { int adjusted_x; int adjusted_y; - uint32_t cursor_addr; - uint32_t cursor_cntl; - uint32_t cursor_size; - uint32_t cursor_base; - struct intel_crtc_state *config; /* global reset count when the last flip was submitted */ @@ -786,6 +814,7 @@ struct intel_crtc { union { struct intel_pipe_wm ilk; struct vlv_wm_state vlv; + struct g4x_wm_state g4x; } active; } wm; @@ -811,18 +840,22 
@@ struct intel_plane { int max_downscale; uint32_t frontbuffer_bit; + struct { + u32 base, cntl, size; + } cursor; + /* * NOTE: Do not place new plane state fields here (e.g., when adding * new plane properties). New runtime state should now be placed in * the intel_plane_state structure and accessed via plane_state. */ - void (*update_plane)(struct drm_plane *plane, + void (*update_plane)(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); - void (*disable_plane)(struct drm_plane *plane, - struct drm_crtc *crtc); - int (*check_plane)(struct drm_plane *plane, + void (*disable_plane)(struct intel_plane *plane, + struct intel_crtc *crtc); + int (*check_plane)(struct intel_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state); }; @@ -869,7 +902,6 @@ struct intel_hdmi { bool has_audio; enum hdmi_force_audio force_audio; bool rgb_quant_range_selectable; - enum hdmi_picture_aspect aspect_ratio; struct intel_connector *attached_connector; void (*write_infoframe)(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, @@ -949,13 +981,20 @@ struct intel_dp { uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; uint8_t edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; - /* sink rates as reported by DP_SUPPORTED_LINK_RATES */ - uint8_t num_sink_rates; + /* source rates */ + int num_source_rates; + const int *source_rates; + /* sink rates as reported by DP_MAX_LINK_RATE/DP_SUPPORTED_LINK_RATES */ + int num_sink_rates; int sink_rates[DP_MAX_SUPPORTED_RATES]; - /* Max lane count for the sink as per DPCD registers */ - uint8_t max_sink_lane_count; - /* Max link BW for the sink as per DPCD registers */ - int max_sink_link_bw; + bool use_rate_select; + /* intersection of source and sink rates */ + int num_common_rates; + int common_rates[DP_MAX_SUPPORTED_RATES]; + /* Max lane count for the current link */ + int max_link_lane_count; + /* Max rate for the current link */ + int max_link_rate; /* sink or branch descriptor */ struct intel_dp_desc desc; struct drm_dp_aux aux; @@ -1492,10 +1531,10 @@ void intel_edp_backlight_off(struct intel_dp *intel_dp); void intel_edp_panel_vdd_on(struct intel_dp *intel_dp); void intel_edp_panel_on(struct intel_dp *intel_dp); void intel_edp_panel_off(struct intel_dp *intel_dp); -void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector); void intel_dp_mst_suspend(struct drm_device *dev); void intel_dp_mst_resume(struct drm_device *dev); int intel_dp_max_link_rate(struct intel_dp *intel_dp); +int intel_dp_max_lane_count(struct intel_dp *intel_dp); int intel_dp_rate_select(struct intel_dp *intel_dp, int rate); void intel_dp_hot_plug(struct intel_encoder *intel_encoder); void intel_power_sequencer_reset(struct drm_i915_private *dev_priv); @@ -1826,6 +1865,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, struct intel_rps_client *rps, unsigned long submitted); void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req); +void g4x_wm_get_hw_state(struct drm_device *dev); void vlv_wm_get_hw_state(struct drm_device *dev); void ilk_wm_get_hw_state(struct drm_device *dev); void skl_wm_get_hw_state(struct drm_device *dev); @@ -1833,6 +1873,7 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */); void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, struct skl_pipe_wm *out); +void g4x_wm_sanitize(struct drm_i915_private *dev_priv); void 
vlv_wm_sanitize(struct drm_i915_private *dev_priv); bool intel_can_enable_sagv(struct drm_atomic_state *state); int intel_enable_sagv(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 0dce7792643a..7158c7ce9c09 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -694,8 +694,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) clk_zero_cnt << 8 | prepare_cnt; /* - * LP to HS switch count = 4TLPX + PREP_COUNT * 2 + EXIT_ZERO_COUNT * 2 - * + 10UI + Extra Byte Count + * LP to HS switch count = 4TLPX + PREP_COUNT * mul + EXIT_ZERO_COUNT * + * mul + 10UI + Extra Byte Count * * HS to LP switch count = THS-TRAIL + 2TLPX + Extra Byte Count * Extra Byte Count is calculated according to number of lanes. @@ -708,8 +708,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) /* B044 */ /* FIXME: * The comment above does not match with the code */ - lp_to_hs_switch = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * 2 + - exit_zero_cnt * 2 + 10, 8); + lp_to_hs_switch = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * mul + + exit_zero_cnt * mul + 10, 8); hs_to_lp_switch = DIV_ROUND_UP(mipi_config->ths_trail + 2 * tlpx_ui, 8); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 6025839ed3b7..c1544a53095d 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -350,7 +350,7 @@ static const struct drm_connector_funcs intel_dvo_connector_funcs = { .early_unregister = intel_connector_unregister, .destroy = intel_dvo_destroy, .fill_modes = drm_helper_probe_single_connector_modes, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, }; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 854e8e0c836b..413bfd8d4bf4 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -26,69 +26,177 @@ #include "intel_ringbuffer.h" #include "intel_lrc.h" -static const struct engine_info { +/* Haswell does have the CXT_SIZE register however it does not appear to be + * valid. Now, docs explain in dwords what is in the context object. The full + * size is 70720 bytes, however, the power context and execlist context will + * never be saved (power context is stored elsewhere, and execlists don't work + * on HSW) - so the final size, including the extra state required for the + * Resource Streamer, is 66944 bytes, which rounds to 17 pages. + */ +#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) +/* Same as Haswell, but 72064 bytes now. 
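+ * (72064 bytes rounds up to 18 * PAGE_SIZE, hence the define that follows.)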
*/ +#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) + +struct engine_class_info { const char *name; - unsigned int exec_id; + int (*init_legacy)(struct intel_engine_cs *engine); + int (*init_execlists)(struct intel_engine_cs *engine); +}; + +static const struct engine_class_info intel_engine_classes[] = { + [RENDER_CLASS] = { + .name = "rcs", + .init_execlists = logical_render_ring_init, + .init_legacy = intel_init_render_ring_buffer, + }, + [COPY_ENGINE_CLASS] = { + .name = "bcs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_blt_ring_buffer, + }, + [VIDEO_DECODE_CLASS] = { + .name = "vcs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd_ring_buffer, + }, + [VIDEO_ENHANCEMENT_CLASS] = { + .name = "vecs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_vebox_ring_buffer, + }, +}; + +struct engine_info { unsigned int hw_id; + unsigned int uabi_id; + u8 class; + u8 instance; u32 mmio_base; unsigned irq_shift; - int (*init_legacy)(struct intel_engine_cs *engine); - int (*init_execlists)(struct intel_engine_cs *engine); -} intel_engines[] = { +}; + +static const struct engine_info intel_engines[] = { [RCS] = { - .name = "rcs", .hw_id = RCS_HW, - .exec_id = I915_EXEC_RENDER, + .uabi_id = I915_EXEC_RENDER, + .class = RENDER_CLASS, + .instance = 0, .mmio_base = RENDER_RING_BASE, .irq_shift = GEN8_RCS_IRQ_SHIFT, - .init_execlists = logical_render_ring_init, - .init_legacy = intel_init_render_ring_buffer, }, [BCS] = { - .name = "bcs", .hw_id = BCS_HW, - .exec_id = I915_EXEC_BLT, + .uabi_id = I915_EXEC_BLT, + .class = COPY_ENGINE_CLASS, + .instance = 0, .mmio_base = BLT_RING_BASE, .irq_shift = GEN8_BCS_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_blt_ring_buffer, }, [VCS] = { - .name = "vcs", .hw_id = VCS_HW, - .exec_id = I915_EXEC_BSD, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 0, .mmio_base = GEN6_BSD_RING_BASE, .irq_shift = GEN8_VCS1_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_bsd_ring_buffer, }, [VCS2] = { - .name = "vcs2", .hw_id = VCS2_HW, - .exec_id = I915_EXEC_BSD, + .uabi_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 1, .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_bsd2_ring_buffer, }, [VECS] = { - .name = "vecs", .hw_id = VECS_HW, - .exec_id = I915_EXEC_VEBOX, + .uabi_id = I915_EXEC_VEBOX, + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 0, .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_vebox_ring_buffer, }, }; +/** + * __intel_engine_context_size() - return the size of the context for an engine + * @dev_priv: i915 device private + * @class: engine class + * + * Each engine class may require a different amount of space for a context + * image. + * + * Return: size (in bytes) of an engine class specific context image + * + * Note: this size includes the HWSP, which is part of the context image + * in LRC mode, but does not include the "shared data page" used with + * GuC submission. The caller should account for this if using the GuC.
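+ * For gen6 and most gen7 parts the render size is read back from the CXT_SIZE registers at runtime (Haswell being the exception); gen8+ uses the fixed per-gen constants defined above.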
+ */ +static u32 +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) +{ + u32 cxt_size; + + BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE); + + switch (class) { + case RENDER_CLASS: + switch (INTEL_GEN(dev_priv)) { + default: + MISSING_CASE(INTEL_GEN(dev_priv)); + case 9: + return GEN9_LR_CONTEXT_RENDER_SIZE; + case 8: + return i915.enable_execlists ? + GEN8_LR_CONTEXT_RENDER_SIZE : + GEN8_CXT_TOTAL_SIZE; + case 7: + if (IS_HASWELL(dev_priv)) + return HSW_CXT_TOTAL_SIZE; + + cxt_size = I915_READ(GEN7_CXT_SIZE); + return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64, + PAGE_SIZE); + case 6: + cxt_size = I915_READ(CXT_SIZE); + return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64, + PAGE_SIZE); + case 5: + case 4: + case 3: + case 2: + /* For the special day when i810 gets merged. */ + case 1: + return 0; + } + break; + default: + MISSING_CASE(class); + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + case COPY_ENGINE_CLASS: + if (INTEL_GEN(dev_priv) < 8) + return 0; + return GEN8_LR_CONTEXT_OTHER_SIZE; + } +} + static int intel_engine_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) { const struct engine_info *info = &intel_engines[id]; + const struct engine_class_info *class_info; struct intel_engine_cs *engine; + GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); + class_info = &intel_engine_classes[info->class]; + GEM_BUG_ON(dev_priv->engine[id]); engine = kzalloc(sizeof(*engine), GFP_KERNEL); if (!engine) @@ -96,11 +204,20 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->id = id; engine->i915 = dev_priv; - engine->name = info->name; - engine->exec_id = info->exec_id; + WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", + class_info->name, info->instance) >= + sizeof(engine->name)); + engine->uabi_id = info->uabi_id; engine->hw_id = engine->guc_id = info->hw_id; engine->mmio_base = info->mmio_base; engine->irq_shift = info->irq_shift; + engine->class = info->class; + engine->instance = info->instance; + + engine->context_size = __intel_engine_context_size(dev_priv, + engine->class); + if (WARN_ON(engine->context_size > BIT(20))) + engine->context_size = 0; /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; @@ -112,18 +229,18 @@ intel_engine_setup(struct drm_i915_private *dev_priv, } /** - * intel_engines_init_early() - allocate the Engine Command Streamers + * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers * @dev_priv: i915 device private * * Return: non-zero if the initialization failed. 
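+ * This early phase only allocates the engine structs and derives the engine mask; the per-engine hardware setup happens later, in intel_engines_init().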
*/ -int intel_engines_init_early(struct drm_i915_private *dev_priv) +int intel_engines_init_mmio(struct drm_i915_private *dev_priv) { struct intel_device_info *device_info = mkwrite_device_info(dev_priv); - unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; - unsigned int mask = 0; + const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; struct intel_engine_cs *engine; enum intel_engine_id id; + unsigned int mask = 0; unsigned int i; int err; @@ -150,6 +267,12 @@ int intel_engines_init_early(struct drm_i915_private *dev_priv) if (WARN_ON(mask != ring_mask)) device_info->ring_mask = mask; + /* We always presume we have at least RCS available for later probing */ + if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) { + err = -ENODEV; + goto cleanup; + } + device_info->num_rings = hweight32(mask); return 0; @@ -161,7 +284,7 @@ cleanup: } /** - * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * intel_engines_init() - init the Engine Command Streamers * @dev_priv: i915 device private * * Return: non-zero if the initialization failed. @@ -175,12 +298,14 @@ int intel_engines_init(struct drm_i915_private *dev_priv) int err = 0; for_each_engine(engine, dev_priv, id) { + const struct engine_class_info *class_info = + &intel_engine_classes[engine->class]; int (*init)(struct intel_engine_cs *engine); if (i915.enable_execlists) - init = intel_engines[id].init_execlists; + init = class_info->init_execlists; else - init = intel_engines[id].init_legacy; + init = class_info->init_legacy; if (!init) { kfree(engine); dev_priv->engine[id] = NULL; @@ -223,6 +348,9 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) { struct drm_i915_private *dev_priv = engine->i915; + GEM_BUG_ON(!intel_engine_is_idle(engine)); + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); + /* Our semaphore implementation is strictly monotonic (i.e. we proceed * so long as the semaphore value in the register/page is greater * than the sync value), so whenever we reset the seqno, @@ -253,13 +381,12 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); - engine->hangcheck.seqno = seqno; - /* After manually advancing the seqno, fake the interrupt in case * there are any waiters for that seqno. */ intel_engine_wakeup(engine); + + GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } static void intel_engine_init_timeline(struct intel_engine_cs *engine) @@ -342,6 +469,7 @@ static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) */ int intel_engine_init_common(struct intel_engine_cs *engine) { + struct intel_ring *ring; int ret; engine->set_default_submission(engine); @@ -353,9 +481,9 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * be available. To avoid this we always pin the default * context. 
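+ * The pin now returns the context's ring rather than an errno, which is what the IS_ERR(ring) check just below handles.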
*/ - ret = engine->context_pin(engine, engine->i915->kernel_context); - if (ret) - return ret; + ring = engine->context_pin(engine, engine->i915->kernel_context); + if (IS_ERR(ring)) + return PTR_ERR(ring); ret = intel_engine_init_breadcrumbs(engine); if (ret) @@ -723,8 +851,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) */ } + /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_YV12_BUGFIX | GEN9_ENABLE_GPGPU_PREEMPTION); /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ @@ -1086,17 +1216,24 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + /* More white lies, if wedged, hw state is inconsistent */ + if (i915_terminally_wedged(&dev_priv->gpu_error)) + return true; + /* Any inflight/incomplete requests? */ if (!i915_seqno_passed(intel_engine_get_seqno(engine), intel_engine_last_submit(engine))) return false; + if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock)) + return true; + /* Interrupt/tasklet pending? */ if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) return false; /* Both ports drained, no more ELSP submission? */ - if (engine->execlist_port[0].request) + if (port_request(&engine->execlist_port[0])) return false; /* Ring stopped? */ @@ -1137,6 +1274,18 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) engine->set_default_submission(engine); } +void intel_engines_mark_idle(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + intel_engine_disarm_breadcrumbs(engine); + i915_gem_batch_pool_fini(&engine->batch_pool); + engine->no_priolist = false; + } +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index ded2add18b26..ff2fc5bc4af4 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -801,7 +801,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return false; } if (INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv) && - cache->plane.rotation != DRM_ROTATE_0) { + cache->plane.rotation != DRM_MODE_ROTATE_0) { fbc->no_fbc_reason = "rotation unsupported"; return false; } @@ -1312,14 +1312,12 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv) { -#ifdef CONFIG_INTEL_IOMMU /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ - if (intel_iommu_gfx_mapped && + if (intel_vtd_active() && (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) { DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n"); return true; } -#endif return false; } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 332254a8eebe..03347c6ae599 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -211,7 +211,7 @@ static int intelfb_create(struct drm_fb_helper *helper, * This also validates that any existing fb inherited from the * BIOS is suitable for own access. 
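 * (In practice this means the inherited framebuffer must be pinnable and fenceable in the GGTT, which is exactly what is attempted below.)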
*/ - vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); + vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_MODE_ROTATE_0); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out_unlock; diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c new file mode 100644 index 000000000000..c4cbec140101 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_ct.c @@ -0,0 +1,461 @@ +/* + * Copyright © 2016-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "i915_drv.h" +#include "intel_guc_ct.h" + +enum { CTB_SEND = 0, CTB_RECV = 1 }; + +enum { CTB_OWNER_HOST = 0 }; + +void intel_guc_ct_init_early(struct intel_guc_ct *ct) +{ + /* we're using static channel owners */ + ct->host_channel.owner = CTB_OWNER_HOST; +} + +static inline const char *guc_ct_buffer_type_to_str(u32 type) +{ + switch (type) { + case INTEL_GUC_CT_BUFFER_TYPE_SEND: + return "SEND"; + case INTEL_GUC_CT_BUFFER_TYPE_RECV: + return "RECV"; + default: + return "<invalid>"; + } +} + +static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, + u32 cmds_addr, u32 size, u32 owner) +{ + DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n", + desc, cmds_addr, size, owner); + memset(desc, 0, sizeof(*desc)); + desc->addr = cmds_addr; + desc->size = size; + desc->owner = owner; +} + +static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc) +{ + DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n", + desc, desc->head, desc->tail); + desc->head = 0; + desc->tail = 0; + desc->is_in_error = 0; +} + +static int guc_action_register_ct_buffer(struct intel_guc *guc, + u32 desc_addr, + u32 type) +{ + u32 action[] = { + INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER, + desc_addr, + sizeof(struct guc_ct_buffer_desc), + type + }; + int err; + + /* Can't use generic send(), CT registration must go over MMIO */ + err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + if (err) + DRM_ERROR("CT: register %s buffer failed; err=%d\n", + guc_ct_buffer_type_to_str(type), err); + return err; +} + +static int guc_action_deregister_ct_buffer(struct intel_guc *guc, + u32 owner, + u32 type) +{ + u32 action[] = { + INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER, + owner, + type + }; + int err; + + /* Can't use generic send(), CT deregistration must go over MMIO */ + err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + if (err) + DRM_ERROR("CT: deregister %s buffer failed; 
owner=%d err=%d\n", + guc_ct_buffer_type_to_str(type), owner, err); + return err; +} + +static bool ctch_is_open(struct intel_guc_ct_channel *ctch) +{ + return ctch->vma != NULL; +} + +static int ctch_init(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch) +{ + struct i915_vma *vma; + void *blob; + int err; + int i; + + GEM_BUG_ON(ctch->vma); + + /* We allocate 1 page to hold both descriptors and both buffers. + * ___________..................... + * |desc (SEND)| : + * |___________| PAGE/4 + * :___________....................: + * |desc (RECV)| : + * |___________| PAGE/4 + * :_______________________________: + * |cmds (SEND) | + * | PAGE/4 + * |_______________________________| + * |cmds (RECV) | + * | PAGE/4 + * |_______________________________| + * + * Each message can use a maximum of 32 dwords and we don't expect to + * have more than 1 in flight at any time, so we have enough space. + * Some logic further ahead will rely on the fact that there is only 1 + * page and that it is always mapped, so if the size is changed the + * other code will need updating as well. + */ + + /* allocate vma */ + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_out; + } + ctch->vma = vma; + + /* map first page */ + blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(blob)) { + err = PTR_ERR(blob); + goto err_vma; + } + DRM_DEBUG_DRIVER("CT: vma base=%#x\n", guc_ggtt_offset(ctch->vma)); + + /* store pointers to desc and cmds */ + for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { + GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); + ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i; + ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2; + } + + return 0; + +err_vma: + i915_vma_unpin_and_release(&ctch->vma); +err_out: + DRM_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n", + ctch->owner, err); + return err; +} + +static void ctch_fini(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch) +{ + GEM_BUG_ON(!ctch->vma); + + i915_gem_object_unpin_map(ctch->vma->obj); + i915_vma_unpin_and_release(&ctch->vma); +} + +static int ctch_open(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch) +{ + u32 base; + int err; + int i; + + DRM_DEBUG_DRIVER("CT: channel %d reopen=%s\n", + ctch->owner, yesno(ctch_is_open(ctch))); + + if (!ctch->vma) { + err = ctch_init(guc, ctch); + if (unlikely(err)) + goto err_out; + } + + /* vma should be already allocated and map'ed */ + base = guc_ggtt_offset(ctch->vma); + + /* (re)initialize descriptors + * cmds buffers are in the second half of the blob page + */ + for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { + GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); + guc_ct_buffer_desc_init(ctch->ctbs[i].desc, + base + PAGE_SIZE/4 * i + PAGE_SIZE/2, + PAGE_SIZE/4, + ctch->owner); + } + + /* register buffers, starting wirh RECV buffer + * descriptors are in first half of the blob + */ + err = guc_action_register_ct_buffer(guc, + base + PAGE_SIZE/4 * CTB_RECV, + INTEL_GUC_CT_BUFFER_TYPE_RECV); + if (unlikely(err)) + goto err_fini; + + err = guc_action_register_ct_buffer(guc, + base + PAGE_SIZE/4 * CTB_SEND, + INTEL_GUC_CT_BUFFER_TYPE_SEND); + if (unlikely(err)) + goto err_deregister; + + return 0; + +err_deregister: + guc_action_deregister_ct_buffer(guc, + ctch->owner, + INTEL_GUC_CT_BUFFER_TYPE_RECV); +err_fini: + ctch_fini(guc, ctch); +err_out: + DRM_ERROR("CT: can't open channel %d; err=%d\n", ctch->owner, err); + return err; +} + +static void ctch_close(struct intel_guc *guc, + struct 
intel_guc_ct_channel *ctch) +{ + GEM_BUG_ON(!ctch_is_open(ctch)); + + guc_action_deregister_ct_buffer(guc, + ctch->owner, + INTEL_GUC_CT_BUFFER_TYPE_SEND); + guc_action_deregister_ct_buffer(guc, + ctch->owner, + INTEL_GUC_CT_BUFFER_TYPE_RECV); + ctch_fini(guc, ctch); +} + +static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch) +{ + /* For now it's trivial */ + return ++ctch->next_fence; +} + +static int ctb_write(struct intel_guc_ct_buffer *ctb, + const u32 *action, + u32 len /* in dwords */, + u32 fence) +{ + struct guc_ct_buffer_desc *desc = ctb->desc; + u32 head = desc->head / 4; /* in dwords */ + u32 tail = desc->tail / 4; /* in dwords */ + u32 size = desc->size / 4; /* in dwords */ + u32 used; /* in dwords */ + u32 header; + u32 *cmds = ctb->cmds; + unsigned int i; + + GEM_BUG_ON(desc->size % 4); + GEM_BUG_ON(desc->head % 4); + GEM_BUG_ON(desc->tail % 4); + GEM_BUG_ON(tail >= size); + + /* + * tail == head condition indicates empty. GuC FW does not support + * using up the entire buffer to get tail == head meaning full. + */ + if (tail < head) + used = (size - head) + tail; + else + used = tail - head; + + /* make sure there is a space including extra dw for the fence */ + if (unlikely(used + len + 1 >= size)) + return -ENOSPC; + + /* Write the message. The format is the following: + * DW0: header (including action code) + * DW1: fence + * DW2+: action data + */ + header = (len << GUC_CT_MSG_LEN_SHIFT) | + (GUC_CT_MSG_WRITE_FENCE_TO_DESC) | + (action[0] << GUC_CT_MSG_ACTION_SHIFT); + + cmds[tail] = header; + tail = (tail + 1) % size; + + cmds[tail] = fence; + tail = (tail + 1) % size; + + for (i = 1; i < len; i++) { + cmds[tail] = action[i]; + tail = (tail + 1) % size; + } + + /* now update desc tail (back in bytes) */ + desc->tail = tail * 4; + GEM_BUG_ON(desc->tail > desc->size); + + return 0; +} + +/* Wait for the response from the GuC. + * @fence: response fence + * @status: placeholder for status + * return: 0 response received (status is valid) + * -ETIMEDOUT no response within hardcoded timeout + * -EPROTO no response, ct buffer was in error + */ +static int wait_for_response(struct guc_ct_buffer_desc *desc, + u32 fence, + u32 *status) +{ + int err; + + /* + * Fast commands should complete in less than 10us, so sample quickly + * up to that length of time, then switch to a slower sleep-wait loop. + * No GuC command should ever take longer than 10ms. 
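+ * Hence the two-stage wait below: wait_for_us() busy-polls for up to 10us, then wait_for() sleep-waits with a 10ms timeout.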
+ */ +#define done (READ_ONCE(desc->fence) == fence) + err = wait_for_us(done, 10); + if (err) + err = wait_for(done, 10); +#undef done + + if (unlikely(err)) { + DRM_ERROR("CT: fence %u failed; reported fence=%u\n", + fence, desc->fence); + + if (WARN_ON(desc->is_in_error)) { + /* Something went wrong with the messaging, try to reset + * the buffer and hope for the best + */ + guc_ct_buffer_desc_reset(desc); + err = -EPROTO; + } + } + + *status = desc->status; + return err; +} + +static int ctch_send(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch, + const u32 *action, + u32 len, + u32 *status) +{ + struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND]; + struct guc_ct_buffer_desc *desc = ctb->desc; + u32 fence; + int err; + + GEM_BUG_ON(!ctch_is_open(ctch)); + GEM_BUG_ON(!len); + GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); + + fence = ctch_get_next_fence(ctch); + err = ctb_write(ctb, action, len, fence); + if (unlikely(err)) + return err; + + intel_guc_notify(guc); + + err = wait_for_response(desc, fence, status); + if (unlikely(err)) + return err; + if (*status != INTEL_GUC_STATUS_SUCCESS) + return -EIO; + return 0; +} + +/* + * Command Transport (CT) buffer based GuC send function. + */ +static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len) +{ + struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + u32 status = ~0; /* undefined */ + int err; + + mutex_lock(&guc->send_mutex); + + err = ctch_send(guc, ctch, action, len, &status); + if (unlikely(err)) { + DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n", + action[0], err, status); + } + + mutex_unlock(&guc->send_mutex); + return err; +} + +/** + * Enable buffer based command transport + * Shall only be called for platforms with HAS_GUC_CT. + * @guc: the guc + * return: 0 on success + * non-zero on failure + */ +int intel_guc_enable_ct(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + int err; + + GEM_BUG_ON(!HAS_GUC_CT(dev_priv)); + + err = ctch_open(guc, ctch); + if (unlikely(err)) + return err; + + /* Switch into cmd transport buffer based send() */ + guc->send = intel_guc_send_ct; + DRM_INFO("CT: %s\n", enableddisabled(true)); + return 0; +} + +/** + * Disable buffer based command transport. + * Shall only be called for platforms with HAS_GUC_CT. 
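+ * After this call guc->send points back at intel_guc_send_nop(), as set in the function body below.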
+ * @guc: the guc + */ +void intel_guc_disable_ct(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + + GEM_BUG_ON(!HAS_GUC_CT(dev_priv)); + + if (!ctch_is_open(ctch)) + return; + + ctch_close(guc, ctch); + + /* Disable send */ + guc->send = intel_guc_send_nop; + DRM_INFO("CT: %s\n", enableddisabled(false)); +} diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h new file mode 100644 index 000000000000..6d97f36fcc62 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_ct.h @@ -0,0 +1,86 @@ +/* + * Copyright © 2016-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _INTEL_GUC_CT_H_ +#define _INTEL_GUC_CT_H_ + +struct intel_guc; +struct i915_vma; + +#include "intel_guc_fwif.h" + +/** + * DOC: Command Transport (CT). + * + * Buffer based command transport is a replacement for MMIO based mechanism. + * It can be used to perform both host-2-guc and guc-to-host communication. + */ + +/** Represents single command transport buffer. + * + * A single command transport buffer consists of two parts, the header + * record (command transport buffer descriptor) and the actual buffer which + * holds the commands. + * + * @desc: pointer to the buffer descriptor + * @cmds: pointer to the commands buffer + */ +struct intel_guc_ct_buffer { + struct guc_ct_buffer_desc *desc; + u32 *cmds; +}; + +/** Represents pair of command transport buffers. + * + * Buffers go in pairs to allow bi-directional communication. + * To simplify the code we place both of them in the same vma. + * Buffers from the same pair must share unique owner id. + * + * @vma: pointer to the vma with pair of CT buffers + * @ctbs: buffers for sending(0) and receiving(1) commands + * @owner: unique identifier + * @next_fence: fence to be used with next send command + */ +struct intel_guc_ct_channel { + struct i915_vma *vma; + struct intel_guc_ct_buffer ctbs[2]; + u32 owner; + u32 next_fence; +}; + +/** Holds all command transport channels. + * + * @host_channel: main channel used by the host + */ +struct intel_guc_ct { + struct intel_guc_ct_channel host_channel; + /* other channels are tbd */ +}; + +void intel_guc_ct_init_early(struct intel_guc_ct *ct); + +/* XXX: move to intel_uc.h ? 
don't fit there either */ +int intel_guc_enable_ct(struct intel_guc *guc); +void intel_guc_disable_ct(struct intel_guc *guc); + +#endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index cb36cbf3818f..5fa286074811 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -23,8 +23,8 @@ #ifndef _INTEL_GUC_FWIF_H #define _INTEL_GUC_FWIF_H -#define GFXCORE_FAMILY_GEN9 12 -#define GFXCORE_FAMILY_UNKNOWN 0x7fffffff +#define GUC_CORE_FAMILY_GEN9 12 +#define GUC_CORE_FAMILY_UNKNOWN 0x7fffffff #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 @@ -331,6 +331,47 @@ struct guc_stage_desc { u64 desc_private; } __packed; +/* + * Describes single command transport buffer. + * Used by both guc-master and clients. + */ +struct guc_ct_buffer_desc { + u32 addr; /* gfx address */ + u64 host_private; /* host private data */ + u32 size; /* size in bytes */ + u32 head; /* offset updated by GuC*/ + u32 tail; /* offset updated by owner */ + u32 is_in_error; /* error indicator */ + u32 fence; /* fence updated by GuC */ + u32 status; /* status updated by GuC */ + u32 owner; /* id of the channel owner */ + u32 owner_sub_id; /* owner-defined field for extra tracking */ + u32 reserved[5]; +} __packed; + +/* Type of command transport buffer */ +#define INTEL_GUC_CT_BUFFER_TYPE_SEND 0x0u +#define INTEL_GUC_CT_BUFFER_TYPE_RECV 0x1u + +/* + * Definition of the command transport message header (DW0) + * + * bit[4..0] message len (in dwords) + * bit[7..5] reserved + * bit[8] write fence to desc + * bit[9] write status to H2G buff + * bit[10] send status (via G2H) + * bit[15..11] reserved + * bit[31..16] action code + */ +#define GUC_CT_MSG_LEN_SHIFT 0 +#define GUC_CT_MSG_LEN_MASK 0x1F +#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8) +#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9) +#define GUC_CT_MSG_SEND_STATUS (1 << 10) +#define GUC_CT_MSG_ACTION_SHIFT 16 +#define GUC_CT_MSG_ACTION_MASK 0xFFFF + #define GUC_FORCEWAKE_RENDER (1 << 0) #define GUC_FORCEWAKE_MEDIA (1 << 1) @@ -515,6 +556,8 @@ enum intel_guc_action { INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, + INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 8a1a023e48b2..d9045b6e897b 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -61,6 +61,9 @@ #define KBL_FW_MAJOR 9 #define KBL_FW_MINOR 14 +#define GLK_FW_MAJOR 10 +#define GLK_FW_MINOR 56 + #define GUC_FW_PATH(platform, major, minor) \ "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin" @@ -73,6 +76,8 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE); #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) MODULE_FIRMWARE(I915_KBL_GUC_UCODE); +#define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR) + static u32 get_gttype(struct drm_i915_private *dev_priv) { @@ -86,11 +91,11 @@ static u32 get_core_family(struct drm_i915_private *dev_priv) switch (gen) { case 9: - return GFXCORE_FAMILY_GEN9; + return GUC_CORE_FAMILY_GEN9; default: - WARN(1, "GEN%d does not support GuC operation!\n", gen); - return GFXCORE_FAMILY_UNKNOWN; + MISSING_CASE(gen); + return 
GUC_CORE_FAMILY_UNKNOWN; } } @@ -280,10 +285,6 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* init WOPCM */ - I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); - I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE); - /* Enable MIA caching. GuC clock gating is disabled. */ I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE); @@ -405,6 +406,10 @@ int intel_guc_select_fw(struct intel_guc *guc) guc->fw.path = I915_KBL_GUC_UCODE; guc->fw.major_ver_wanted = KBL_FW_MAJOR; guc->fw.minor_ver_wanted = KBL_FW_MINOR; + } else if (IS_GEMINILAKE(dev_priv)) { + guc->fw.path = I915_GLK_GUC_UCODE; + guc->fw.major_ver_wanted = GLK_FW_MAJOR; + guc->fw.minor_ver_wanted = GLK_FW_MINOR; } else { DRM_ERROR("No GuC firmware known for platform with GuC!\n"); return -ENOENT; diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 6fb63a3c65b0..16d3b8719cab 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -359,12 +359,16 @@ static int guc_log_runtime_create(struct intel_guc *guc) void *vaddr; struct rchan *guc_log_relay_chan; size_t n_subbufs, subbuf_size; - int ret = 0; + int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); GEM_BUG_ON(guc_log_has_runtime(guc)); + ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true); + if (ret) + return ret; + /* Create a WC (Uncached for read) vmalloc mapping of log * buffer pages, so that we can directly get the data * (up-to-date) from memory. diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index dce742243ba6..9b0ece427bdc 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -407,7 +407,7 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, "%s, ", engine->name); msg[len-2] = '\0'; - return i915_handle_error(i915, hung, msg); + return i915_handle_error(i915, hung, "%s", msg); } /* diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 1d623b5e09d6..58d690393b29 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1327,6 +1327,11 @@ static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state) return false; } + /* Display Wa #1139 */ + if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1) && + crtc_state->base.adjusted_mode.htotal > 5460) + return false; + return true; } @@ -1392,7 +1397,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, } if (!pipe_config->bw_constrained) { - DRM_DEBUG_KMS("forcing pipe bpc to %i for HDMI\n", desired_bpp); + DRM_DEBUG_KMS("forcing pipe bpp to %i for HDMI\n", desired_bpp); pipe_config->pipe_bpp = desired_bpp; } @@ -1403,7 +1408,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, } /* Set user selected PAR to incoming mode's member */ - adjusted_mode->picture_aspect_ratio = intel_hdmi->aspect_ratio; + adjusted_mode->picture_aspect_ratio = conn_state->picture_aspect_ratio; pipe_config->lane_count = 4; @@ -1649,19 +1654,7 @@ intel_hdmi_set_property(struct drm_connector *connector, } if (property == connector->dev->mode_config.aspect_ratio_property) { - switch (val) { - case DRM_MODE_PICTURE_ASPECT_NONE: - intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_NONE; - break; - case DRM_MODE_PICTURE_ASPECT_4_3: - intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_4_3; - break; - case DRM_MODE_PICTURE_ASPECT_16_9: - intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_16_9; - 
break; - default: - return -EINVAL; - } + connector->state->picture_aspect_ratio = val; goto done; } @@ -1823,7 +1816,7 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c intel_attach_broadcast_rgb_property(connector); intel_hdmi->color_range_auto = true; intel_attach_aspect_ratio_property(connector); - intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_NONE; + connector->state->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE; } /* diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 9ee819666a4c..f5eb18d0e2d1 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -52,6 +52,10 @@ #define KBL_HUC_FW_MINOR 00 #define KBL_BLD_NUM 1810 +#define GLK_HUC_FW_MAJOR 02 +#define GLK_HUC_FW_MINOR 00 +#define GLK_BLD_NUM 1748 + #define HUC_FW_PATH(platform, major, minor, bld_num) \ "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ __stringify(minor) "_" __stringify(bld_num) ".bin" @@ -68,6 +72,9 @@ MODULE_FIRMWARE(I915_BXT_HUC_UCODE); KBL_HUC_FW_MINOR, KBL_BLD_NUM) MODULE_FIRMWARE(I915_KBL_HUC_UCODE); +#define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \ + GLK_HUC_FW_MINOR, GLK_BLD_NUM) + /** * huc_ucode_xfer() - DMA's the firmware * @dev_priv: the drm_i915_private device @@ -99,11 +106,6 @@ static int huc_ucode_xfer(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* init WOPCM */ - I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); - I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE | - HUC_LOADING_AGENT_GUC); - /* Set the source address for the uCode */ offset = guc_ggtt_offset(vma) + huc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); @@ -169,6 +171,10 @@ void intel_huc_select_fw(struct intel_huc *huc) huc->fw.path = I915_KBL_HUC_UCODE; huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; + } else if (IS_GEMINILAKE(dev_priv)) { + huc->fw.path = I915_GLK_HUC_UCODE; + huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR; } else { DRM_ERROR("No HuC firmware known for platform with HuC!\n"); return; @@ -186,68 +192,36 @@ void intel_huc_select_fw(struct intel_huc *huc) * earlier call to intel_huc_init(), so here we need only check that * it succeeded, and then transfer the image to the h/w.
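+ * A failure here is only logged; the function no longer propagates an error, matching its new void return type.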
* - * Return: non-zero code on error */ -int intel_huc_init_hw(struct intel_huc *huc) +void intel_huc_init_hw(struct intel_huc *huc) { struct drm_i915_private *dev_priv = huc_to_i915(huc); int err; - if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_NONE) - return 0; - DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", huc->fw.path, intel_uc_fw_status_repr(huc->fw.fetch_status), intel_uc_fw_status_repr(huc->fw.load_status)); - if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_SUCCESS && - huc->fw.load_status == INTEL_UC_FIRMWARE_FAIL) - return -ENOEXEC; + if (huc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return; huc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; - switch (huc->fw.fetch_status) { - case INTEL_UC_FIRMWARE_FAIL: - /* something went wrong :( */ - err = -EIO; - goto fail; - - case INTEL_UC_FIRMWARE_NONE: - case INTEL_UC_FIRMWARE_PENDING: - default: - /* "can't happen" */ - WARN_ONCE(1, "HuC fw %s invalid fetch_status %s [%d]\n", - huc->fw.path, - intel_uc_fw_status_repr(huc->fw.fetch_status), - huc->fw.fetch_status); - err = -ENXIO; - goto fail; - - case INTEL_UC_FIRMWARE_SUCCESS: - break; - } - err = huc_ucode_xfer(dev_priv); - if (err) - goto fail; - huc->fw.load_status = INTEL_UC_FIRMWARE_SUCCESS; + huc->fw.load_status = err ? + INTEL_UC_FIRMWARE_FAIL : INTEL_UC_FIRMWARE_SUCCESS; DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", huc->fw.path, intel_uc_fw_status_repr(huc->fw.fetch_status), intel_uc_fw_status_repr(huc->fw.load_status)); - return 0; - -fail: - if (huc->fw.load_status == INTEL_UC_FIRMWARE_PENDING) - huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL; - - DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err); + if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err); - return err; + return; } /** diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 668f00480d97..3bf65288ffff 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -111,6 +111,11 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) pinfo.size_data = sizeof(*pdata); pinfo.dma_mask = DMA_BIT_MASK(32); + pdata->num_pipes = INTEL_INFO(dev_priv)->num_pipes; + pdata->num_ports = IS_CHERRYVIEW(dev_priv) ? 
3 : 2; /* B,C,D or B,C */ + pdata->port[0].pipe = -1; + pdata->port[1].pipe = -1; + pdata->port[2].pipe = -1; spin_lock_init(&pdata->lpe_audio_slock); platdev = platform_device_register_full(&pinfo); @@ -149,44 +154,10 @@ static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv) static void lpe_audio_irq_unmask(struct irq_data *d) { - struct drm_i915_private *dev_priv = d->chip_data; - unsigned long irqflags; - u32 val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT); - - if (IS_CHERRYVIEW(dev_priv)) - val |= I915_LPE_PIPE_C_INTERRUPT; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - - dev_priv->irq_mask &= ~val; - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IMR, dev_priv->irq_mask); - POSTING_READ(VLV_IMR); - - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } static void lpe_audio_irq_mask(struct irq_data *d) { - struct drm_i915_private *dev_priv = d->chip_data; - unsigned long irqflags; - u32 val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT); - - if (IS_CHERRYVIEW(dev_priv)) - val |= I915_LPE_PIPE_C_INTERRUPT; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - - dev_priv->irq_mask |= val; - I915_WRITE(VLV_IMR, dev_priv->irq_mask); - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IIR, val); - POSTING_READ(VLV_IIR); - - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } static struct irq_chip lpe_audio_irqchip = { @@ -330,8 +301,6 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) desc = irq_to_desc(dev_priv->lpe_audio.irq); - lpe_audio_irq_mask(&desc->irq_data); - lpe_audio_platdev_destroy(dev_priv); irq_free_desc(dev_priv->lpe_audio.irq); @@ -342,53 +311,47 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) * intel_lpe_audio_notify() - notify lpe audio event * audio driver and i915 * @dev_priv: the i915 drm device private data + * @pipe: pipe + * @port: port * @eld : ELD data - * @pipe: pipe id - * @port: port id - * @tmds_clk_speed: tmds clock frequency in Hz + * @ls_clock: Link symbol clock in kHz + * @dp_output: Driving a DP output? * * Notify lpe audio driver of eld change. 
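+ * A NULL @eld signals a disconnect: the cached per-port ELD is cleared, the pipe is reset to -1 and the amp is muted.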
*/ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, - void *eld, int port, int pipe, int tmds_clk_speed, - bool dp_output, int link_rate) + enum pipe pipe, enum port port, + const void *eld, int ls_clock, bool dp_output) { - unsigned long irq_flags; - struct intel_hdmi_lpe_audio_pdata *pdata = NULL; + unsigned long irqflags; + struct intel_hdmi_lpe_audio_pdata *pdata; + struct intel_hdmi_lpe_audio_port_pdata *ppdata; u32 audio_enable; if (!HAS_LPE_AUDIO(dev_priv)) return; - pdata = dev_get_platdata( - &(dev_priv->lpe_audio.platdev->dev)); + pdata = dev_get_platdata(&dev_priv->lpe_audio.platdev->dev); + ppdata = &pdata->port[port - PORT_B]; - spin_lock_irqsave(&pdata->lpe_audio_slock, irq_flags); + spin_lock_irqsave(&pdata->lpe_audio_slock, irqflags); audio_enable = I915_READ(VLV_AUD_PORT_EN_DBG(port)); if (eld != NULL) { - memcpy(pdata->eld.eld_data, eld, - HDMI_MAX_ELD_BYTES); - pdata->eld.port_id = port; - pdata->eld.pipe_id = pipe; - pdata->hdmi_connected = true; - - pdata->dp_output = dp_output; - if (tmds_clk_speed) - pdata->tmds_clock_speed = tmds_clk_speed; - if (link_rate) - pdata->link_rate = link_rate; + memcpy(ppdata->eld, eld, HDMI_MAX_ELD_BYTES); + ppdata->pipe = pipe; + ppdata->ls_clock = ls_clock; + ppdata->dp_output = dp_output; /* Unmute the amp for both DP and HDMI */ I915_WRITE(VLV_AUD_PORT_EN_DBG(port), audio_enable & ~VLV_AMP_MUTE); - } else { - memset(pdata->eld.eld_data, 0, - HDMI_MAX_ELD_BYTES); - pdata->hdmi_connected = false; - pdata->dp_output = false; + memset(ppdata->eld, 0, HDMI_MAX_ELD_BYTES); + ppdata->pipe = -1; + ppdata->ls_clock = 0; + ppdata->dp_output = false; /* Mute the amp for both DP and HDMI */ I915_WRITE(VLV_AUD_PORT_EN_DBG(port), @@ -396,10 +359,7 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, } if (pdata->notify_audio_lpe) - pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev); - else - pdata->notify_pending = true; + pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev, port - PORT_B); - spin_unlock_irqrestore(&pdata->lpe_audio_slock, - irq_flags); + spin_unlock_irqrestore(&pdata->lpe_audio_slock, irqflags); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8f7c631fc1f..014b30ace8a0 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -138,10 +138,6 @@ #include "i915_drv.h" #include "intel_mocs.h" -#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) - #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) #define RING_EXECLIST0_VALID (1 << 0x4) @@ -326,8 +322,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; - assert_ring_tail_valid(rq->ring, rq->tail); - reg_state[CTX_RING_TAIL+1] = rq->tail; + reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); /* True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. 
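The reg_state[CTX_RING_TAIL+1] change above folds the old assert-then-assign pair into intel_ring_set_tail(). The helper itself is not part of this diff; a plausible sketch of its contract (the name, struct and qword-alignment check here are assumptions):

    #include <assert.h>

    struct ring { unsigned int size, tail; };

    static unsigned int ring_set_tail(struct ring *ring, unsigned int tail)
    {
            /* tail must stay inside the ring and be qword aligned */
            assert(tail < ring->size);
            assert((tail & 7) == 0);

            ring->tail = tail;
            return tail;     /* handed back so callers can write it out */
    }

Returning the value lets the register write and the bookkeeping update share a single expression, which is exactly how the hunk uses it.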
@@ -342,39 +337,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) static void execlists_submit_ports(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; u32 __iomem *elsp = - dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); - u64 desc[2]; - - GEM_BUG_ON(port[0].count > 1); - if (!port[0].count) - execlists_context_status_change(port[0].request, - INTEL_CONTEXT_SCHEDULE_IN); - desc[0] = execlists_update_context(port[0].request); - GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0])); - port[0].count++; - - if (port[1].request) { - GEM_BUG_ON(port[1].count); - execlists_context_status_change(port[1].request, - INTEL_CONTEXT_SCHEDULE_IN); - desc[1] = execlists_update_context(port[1].request); - GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1])); - port[1].count = 1; - } else { - desc[1] = 0; - } - GEM_BUG_ON(desc[0] == desc[1]); - - /* You must always write both descriptors in the order below. */ - writel(upper_32_bits(desc[1]), elsp); - writel(lower_32_bits(desc[1]), elsp); + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + unsigned int n; + + for (n = ARRAY_SIZE(engine->execlist_port); n--; ) { + struct drm_i915_gem_request *rq; + unsigned int count; + u64 desc; + + rq = port_unpack(&port[n], &count); + if (rq) { + GEM_BUG_ON(count > !n); + if (!count++) + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + port_set(&port[n], port_pack(rq, count)); + desc = execlists_update_context(rq); + GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); + } else { + GEM_BUG_ON(!n); + desc = 0; + } - writel(upper_32_bits(desc[0]), elsp); - /* The context is automatically loaded after the following */ - writel(lower_32_bits(desc[0]), elsp); + writel(upper_32_bits(desc), elsp); + writel(lower_32_bits(desc), elsp); + } } static bool ctx_single_port_submission(const struct i915_gem_context *ctx) @@ -395,6 +383,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev, return true; } +static void port_assign(struct execlist_port *port, + struct drm_i915_gem_request *rq) +{ + GEM_BUG_ON(rq == port_request(port)); + + if (port_isset(port)) + i915_gem_request_put(port_request(port)); + + port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct drm_i915_gem_request *last; @@ -402,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; - last = port->request; + last = port_request(port); if (last) /* WaIdleLiteRestore:bdw,skl * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL @@ -412,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ last->tail = last->wa_tail; - GEM_BUG_ON(port[1].request); + GEM_BUG_ON(port_isset(&port[1])); /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is @@ -437,72 +436,86 @@ static void execlists_dequeue(struct intel_engine_cs *engine) spin_lock_irq(&engine->timeline->lock); rb = engine->execlist_first; + GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb); while (rb) { - struct drm_i915_gem_request *cursor = - rb_entry(rb, typeof(*cursor), priotree.node); - - /* Can we combine this request with the current port? It has to - * be the same context/ringbuffer and not have any exceptions - * (e.g. GVT saying never to combine contexts). 
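Stepping back to the rewritten submit loop above: port_unpack()/port_pack() replace the open-coded request/count pair by stashing the small in-flight count in otherwise-unused low bits of the request pointer, so each port stays a single word. A self-contained sketch of that packing scheme (the bit budget and alignment are assumptions; the real helpers differ in detail):

    #include <assert.h>
    #include <stdint.h>

    #define COUNT_MASK 0x3u                /* two low bits: counts 0..3 */

    struct request { int payload; } __attribute__((aligned(8)));

    static uintptr_t port_pack(struct request *rq, unsigned int count)
    {
            assert(count <= COUNT_MASK);
            assert(((uintptr_t)rq & COUNT_MASK) == 0);
            return (uintptr_t)rq | count;
    }

    static struct request *port_unpack(uintptr_t port, unsigned int *count)
    {
            *count = port & COUNT_MASK;
            return (struct request *)(port & ~(uintptr_t)COUNT_MASK);
    }

With this encoding the GEM_BUG_ON(count > !n) in the loop collapses "port[0] may have one submission in flight, port[1] none" into a single comparison.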
- * - * If we can combine the requests, we can execute both by - * updating the RING_TAIL to point to the end of the second - * request, and so we never need to tell the hardware about - * the first. - */ - if (last && !can_merge_ctx(cursor->ctx, last->ctx)) { - /* If we are on the second port and cannot combine - * this request with the last, then we are done. - */ - if (port != engine->execlist_port) - break; - - /* If GVT overrides us we only ever submit port[0], - * leaving port[1] empty. Note that we also have - * to be careful that we don't queue the same - * context (even though a different request) to - * the second port. + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + struct drm_i915_gem_request *rq, *rn; + + list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + /* + * Can we combine this request with the current port? + * It has to be the same context/ringbuffer and not + * have any exceptions (e.g. GVT saying never to + * combine contexts). + * + * If we can combine the requests, we can execute both + * by updating the RING_TAIL to point to the end of the + * second request, and so we never need to tell the + * hardware about the first. */ - if (ctx_single_port_submission(last->ctx) || - ctx_single_port_submission(cursor->ctx)) - break; + if (last && !can_merge_ctx(rq->ctx, last->ctx)) { + /* + * If we are on the second port and cannot + * combine this request with the last, then we + * are done. + */ + if (port != engine->execlist_port) { + __list_del_many(&p->requests, + &rq->priotree.link); + goto done; + } + + /* + * If GVT overrides us we only ever submit + * port[0], leaving port[1] empty. Note that we + * also have to be careful that we don't queue + * the same context (even though a different + * request) to the second port. 
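Stripped of locking and the GVT single-port special case, the policy this loop implements is: coalesce consecutive requests from the same context into one port (merging is just moving RING_TAIL), switch to the second port on a context change, and stop as soon as a third context shows up. A toy model of that policy:

    #define NPORTS 2

    struct req { int ctx; };     /* a context id stands in for the ctx */

    /* returns how many queued requests can be submitted right now */
    static int dequeue(const struct req *q, int n)
    {
            int port = 0, i;

            for (i = 0; i < n; i++) {
                    if (i > 0 && q[i].ctx != q[i - 1].ctx &&
                        ++port == NPORTS)
                            break;   /* a third context: leave it queued */
            }
            return i;
    }

So a queue with contexts A A B B C yields 4: both A requests merge into port[0], both Bs into port[1], and C waits for the next dequeue.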
+ */ + if (ctx_single_port_submission(last->ctx) || + ctx_single_port_submission(rq->ctx)) { + __list_del_many(&p->requests, + &rq->priotree.link); + goto done; + } + + GEM_BUG_ON(last->ctx == rq->ctx); + + if (submit) + port_assign(port, last); + port++; + } - GEM_BUG_ON(last->ctx == cursor->ctx); + INIT_LIST_HEAD(&rq->priotree.link); + rq->priotree.priority = INT_MAX; - i915_gem_request_assign(&port->request, last); - port++; + __i915_gem_request_submit(rq); + trace_i915_gem_request_in(rq, port_index(port, engine)); + last = rq; + submit = true; } rb = rb_next(rb); - rb_erase(&cursor->priotree.node, &engine->execlist_queue); - RB_CLEAR_NODE(&cursor->priotree.node); - cursor->priotree.priority = INT_MAX; - - __i915_gem_request_submit(cursor); - trace_i915_gem_request_in(cursor, port - engine->execlist_port); - last = cursor; - submit = true; - } - if (submit) { - i915_gem_request_assign(&port->request, last); - engine->execlist_first = rb; + rb_erase(&p->node, &engine->execlist_queue); + INIT_LIST_HEAD(&p->requests); + if (p->priority != I915_PRIORITY_NORMAL) + kmem_cache_free(engine->i915->priorities, p); } +done: + engine->execlist_first = rb; + if (submit) + port_assign(port, last); spin_unlock_irq(&engine->timeline->lock); if (submit) execlists_submit_ports(engine); } -static bool execlists_elsp_idle(struct intel_engine_cs *engine) -{ - return !engine->execlist_port[0].request; -} - static bool execlists_elsp_ready(const struct intel_engine_cs *engine) { const struct execlist_port *port = engine->execlist_port; - return port[0].count + port[1].count < 2; + return port_count(&port[0]) + port_count(&port[1]) < 2; } /* @@ -515,6 +528,15 @@ static void intel_lrc_irq_handler(unsigned long data) struct execlist_port *port = engine->execlist_port; struct drm_i915_private *dev_priv = engine->i915; + /* We can skip acquiring intel_runtime_pm_get() here as it was taken + * on our behalf by the request (see i915_gem_mark_busy()) and it will + * not be relinquished until the device is idle (see + * i915_gem_idle_work_handler()). As a precaution, we make sure + * that all ELSP are drained i.e. we have processed the CSB, + * before allowing ourselves to idle and calling intel_runtime_pm_put(). 
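The drain requirement described here is what the CSB loop below enforces: each context-switch event retires one submission from the head port, and only when the packed in-flight count reaches zero is the request released and port[1] promoted. The shape of that loop, minus the status-word decoding, forcewake and preemption checks:

    #define CSB_ENTRIES 6   /* assumed ring size, as in GEN8_CSB_ENTRIES */

    struct exec_port { void *request; unsigned int count; };

    static void process_csb(unsigned int head, unsigned int tail,
                            struct exec_port port[2])
    {
            while (head != tail) {
                    if (++head == CSB_ENTRIES)
                            head = 0;

                    /* count is non-zero here (GEM_BUG_ON in the real code);
                     * one completion event drains one ELSP submission */
                    if (--port[0].count == 0) {
                            /* head request done: promote the second port */
                            port[0] = port[1];
                            port[1] = (struct exec_port){ 0 };
                    }
            }
    }

After the final event the hardware must be idle, which is what the GEM_BUG_ON(port_count(port) == 0 && !ACTIVE_IDLE) at the bottom of the real loop asserts.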
+ */ + GEM_BUG_ON(!dev_priv->gt.awake); + intel_uncore_forcewake_get(dev_priv, engine->fw_domains); /* Prefer doing test_and_clear_bit() as a two stage operation to avoid @@ -543,7 +565,9 @@ static void intel_lrc_irq_handler(unsigned long data) tail = GEN8_CSB_WRITE_PTR(head); head = GEN8_CSB_READ_PTR(head); while (head != tail) { + struct drm_i915_gem_request *rq; unsigned int status; + unsigned int count; if (++head == GEN8_CSB_ENTRIES) head = 0; @@ -571,22 +595,26 @@ static void intel_lrc_irq_handler(unsigned long data) /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) != - port[0].context_id); + port->context_id); - GEM_BUG_ON(port[0].count == 0); - if (--port[0].count == 0) { + rq = port_unpack(port, &count); + GEM_BUG_ON(count == 0); + if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); - GEM_BUG_ON(!i915_gem_request_completed(port[0].request)); - execlists_context_status_change(port[0].request, - INTEL_CONTEXT_SCHEDULE_OUT); + GEM_BUG_ON(!i915_gem_request_completed(rq)); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + + trace_i915_gem_request_out(rq); + i915_gem_request_put(rq); - trace_i915_gem_request_out(port[0].request); - i915_gem_request_put(port[0].request); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); + } else { + port_set(port, port_pack(rq, count)); } - GEM_BUG_ON(port[0].count == 0 && + /* After the final element, the hw should be idle */ + GEM_BUG_ON(port_count(port) == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); } @@ -600,28 +628,66 @@ static void intel_lrc_irq_handler(unsigned long data) intel_uncore_forcewake_put(dev_priv, engine->fw_domains); } -static bool insert_request(struct i915_priotree *pt, struct rb_root *root) +static bool +insert_request(struct intel_engine_cs *engine, + struct i915_priotree *pt, + int prio) { - struct rb_node **p, *rb; + struct i915_priolist *p; + struct rb_node **parent, *rb; bool first = true; + if (unlikely(engine->no_priolist)) + prio = I915_PRIORITY_NORMAL; + +find_priolist: /* most positive priority is scheduled first, equal priorities fifo */ rb = NULL; - p = &root->rb_node; - while (*p) { - struct i915_priotree *pos; - - rb = *p; - pos = rb_entry(rb, typeof(*pos), node); - if (pt->priority > pos->priority) { - p = &rb->rb_left; - } else { - p = &rb->rb_right; + parent = &engine->execlist_queue.rb_node; + while (*parent) { + rb = *parent; + p = rb_entry(rb, typeof(*p), node); + if (prio > p->priority) { + parent = &rb->rb_left; + } else if (prio < p->priority) { + parent = &rb->rb_right; first = false; + } else { + list_add_tail(&pt->link, &p->requests); + return false; } } - rb_link_node(&pt->node, rb, p); - rb_insert_color(&pt->node, root); + + if (prio == I915_PRIORITY_NORMAL) { + p = &engine->default_priolist; + } else { + p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC); + /* Convert an allocation failure to a priority bump */ + if (unlikely(!p)) { + prio = I915_PRIORITY_NORMAL; /* recurses just once */ + + /* To maintain ordering with all rendering, after an + * allocation failure we have to disable all scheduling. + * Requests will then be executed in fifo, and schedule + * will ensure that dependencies are emitted in fifo. + * There will be still some reordering with existing + * requests, so if userspace lied about their + * dependencies that reordering may be visible. 
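The fallback the next two lines implement deserves a standalone look: kmem_cache_alloc(GFP_ATOMIC) may fail under the timeline lock, and instead of dropping the request the driver clamps every future priority to NORMAL and retries once, landing in the statically embedded default bucket that never needs allocation. The control flow in isolation (malloc standing in for the slab cache; names mirror the hunk):

    #include <stdlib.h>

    #define PRIORITY_NORMAL 0

    struct priolist { int priority; };

    static struct priolist default_priolist = { PRIORITY_NORMAL };
    static int no_priolist;              /* models engine->no_priolist */

    static struct priolist *lookup_priolist(int prio)
    {
            struct priolist *p;

            if (no_priolist)
                    prio = PRIORITY_NORMAL;
    find_priolist:
            if (prio == PRIORITY_NORMAL)
                    return &default_priolist;    /* no allocation needed */

            p = malloc(sizeof(*p));
            if (!p) {
                    /* disable scheduling; the goto recurses just once */
                    no_priolist = 1;
                    prio = PRIORITY_NORMAL;
                    goto find_priolist;
            }
            p->priority = prio;
            return p;
    }

Because NORMAL maps to the embedded default bucket, the degraded mode can keep submitting in pure fifo order indefinitely without ever touching the allocator again.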
+ */ + engine->no_priolist = true; + goto find_priolist; + } + } + + p->priority = prio; + rb_link_node(&p->node, rb, parent); + rb_insert_color(&p->node, &engine->execlist_queue); + + INIT_LIST_HEAD(&p->requests); + list_add_tail(&pt->link, &p->requests); + + if (first) + engine->execlist_first = &p->node; return first; } @@ -634,12 +700,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->timeline->lock, flags); - if (insert_request(&request->priotree, &engine->execlist_queue)) { - engine->execlist_first = &request->priotree.node; + if (insert_request(engine, + &request->priotree, + request->priotree.priority)) { if (execlists_elsp_ready(engine)) tasklet_hi_schedule(&engine->irq_tasklet); } + GEM_BUG_ON(!engine->execlist_first); + GEM_BUG_ON(list_empty(&request->priotree.link)); + spin_unlock_irqrestore(&engine->timeline->lock, flags); } @@ -709,6 +779,19 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) list_safe_reset_next(dep, p, dfs_link); } + /* If we didn't need to bump any existing priorities, and we haven't + * yet submitted this request (i.e. there is no potential race with + * execlists_submit_request()), we can set our own priority and skip + * acquiring the engine locks. + */ + if (request->priotree.priority == INT_MIN) { + GEM_BUG_ON(!list_empty(&request->priotree.link)); + request->priotree.priority = prio; + if (stack.dfs_link.next == stack.dfs_link.prev) + return; + __list_del_entry(&stack.dfs_link); + } + engine = request->engine; spin_lock_irq(&engine->timeline->lock); @@ -724,10 +807,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) continue; pt->priority = prio; - if (!RB_EMPTY_NODE(&pt->node)) { - rb_erase(&pt->node, &engine->execlist_queue); - if (insert_request(pt, &engine->execlist_queue)) - engine->execlist_first = &pt->node; + if (!list_empty(&pt->link)) { + __list_del_entry(&pt->link); + insert_request(engine, pt, prio); } } @@ -736,8 +818,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) /* XXX Do we need to preempt to make room for us and our deps? */ } -static int execlists_context_pin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static struct intel_ring * +execlists_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; unsigned int flags; @@ -746,8 +829,8 @@ static int execlists_context_pin(struct intel_engine_cs *engine, lockdep_assert_held(&ctx->i915->drm.struct_mutex); - if (ce->pin_count++) - return 0; + if (likely(ce->pin_count++)) + goto out; GEM_BUG_ON(!ce->pin_count); /* no overflow please! 
*/ if (!ce->state) { @@ -771,7 +854,7 @@ static int execlists_context_pin(struct intel_engine_cs *engine, goto unpin_vma; } - ret = intel_ring_pin(ce->ring, ctx->ggtt_offset_bias); + ret = intel_ring_pin(ce->ring, ctx->i915, ctx->ggtt_offset_bias); if (ret) goto unpin_map; @@ -784,7 +867,8 @@ static int execlists_context_pin(struct intel_engine_cs *engine, ce->state->obj->mm.dirty = true; i915_gem_context_get(ctx); - return 0; +out: + return ce->ring; unpin_map: i915_gem_object_unpin_map(ce->state->obj); @@ -792,7 +876,7 @@ unpin_vma: __i915_vma_unpin(ce->state); err: ce->pin_count = 0; - return ret; + return ERR_PTR(ret); } static void execlists_context_unpin(struct intel_engine_cs *engine, @@ -829,9 +913,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) */ request->reserved_space += EXECLISTS_REQUEST_SIZE; - GEM_BUG_ON(!ce->ring); - request->ring = ce->ring; - if (i915.enable_guc_submission) { /* * Check that the GuC has space for the request before @@ -1139,14 +1220,12 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } -static u32 port_seqno(struct execlist_port *port) -{ - return port->request ? port->request->global_seqno : 0; -} - static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + struct execlist_port *port = engine->execlist_port; + unsigned int n; + bool submit; int ret; ret = intel_mocs_init_engine(engine); @@ -1167,16 +1246,24 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) /* After a GPU reset, we may have requests to replay */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) { - DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n", - engine->name, - port_seqno(&engine->execlist_port[0]), - port_seqno(&engine->execlist_port[1])); - engine->execlist_port[0].count = 0; - engine->execlist_port[1].count = 0; - execlists_submit_ports(engine); + + submit = false; + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { + if (!port_isset(&port[n])) + break; + + DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n", + engine->name, n, + port_request(&port[n])->global_seqno); + + /* Discard the current inflight count */ + port_set(&port[n], port_request(&port[n])); + submit = true; } + if (submit && !i915.enable_guc_submission) + execlists_submit_ports(engine); + return 0; } @@ -1252,13 +1339,13 @@ static void reset_common_ring(struct intel_engine_cs *engine, intel_ring_update_space(request->ring); /* Catch up with any missed context-switch interrupts */ - if (request->ctx != port[0].request->ctx) { - i915_gem_request_put(port[0].request); + if (request->ctx != port_request(port)->ctx) { + i915_gem_request_put(port_request(port)); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); } - GEM_BUG_ON(request->ctx != port[0].request->ctx); + GEM_BUG_ON(request->ctx != port_request(port)->ctx); /* Reset WaIdleLiteRestore:bdw,skl as well */ request->tail = @@ -1907,44 +1994,6 @@ populate_lr_context(struct i915_gem_context *ctx, return 0; } -/** - * intel_lr_context_size() - return the size of the context for an engine - * @engine: which engine to find the context size for - * - * Each engine may require a different amount of space for a context image, - * so when allocating (or copying) an image, this function can be used to - * find the right size for the specific engine. 
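The removal that begins here (its body follows below) trades a runtime switch for a value cached on the engine at init time. Assuming engine->context_size is populated from the same per-generation constants the deleted function used, the allocation-time math in execlists_context_deferred_alloc() reduces to a round-up plus one page shared with the GuC; a sketch with assumed page-size constants:

    #include <stdint.h>

    #define GTT_PAGE_SIZE    4096u
    #define GUC_SHARED_PAGES 1u    /* assumption: the LRC_PPHWSP_PN page */

    static uint32_t lr_context_alloc_size(uint32_t engine_context_size)
    {
            /* round_up(engine->context_size, I915_GTT_PAGE_SIZE) */
            uint32_t size = (engine_context_size + GTT_PAGE_SIZE - 1) &
                            ~(GTT_PAGE_SIZE - 1);

            /* one extra page shared between driver and GuC */
            return size + GUC_SHARED_PAGES * GTT_PAGE_SIZE;
    }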
- * - * Return: size (in bytes) of an engine-specific context image - * - * Note: this size includes the HWSP, which is part of the context image - * in LRC mode, but does not include the "shared data page" used with - * GuC submission. The caller should account for this if using the GuC. - */ -uint32_t intel_lr_context_size(struct intel_engine_cs *engine) -{ - int ret = 0; - - WARN_ON(INTEL_GEN(engine->i915) < 8); - - switch (engine->id) { - case RCS: - if (INTEL_GEN(engine->i915) >= 9) - ret = GEN9_LR_CONTEXT_RENDER_SIZE; - else - ret = GEN8_LR_CONTEXT_RENDER_SIZE; - break; - case VCS: - case BCS: - case VECS: - case VCS2: - ret = GEN8_LR_CONTEXT_OTHER_SIZE; - break; - } - - return ret; -} - static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -1957,8 +2006,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, WARN_ON(ce->state); - context_size = round_up(intel_lr_context_size(engine), - I915_GTT_PAGE_SIZE); + context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); /* One extra page as the sharing data between driver and GuC */ context_size += PAGE_SIZE * LRC_PPHWSP_PN; @@ -1989,7 +2037,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ce->ring = ring; ce->state = vma; - ce->initialised = engine->init_context == NULL; + ce->initialised |= engine->init_context == NULL; return 0; @@ -2036,8 +2084,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) ce->state->obj->mm.dirty = true; i915_gem_object_unpin_map(ce->state->obj); - ce->ring->head = ce->ring->tail = 0; - intel_ring_update_space(ce->ring); + intel_ring_reset(ce->ring, 0); } } } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index e8015e7bf4e9..52b3a1fd4059 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); struct drm_i915_private; struct i915_gem_context; -uint32_t intel_lr_context_size(struct intel_engine_cs *engine); - void intel_lr_context_resume(struct drm_i915_private *dev_priv); uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index cb50c527401f..c8103f8d4dfa 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -888,10 +888,14 @@ static void pch_enable_backlight(struct intel_connector *connector) struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; enum pipe pipe = intel_get_pipe_from_connector(connector); - enum transcoder cpu_transcoder = - intel_pipe_to_cpu_transcoder(dev_priv, pipe); + enum transcoder cpu_transcoder; u32 cpu_ctl2, pch_ctl1, pch_ctl2; + if (!WARN_ON_ONCE(pipe == INVALID_PIPE)) + cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe); + else + cpu_transcoder = TRANSCODER_EDP; + cpu_ctl2 = I915_READ(BLC_PWM_CPU_CTL2); if (cpu_ctl2 & BLM_PWM_ENABLE) { DRM_DEBUG_KMS("cpu backlight already enabled\n"); @@ -973,6 +977,9 @@ static void i965_enable_backlight(struct intel_connector *connector) enum pipe pipe = intel_get_pipe_from_connector(connector); u32 ctl, ctl2, freq; + if (WARN_ON_ONCE(pipe == INVALID_PIPE)) + pipe = PIPE_A; + ctl2 = I915_READ(BLC_PWM_CTL2); if (ctl2 & BLM_PWM_ENABLE) { DRM_DEBUG_KMS("backlight already enabled\n"); @@ -1037,6 +1044,9 @@ static void bxt_enable_backlight(struct 
intel_connector *connector) enum pipe pipe = intel_get_pipe_from_connector(connector); u32 pwm_ctl, val; + if (WARN_ON_ONCE(pipe == INVALID_PIPE)) + pipe = PIPE_A; + /* Controller 1 uses the utility pin. */ if (panel->backlight.controller == 1) { val = I915_READ(UTIL_PIN_CTL); @@ -1093,7 +1103,8 @@ void intel_panel_enable_backlight(struct intel_connector *connector) if (!panel->backlight.present) return; - DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe)); + if (!WARN_ON_ONCE(pipe == INVALID_PIPE)) + DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe)); mutex_lock(&dev_priv->backlight_lock); diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 206ee4f0150e..8fbd2bd0877f 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -513,16 +513,20 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); struct intel_crtc_state *pipe_config; struct drm_atomic_state *state; + struct drm_modeset_acquire_ctx ctx; int ret = 0; - drm_modeset_lock_all(dev); + drm_modeset_acquire_init(&ctx, 0); + state = drm_atomic_state_alloc(dev); if (!state) { ret = -ENOMEM; goto unlock; } - state->acquire_ctx = crtc->base.dev->mode_config.acquire_ctx; + state->acquire_ctx = &ctx; + +retry: pipe_config = intel_atomic_get_crtc_state(state, crtc); if (IS_ERR(pipe_config)) { ret = PTR_ERR(pipe_config); @@ -537,10 +541,17 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, ret = drm_atomic_commit(state); put_state: + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + drm_modeset_backoff(&ctx); + goto retry; + } + drm_atomic_state_put(state); unlock: WARN(ret, "Toggling workaround to %i returns %i\n", enable, ret); - drm_modeset_unlock_all(dev); + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); } static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, @@ -842,19 +853,12 @@ static ssize_t display_crc_ctl_write(struct file *file, const char __user *ubuf, return -E2BIG; } - tmpbuf = kmalloc(len + 1, GFP_KERNEL); - if (!tmpbuf) - return -ENOMEM; - - if (copy_from_user(tmpbuf, ubuf, len)) { - ret = -EFAULT; - goto out; - } - tmpbuf[len] = '\0'; + tmpbuf = memdup_user_nul(ubuf, len); + if (IS_ERR(tmpbuf)) + return PTR_ERR(tmpbuf); ret = display_crc_ctl_parse(dev_priv, tmpbuf, len); -out: kfree(tmpbuf); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 570bd603f401..936eef1634c7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -386,13 +386,53 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl return was_enabled; } +/** + * intel_set_memory_cxsr - Configure CxSR state + * @dev_priv: i915 device + * @enable: Allow vs. disallow CxSR + * + * Allow or disallow the system to enter a special CxSR + * (C-state self refresh) state. What typically happens in CxSR mode + * is that several display FIFOs may get combined into a single larger + * FIFO for a particular plane (so called max FIFO mode) to allow the + * system to defer memory fetches longer, and the memory will enter + * self refresh. + * + * Note that enabling CxSR does not guarantee that the system enter + * this special mode, nor does it guarantee that the system stays + * in that mode once entered. So this just allows/disallows the system + * to autonomously utilize the CxSR mode. 
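Two patterns in the hunks above are worth isolating. First, the pipe CRC workaround now follows the standard atomic deadlock-avoidance dance instead of drm_modeset_lock_all(); distilled to its skeleton (a fragment, not standalone code, using only the calls visible in the hunk):

    drm_modeset_acquire_init(&ctx, 0);
    state = drm_atomic_state_alloc(dev);
    state->acquire_ctx = &ctx;
    retry:
    ret = ...; /* build the crtc state and drm_atomic_commit() it */
    if (ret == -EDEADLK) {
            drm_atomic_state_clear(state);
            drm_modeset_backoff(&ctx);   /* drop locks, wait, try again */
            goto retry;
    }
    drm_atomic_state_put(state);
    drm_modeset_drop_locks(&ctx);
    drm_modeset_acquire_fini(&ctx);

Second, the contract this CxSR comment spells out (the comment continues below) implies a fixed bracket around plane register writes. Since the function returns whether CxSR was previously allowed, a caller sketch looks like (again a fragment, assuming the stock vblank-wait helper):

    if (intel_set_memory_cxsr(dev_priv, false))
            intel_wait_for_vblank(dev_priv, pipe);

    /* ... write plane registers; they latch at the next vblank ... */

    intel_set_memory_cxsr(dev_priv, true);

The vblank wait is needed because, per the comment, the disallow-decision is only re-evaluated at frame start, so the update must not land in the same frame in which CxSR was disallowed.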
Other factors such as core + * C-states will affect when/if the system actually enters/exits the + * CxSR mode. + * + * Note that on VLV/CHV this actually only controls the max FIFO mode, + * and the system is free to enter/exit memory self refresh at any time + * even when the use of CxSR has been disallowed. + * + * While the system is actually in the CxSR/max FIFO mode, some plane + * control registers will not get latched on vblank. Thus in order to + * guarantee the system will respond to changes in the plane registers + * we must always disallow CxSR prior to making changes to those registers. + * Unfortunately the system will re-evaluate the CxSR conditions at + * frame start which happens after vblank start (which is when the plane + * registers would get latched), so we can't proceed with the plane update + * during the same frame where we disallowed CxSR. + * + * Certain platforms also have a deeper HPLL SR mode. Fortunately the + * HPLL SR mode depends on CxSR itself, so we don't have to hand hold + * the hardware w.r.t. HPLL SR when writing to plane registers. + * Disallowing just CxSR is sufficient. + */ bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) { bool ret; mutex_lock(&dev_priv->wm.wm_mutex); ret = _intel_set_memory_cxsr(dev_priv, enable); - dev_priv->wm.vlv.cxsr = enable; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->wm.vlv.cxsr = enable; + else if (IS_G4X(dev_priv)) + dev_priv->wm.g4x.cxsr = enable; mutex_unlock(&dev_priv->wm.wm_mutex); return ret; @@ -454,13 +494,6 @@ static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start; fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start; fifo_state->plane[PLANE_CURSOR] = 63; - - DRM_DEBUG_KMS("Pipe %c FIFO size: %d/%d/%d/%d\n", - pipe_name(pipe), - fifo_state->plane[PLANE_PRIMARY], - fifo_state->plane[PLANE_SPRITE0], - fifo_state->plane[PLANE_SPRITE1], - fifo_state->plane[PLANE_CURSOR]); } static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane) @@ -538,20 +571,6 @@ static const struct intel_watermark_params pineview_cursor_hplloff_wm = { .guard_size = PINEVIEW_CURSOR_GUARD_WM, .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, }; -static const struct intel_watermark_params g4x_wm_info = { - .fifo_size = G4X_FIFO_SIZE, - .max_wm = G4X_MAX_WM, - .default_wm = G4X_MAX_WM, - .guard_size = 2, - .cacheline_size = G4X_FIFO_LINE_SIZE, -}; -static const struct intel_watermark_params g4x_cursor_wm_info = { - .fifo_size = I965_CURSOR_FIFO, - .max_wm = I965_CURSOR_MAX_WM, - .default_wm = I965_CURSOR_DFT_WM, - .guard_size = 2, - .cacheline_size = G4X_FIFO_LINE_SIZE, -}; static const struct intel_watermark_params i965_cursor_wm_info = { .fifo_size = I965_CURSOR_FIFO, .max_wm = I965_CURSOR_MAX_WM, @@ -596,8 +615,104 @@ static const struct intel_watermark_params i845_wm_info = { }; /** + * intel_wm_method1 - Method 1 / "small buffer" watermark formula + * @pixel_rate: Pipe pixel rate in kHz + * @cpp: Plane bytes per pixel + * @latency: Memory wakeup latency in 0.1us units + * + * Compute the watermark using the method 1 or "small buffer" + * formula. The caller may additionally add extra cachelines + * to account for TLB misses and clock crossings. + * + * This method is concerned with the short term drain rate + * of the FIFO, ie. it does not account for blanking periods + * which would effectively reduce the average drain rate across + * a longer period. 
The name "small" refers to the fact the + * FIFO is relatively small compared to the amount of data + * fetched. + * + * The FIFO level vs. time graph might look something like: + * + * |\ |\ + * | \ | \ + * __---__---__ (- plane active, _ blanking) + * -> time + * + * or perhaps like this: + * + * |\|\ |\|\ + * __----__----__ (- plane active, _ blanking) + * -> time + * + * Returns: + * The watermark in bytes + */ +static unsigned int intel_wm_method1(unsigned int pixel_rate, + unsigned int cpp, + unsigned int latency) +{ + uint64_t ret; + + ret = (uint64_t) pixel_rate * cpp * latency; + ret = DIV_ROUND_UP_ULL(ret, 10000); + + return ret; +} + +/** + * intel_wm_method2 - Method 2 / "large buffer" watermark formula + * @pixel_rate: Pipe pixel rate in kHz + * @htotal: Pipe horizontal total + * @width: Plane width in pixels + * @cpp: Plane bytes per pixel + * @latency: Memory wakeup latency in 0.1us units + * + * Compute the watermark using the method 2 or "large buffer" + * formula. The caller may additionally add extra cachelines + * to account for TLB misses and clock crossings. + * + * This method is concerned with the long term drain rate + * of the FIFO, ie. it does account for blanking periods + * which effectively reduce the average drain rate across + * a longer period. The name "large" refers to the fact the + * FIFO is relatively large compared to the amount of data + * fetched. + * + * The FIFO level vs. time graph might look something like: + * + * |\___ |\___ + * | \___ | \___ + * | \ | \ + * __ --__--__--__--__--__--__ (- plane active, _ blanking) + * -> time + * + * Returns: + * The watermark in bytes + */ +static unsigned int intel_wm_method2(unsigned int pixel_rate, + unsigned int htotal, + unsigned int width, + unsigned int cpp, + unsigned int latency) +{ + unsigned int ret; + + /* + * FIXME remove once all users are computing + * watermarks in the correct place. + */ + if (WARN_ON_ONCE(htotal == 0)) + htotal = 1; + + ret = (latency * pixel_rate) / (htotal * 10000); + ret = (ret + 1) * width * cpp; + + return ret; +} + +/** * intel_calculate_wm - calculate watermark level - * @clock_in_khz: pixel clock + * @pixel_rate: pixel clock * @wm: chip FIFO params * @cpp: bytes per pixel * @latency_ns: memory latency for the platform @@ -613,12 +728,12 @@ static const struct intel_watermark_params i845_wm_info = { * past the watermark point. If the FIFO drains completely, a FIFO underrun * will occur, and a display engine hang could result. */ -static unsigned long intel_calculate_wm(unsigned long clock_in_khz, - const struct intel_watermark_params *wm, - int fifo_size, int cpp, - unsigned long latency_ns) +static unsigned int intel_calculate_wm(int pixel_rate, + const struct intel_watermark_params *wm, + int fifo_size, int cpp, + unsigned int latency_ns) { - long entries_required, wm_size; + int entries, wm_size; /* * Note: we need to make sure we don't overflow for various clock & * clocks go from a few thousand to several hundred thousand. 
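Both formulas, together with the rewritten intel_calculate_wm() just below, are easy to sanity-check in isolation. A userspace transcription (unit conventions as in the kernel-doc: pixel rate in kHz, method latency in 0.1us units, latency_ns in nanoseconds; clamping to max_wm/default_wm elided):

    #include <stdint.h>

    static unsigned int wm_method1(unsigned int pixel_rate, unsigned int cpp,
                                   unsigned int latency)
    {
            /* bytes drained from the FIFO during the wakeup latency */
            uint64_t ret = (uint64_t)pixel_rate * cpp * latency;

            return (unsigned int)((ret + 9999) / 10000); /* DIV_ROUND_UP */
    }

    static unsigned int wm_method2(unsigned int pixel_rate, unsigned int htotal,
                                   unsigned int width, unsigned int cpp,
                                   unsigned int latency)
    {
            /* whole lines fetched during the latency, rounded up */
            unsigned int lines = (latency * pixel_rate) / (htotal * 10000);

            return (lines + 1) * width * cpp;
    }

    static int calculate_wm(int pixel_rate, int fifo_size, int cpp,
                            unsigned int latency_ns, int cacheline, int guard)
    {
            int entries = wm_method1(pixel_rate, cpp, latency_ns / 100);

            entries = (entries + cacheline - 1) / cacheline + guard;
            return fifo_size - entries;
    }

For instance, a 100 MHz pixel clock (pixel_rate = 100000) at 4 bytes per pixel with a 20 us wakeup latency (latency = 200) gives method1 = 100000 * 4 * 200 / 10000 = 8000 bytes, i.e. 125 cachelines of 64 bytes.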
* latency is usually a few thousand */ - entries_required = ((clock_in_khz / 1000) * cpp * latency_ns) / - 1000; - entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size); - - DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required); - - wm_size = fifo_size - (entries_required + wm->guard_size); + entries = intel_wm_method1(pixel_rate, cpp, + latency_ns / 100); + entries = DIV_ROUND_UP(entries, wm->cacheline_size) + + wm->guard_size; + DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries); - DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size); + wm_size = fifo_size - entries; + DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size); /* Don't promote wm_size to unsigned... */ - if (wm_size > (long)wm->max_wm) + if (wm_size > wm->max_wm) wm_size = wm->max_wm; if (wm_size <= 0) wm_size = wm->default_wm; @@ -655,6 +769,21 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz, return wm_size; } +static bool is_disabling(int old, int new, int threshold) +{ + return old >= threshold && new < threshold; +} + +static bool is_enabling(int old, int new, int threshold) +{ + return old < threshold && new >= threshold; +} + +static int intel_wm_num_levels(struct drm_i915_private *dev_priv) +{ + return dev_priv->wm.max_level + 1; +} + static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -699,7 +828,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) struct intel_crtc *crtc; const struct cxsr_latency *latency; u32 reg; - unsigned long wm; + unsigned int wm; latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), dev_priv->is_ddr3, @@ -733,7 +862,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) /* cursor SR */ wm = intel_calculate_wm(clock, &pineview_cursor_wm, pineview_display_wm.fifo_size, - cpp, latency->cursor_sr); + 4, latency->cursor_sr); reg = I915_READ(DSPFW3); reg &= ~DSPFW_CURSOR_SR_MASK; reg |= FW_WM(wm, CURSOR_SR); @@ -751,7 +880,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) /* cursor HPLL off SR */ wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm, pineview_display_hplloff_wm.fifo_size, - cpp, latency->cursor_hpll_disable); + 4, latency->cursor_hpll_disable); reg = I915_READ(DSPFW3); reg &= ~DSPFW_HPLL_CURSOR_MASK; reg |= FW_WM(wm, HPLL_CURSOR); @@ -764,144 +893,50 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) } } -static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, - int plane, - const struct intel_watermark_params *display, - int display_latency_ns, - const struct intel_watermark_params *cursor, - int cursor_latency_ns, - int *plane_wm, - int *cursor_wm) -{ - struct intel_crtc *crtc; - const struct drm_display_mode *adjusted_mode; - const struct drm_framebuffer *fb; - int htotal, hdisplay, clock, cpp; - int line_time_us, line_count; - int entries, tlb_miss; - - crtc = intel_get_crtc_for_plane(dev_priv, plane); - if (!intel_crtc_active(crtc)) { - *cursor_wm = cursor->guard_size; - *plane_wm = display->guard_size; - return false; - } - - adjusted_mode = &crtc->config->base.adjusted_mode; - fb = crtc->base.primary->state->fb; - clock = adjusted_mode->crtc_clock; - htotal = adjusted_mode->crtc_htotal; - hdisplay = crtc->config->pipe_src_w; - cpp = fb->format->cpp[0]; - - /* Use the small buffer method to calculate plane watermark */ - entries = ((clock * cpp / 1000) * display_latency_ns) / 1000; - tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8; - if 
(tlb_miss > 0) - entries += tlb_miss; - entries = DIV_ROUND_UP(entries, display->cacheline_size); - *plane_wm = entries + display->guard_size; - if (*plane_wm > (int)display->max_wm) - *plane_wm = display->max_wm; - - /* Use the large buffer method to calculate cursor watermark */ - line_time_us = max(htotal * 1000 / clock, 1); - line_count = (cursor_latency_ns / line_time_us + 1000) / 1000; - entries = line_count * crtc->base.cursor->state->crtc_w * cpp; - tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8; - if (tlb_miss > 0) - entries += tlb_miss; - entries = DIV_ROUND_UP(entries, cursor->cacheline_size); - *cursor_wm = entries + cursor->guard_size; - if (*cursor_wm > (int)cursor->max_wm) - *cursor_wm = (int)cursor->max_wm; - - return true; -} - /* - * Check the wm result. - * - * If any calculated watermark values is larger than the maximum value that - * can be programmed into the associated watermark register, that watermark - * must be disabled. + * Documentation says: + * "If the line size is small, the TLB fetches can get in the way of the + * data fetches, causing some lag in the pixel data return which is not + * accounted for in the above formulas. The following adjustment only + * needs to be applied if eight whole lines fit in the buffer at once. + * The WM is adjusted upwards by the difference between the FIFO size + * and the size of 8 whole lines. This adjustment is always performed + * in the actual pixel depth regardless of whether FBC is enabled or not." */ -static bool g4x_check_srwm(struct drm_i915_private *dev_priv, - int display_wm, int cursor_wm, - const struct intel_watermark_params *display, - const struct intel_watermark_params *cursor) +static int g4x_tlb_miss_wa(int fifo_size, int width, int cpp) { - DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n", - display_wm, cursor_wm); - - if (display_wm > display->max_wm) { - DRM_DEBUG_KMS("display watermark is too large(%d/%u), disabling\n", - display_wm, display->max_wm); - return false; - } + int tlb_miss = fifo_size * 64 - width * cpp * 8; - if (cursor_wm > cursor->max_wm) { - DRM_DEBUG_KMS("cursor watermark is too large(%d/%u), disabling\n", - cursor_wm, cursor->max_wm); - return false; - } - - if (!(display_wm || cursor_wm)) { - DRM_DEBUG_KMS("SR latency is 0, disabling\n"); - return false; - } - - return true; + return max(0, tlb_miss); } -static bool g4x_compute_srwm(struct drm_i915_private *dev_priv, - int plane, - int latency_ns, - const struct intel_watermark_params *display, - const struct intel_watermark_params *cursor, - int *display_wm, int *cursor_wm) +static void g4x_write_wm_values(struct drm_i915_private *dev_priv, + const struct g4x_wm_values *wm) { - struct intel_crtc *crtc; - const struct drm_display_mode *adjusted_mode; - const struct drm_framebuffer *fb; - int hdisplay, htotal, cpp, clock; - unsigned long line_time_us; - int line_count, line_size; - int small, large; - int entries; - - if (!latency_ns) { - *display_wm = *cursor_wm = 0; - return false; - } - - crtc = intel_get_crtc_for_plane(dev_priv, plane); - adjusted_mode = &crtc->config->base.adjusted_mode; - fb = crtc->base.primary->state->fb; - clock = adjusted_mode->crtc_clock; - htotal = adjusted_mode->crtc_htotal; - hdisplay = crtc->config->pipe_src_w; - cpp = fb->format->cpp[0]; - - line_time_us = max(htotal * 1000 / clock, 1); - line_count = (latency_ns / line_time_us + 1000) / 1000; - line_size = hdisplay * cpp; - - /* Use the minimum of the small and large buffer method for primary */ - small = ((clock * cpp 
/ 1000) * latency_ns) / 1000; - large = line_count * line_size; + enum pipe pipe; - entries = DIV_ROUND_UP(min(small, large), display->cacheline_size); - *display_wm = entries + display->guard_size; + for_each_pipe(dev_priv, pipe) + trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm); - /* calculate the self-refresh watermark for display cursor */ - entries = line_count * cpp * crtc->base.cursor->state->crtc_w; - entries = DIV_ROUND_UP(entries, cursor->cacheline_size); - *cursor_wm = entries + cursor->guard_size; + I915_WRITE(DSPFW1, + FW_WM(wm->sr.plane, SR) | + FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) | + FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) | + FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA)); + I915_WRITE(DSPFW2, + (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) | + FW_WM(wm->sr.fbc, FBC_SR) | + FW_WM(wm->hpll.fbc, FBC_HPLL_SR) | + FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) | + FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) | + FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA)); + I915_WRITE(DSPFW3, + (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) | + FW_WM(wm->sr.cursor, CURSOR_SR) | + FW_WM(wm->hpll.cursor, HPLL_CURSOR) | + FW_WM(wm->hpll.plane, HPLL_SR)); - return g4x_check_srwm(dev_priv, - *display_wm, *cursor_wm, - display, cursor); + POSTING_READ(DSPFW1); } #define FW_WM_VLV(value, plane) \ @@ -985,17 +1020,535 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, #undef FW_WM_VLV +static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv) +{ + /* all latencies in usec */ + dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5; + dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12; + dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35; + + dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL; +} + +static int g4x_plane_fifo_size(enum plane_id plane_id, int level) +{ + /* + * DSPCNTR[13] supposedly controls whether the + * primary plane can use the FIFO space otherwise + * reserved for the sprite plane. It's not 100% clear + * what the actual FIFO size is, but it looks like we + * can happily set both primary and sprite watermarks + * up to 127 cachelines. So that would seem to mean + * that either DSPCNTR[13] doesn't do anything, or that + * the total FIFO is >= 256 cachelines in size. Either + * way, we don't seem to have to worry about this + * repartitioning as the maximum watermark value the + * register can hold for each plane is lower than the + * minimum FIFO size. + */ + switch (plane_id) { + case PLANE_CURSOR: + return 63; + case PLANE_PRIMARY: + return level == G4X_WM_LEVEL_NORMAL ? 127 : 511; + case PLANE_SPRITE0: + return level == G4X_WM_LEVEL_NORMAL ? 
127 : 0; + default: + MISSING_CASE(plane_id); + return 0; + } +} + +static int g4x_fbc_fifo_size(int level) +{ + switch (level) { + case G4X_WM_LEVEL_SR: + return 7; + case G4X_WM_LEVEL_HPLL: + return 15; + default: + MISSING_CASE(level); + return 0; + } +} + +static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + int level) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + int clock, htotal, cpp, width, wm; + int latency = dev_priv->wm.pri_latency[level] * 10; + + if (latency == 0) + return USHRT_MAX; + + if (!intel_wm_plane_visible(crtc_state, plane_state)) + return 0; + + /* + * Not 100% sure which way ELK should go here as the + * spec only says CL/CTG should assume 32bpp and BW + * doesn't need to. But as these things followed the + * mobile vs. desktop lines on gen3 as well, let's + * assume ELK doesn't need this. + * + * The spec also fails to list such a restriction for + * the HPLL watermark, which seems a little strange. + * Let's use 32bpp for the HPLL watermark as well. + */ + if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY && + level != G4X_WM_LEVEL_NORMAL) + cpp = 4; + else + cpp = plane_state->base.fb->format->cpp[0]; + + clock = adjusted_mode->crtc_clock; + htotal = adjusted_mode->crtc_htotal; + + if (plane->id == PLANE_CURSOR) + width = plane_state->base.crtc_w; + else + width = drm_rect_width(&plane_state->base.dst); + + if (plane->id == PLANE_CURSOR) { + wm = intel_wm_method2(clock, htotal, width, cpp, latency); + } else if (plane->id == PLANE_PRIMARY && + level == G4X_WM_LEVEL_NORMAL) { + wm = intel_wm_method1(clock, cpp, latency); + } else { + int small, large; + + small = intel_wm_method1(clock, cpp, latency); + large = intel_wm_method2(clock, htotal, width, cpp, latency); + + wm = min(small, large); + } + + wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level), + width, cpp); + + wm = DIV_ROUND_UP(wm, 64) + 2; + + return min_t(int, wm, USHRT_MAX); +} + +static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state, + int level, enum plane_id plane_id, u16 value) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + bool dirty = false; + + for (; level < intel_wm_num_levels(dev_priv); level++) { + struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level]; + + dirty |= raw->plane[plane_id] != value; + raw->plane[plane_id] = value; + } + + return dirty; +} + +static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state, + int level, u16 value) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + bool dirty = false; + + /* NORMAL level doesn't have an FBC watermark */ + level = max(level, G4X_WM_LEVEL_SR); + + for (; level < intel_wm_num_levels(dev_priv); level++) { + struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level]; + + dirty |= raw->fbc != value; + raw->fbc = value; + } + + return dirty; +} + +static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + uint32_t pri_val); + +static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + int num_levels = intel_wm_num_levels(to_i915(plane->base.dev)); + enum plane_id plane_id = plane->id; + bool dirty = false; + 
int level; + + if (!intel_wm_plane_visible(crtc_state, plane_state)) { + dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0); + if (plane_id == PLANE_PRIMARY) + dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0); + goto out; + } + + for (level = 0; level < num_levels; level++) { + struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level]; + int wm, max_wm; + + wm = g4x_compute_wm(crtc_state, plane_state, level); + max_wm = g4x_plane_fifo_size(plane_id, level); + + if (wm > max_wm) + break; + + dirty |= raw->plane[plane_id] != wm; + raw->plane[plane_id] = wm; + + if (plane_id != PLANE_PRIMARY || + level == G4X_WM_LEVEL_NORMAL) + continue; + + wm = ilk_compute_fbc_wm(crtc_state, plane_state, + raw->plane[plane_id]); + max_wm = g4x_fbc_fifo_size(level); + + /* + * FBC wm is not mandatory as we + * can always just disable its use. + */ + if (wm > max_wm) + wm = USHRT_MAX; + + dirty |= raw->fbc != wm; + raw->fbc = wm; + } + + /* mark watermarks as invalid */ + dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX); + + if (plane_id == PLANE_PRIMARY) + dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX); + + out: + if (dirty) { + DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n", + plane->base.name, + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id], + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id], + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]); + + if (plane_id == PLANE_PRIMARY) + DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n", + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc, + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc); + } + + return dirty; +} + +static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, + enum plane_id plane_id, int level) +{ + const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level]; + + return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level); +} + +static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, + int level) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (level > dev_priv->wm.max_level) + return false; + + return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && + g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && + g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); +} + +/* mark all levels starting from 'level' as invalid */ +static void g4x_invalidate_wms(struct intel_crtc *crtc, + struct g4x_wm_state *wm_state, int level) +{ + if (level <= G4X_WM_LEVEL_NORMAL) { + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) + wm_state->wm.plane[plane_id] = USHRT_MAX; + } + + if (level <= G4X_WM_LEVEL_SR) { + wm_state->cxsr = false; + wm_state->sr.cursor = USHRT_MAX; + wm_state->sr.plane = USHRT_MAX; + wm_state->sr.fbc = USHRT_MAX; + } + + if (level <= G4X_WM_LEVEL_HPLL) { + wm_state->hpll_en = false; + wm_state->hpll.cursor = USHRT_MAX; + wm_state->hpll.plane = USHRT_MAX; + wm_state->hpll.fbc = USHRT_MAX; + } +} + +static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(crtc_state->base.state); + struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal; + int num_active_planes = hweight32(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); + const struct g4x_pipe_wm *raw; + struct intel_plane_state *plane_state; + struct intel_plane *plane; + enum plane_id plane_id; + int i, level; + unsigned int dirty = 0; + + 
for_each_intel_plane_in_state(state, plane, plane_state, i) { + const struct intel_plane_state *old_plane_state = + to_intel_plane_state(plane->base.state); + + if (plane_state->base.crtc != &crtc->base && + old_plane_state->base.crtc != &crtc->base) + continue; + + if (g4x_raw_plane_wm_compute(crtc_state, plane_state)) + dirty |= BIT(plane->id); + } + + if (!dirty) + return 0; + + level = G4X_WM_LEVEL_NORMAL; + if (!g4x_raw_crtc_wm_is_valid(crtc_state, level)) + goto out; + + raw = &crtc_state->wm.g4x.raw[level]; + for_each_plane_id_on_crtc(crtc, plane_id) + wm_state->wm.plane[plane_id] = raw->plane[plane_id]; + + level = G4X_WM_LEVEL_SR; + + if (!g4x_raw_crtc_wm_is_valid(crtc_state, level)) + goto out; + + raw = &crtc_state->wm.g4x.raw[level]; + wm_state->sr.plane = raw->plane[PLANE_PRIMARY]; + wm_state->sr.cursor = raw->plane[PLANE_CURSOR]; + wm_state->sr.fbc = raw->fbc; + + wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY); + + level = G4X_WM_LEVEL_HPLL; + + if (!g4x_raw_crtc_wm_is_valid(crtc_state, level)) + goto out; + + raw = &crtc_state->wm.g4x.raw[level]; + wm_state->hpll.plane = raw->plane[PLANE_PRIMARY]; + wm_state->hpll.cursor = raw->plane[PLANE_CURSOR]; + wm_state->hpll.fbc = raw->fbc; + + wm_state->hpll_en = wm_state->cxsr; + + level++; + + out: + if (level == G4X_WM_LEVEL_NORMAL) + return -EINVAL; + + /* invalidate the higher levels */ + g4x_invalidate_wms(crtc, wm_state, level); + + /* + * Determine if the FBC watermark(s) can be used. If + * this isn't the case we prefer to disable the FBC + * watermark(s) rather than disable the SR/HPLL + * level(s) entirely. + */ + wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL; + + if (level >= G4X_WM_LEVEL_SR && + wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR)) + wm_state->fbc_en = false; + else if (level >= G4X_WM_LEVEL_HPLL && + wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL)) + wm_state->fbc_en = false; + + return 0; +} + +static int g4x_compute_intermediate_wm(struct drm_device *dev, + struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct g4x_wm_state *intermediate = &crtc_state->wm.g4x.intermediate; + const struct g4x_wm_state *optimal = &crtc_state->wm.g4x.optimal; + const struct g4x_wm_state *active = &crtc->wm.active.g4x; + enum plane_id plane_id; + + intermediate->cxsr = optimal->cxsr && active->cxsr && + !crtc_state->disable_cxsr; + intermediate->hpll_en = optimal->hpll_en && active->hpll_en && + !crtc_state->disable_cxsr; + intermediate->fbc_en = optimal->fbc_en && active->fbc_en; + + for_each_plane_id_on_crtc(crtc, plane_id) { + intermediate->wm.plane[plane_id] = + max(optimal->wm.plane[plane_id], + active->wm.plane[plane_id]); + + WARN_ON(intermediate->wm.plane[plane_id] > + g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL)); + } + + intermediate->sr.plane = max(optimal->sr.plane, + active->sr.plane); + intermediate->sr.cursor = max(optimal->sr.cursor, + active->sr.cursor); + intermediate->sr.fbc = max(optimal->sr.fbc, + active->sr.fbc); + + intermediate->hpll.plane = max(optimal->hpll.plane, + active->hpll.plane); + intermediate->hpll.cursor = max(optimal->hpll.cursor, + active->hpll.cursor); + intermediate->hpll.fbc = max(optimal->hpll.fbc, + active->hpll.fbc); + + WARN_ON((intermediate->sr.plane > + g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) || + intermediate->sr.cursor > + g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) && + intermediate->cxsr); + WARN_ON((intermediate->sr.plane > + g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) || + 
intermediate->sr.cursor > + g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) && + intermediate->hpll_en); + + WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) && + intermediate->fbc_en && intermediate->cxsr); + WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) && + intermediate->fbc_en && intermediate->hpll_en); + + /* + * If our intermediate WM are identical to the final WM, then we can + * omit the post-vblank programming; only update if it's different. + */ + if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0) + crtc_state->wm.need_postvbl_update = true; + + return 0; +} + +static void g4x_merge_wm(struct drm_i915_private *dev_priv, + struct g4x_wm_values *wm) +{ + struct intel_crtc *crtc; + int num_active_crtcs = 0; + + wm->cxsr = true; + wm->hpll_en = true; + wm->fbc_en = true; + + for_each_intel_crtc(&dev_priv->drm, crtc) { + const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x; + + if (!crtc->active) + continue; + + if (!wm_state->cxsr) + wm->cxsr = false; + if (!wm_state->hpll_en) + wm->hpll_en = false; + if (!wm_state->fbc_en) + wm->fbc_en = false; + + num_active_crtcs++; + } + + if (num_active_crtcs != 1) { + wm->cxsr = false; + wm->hpll_en = false; + wm->fbc_en = false; + } + + for_each_intel_crtc(&dev_priv->drm, crtc) { + const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x; + enum pipe pipe = crtc->pipe; + + wm->pipe[pipe] = wm_state->wm; + if (crtc->active && wm->cxsr) + wm->sr = wm_state->sr; + if (crtc->active && wm->hpll_en) + wm->hpll = wm_state->hpll; + } +} + +static void g4x_program_watermarks(struct drm_i915_private *dev_priv) +{ + struct g4x_wm_values *old_wm = &dev_priv->wm.g4x; + struct g4x_wm_values new_wm = {}; + + g4x_merge_wm(dev_priv, &new_wm); + + if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) + return; + + if (is_disabling(old_wm->cxsr, new_wm.cxsr, true)) + _intel_set_memory_cxsr(dev_priv, false); + + g4x_write_wm_values(dev_priv, &new_wm); + + if (is_enabling(old_wm->cxsr, new_wm.cxsr, true)) + _intel_set_memory_cxsr(dev_priv, true); + + *old_wm = new_wm; +} + +static void g4x_initial_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + + mutex_lock(&dev_priv->wm.wm_mutex); + crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate; + g4x_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + +static void g4x_optimize_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + + if (!crtc_state->wm.need_postvbl_update) + return; + + mutex_lock(&dev_priv->wm.wm_mutex); + intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal; + g4x_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + /* latency must be in 0.1us units. 
*/ static unsigned int vlv_wm_method2(unsigned int pixel_rate, - unsigned int pipe_htotal, - unsigned int horiz_pixels, + unsigned int htotal, + unsigned int width, unsigned int cpp, unsigned int latency) { unsigned int ret; - ret = (latency * pixel_rate) / (pipe_htotal * 10000); - ret = (ret + 1) * horiz_pixels * cpp; + ret = intel_wm_method2(pixel_rate, htotal, + width, cpp, latency); ret = DIV_ROUND_UP(ret, 64); return ret; @@ -1029,17 +1582,15 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, if (dev_priv->wm.pri_latency[level] == 0) return USHRT_MAX; - if (!plane_state->base.visible) + if (!intel_wm_plane_visible(crtc_state, plane_state)) return 0; cpp = plane_state->base.fb->format->cpp[0]; clock = adjusted_mode->crtc_clock; htotal = adjusted_mode->crtc_htotal; width = crtc_state->pipe_src_w; - if (WARN_ON(htotal == 0)) - htotal = 1; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) { + if (plane->id == PLANE_CURSOR) { /* * FIXME the formula gives values that are * too big for the cursor FIFO, and hence we @@ -1064,7 +1615,7 @@ static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes) static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct vlv_pipe_wm *raw = + const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2]; struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); @@ -1143,18 +1694,13 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) return 0; } -static int vlv_num_wm_levels(struct drm_i915_private *dev_priv) -{ - return dev_priv->wm.max_level + 1; -} - /* mark all levels starting from 'level' as invalid */ static void vlv_invalidate_wms(struct intel_crtc *crtc, struct vlv_wm_state *wm_state, int level) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - for (; level < vlv_num_wm_levels(dev_priv); level++) { + for (; level < intel_wm_num_levels(dev_priv); level++) { enum plane_id plane_id; for_each_plane_id_on_crtc(crtc, plane_id) @@ -1181,11 +1727,11 @@ static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, int level, enum plane_id plane_id, u16 value) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - int num_levels = vlv_num_wm_levels(dev_priv); + int num_levels = intel_wm_num_levels(dev_priv); bool dirty = false; for (; level < num_levels; level++) { - struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; dirty |= raw->plane[plane_id] != value; raw->plane[plane_id] = value; @@ -1194,22 +1740,22 @@ static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, return dirty; } -static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->base.plane); enum plane_id plane_id = plane->id; - int num_levels = vlv_num_wm_levels(to_i915(plane->base.dev)); + int num_levels = intel_wm_num_levels(to_i915(plane->base.dev)); int level; bool dirty = false; - if (!plane_state->base.visible) { + if (!intel_wm_plane_visible(crtc_state, plane_state)) { dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); goto out; } for (level = 0; level < num_levels; level++) { - struct 
vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; int wm = vlv_compute_wm_level(crtc_state, plane_state, level); int max_wm = plane_id == PLANE_CURSOR ? 63 : 511; @@ -1225,7 +1771,7 @@ static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, out: if (dirty) - DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n", + DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n", plane->base.name, crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], @@ -1234,10 +1780,10 @@ out: return dirty; } -static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, - enum plane_id plane_id, int level) +static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, + enum plane_id plane_id, int level) { - const struct vlv_pipe_wm *raw = + const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; const struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; @@ -1245,12 +1791,12 @@ static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, return raw->plane[plane_id] <= fifo_state->plane[plane_id]; } -static bool vlv_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level) +static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level) { - return vlv_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && - vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && - vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) && - vlv_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); + return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && + vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && + vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) && + vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); } static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) @@ -1279,7 +1825,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) old_plane_state->base.crtc != &crtc->base) continue; - if (vlv_plane_wm_compute(crtc_state, plane_state)) + if (vlv_raw_plane_wm_compute(crtc_state, plane_state)) dirty |= BIT(plane->id); } @@ -1313,7 +1859,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) } /* initially allow all levels */ - wm_state->num_levels = vlv_num_wm_levels(dev_priv); + wm_state->num_levels = intel_wm_num_levels(dev_priv); /* * Note that enabling cxsr with no primary/sprite planes * enabled can wedge the pipe. 
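The reworded debug message above ("PM2", "PM5", "DDR DVFS") makes explicit that the vlv raw watermark levels correspond to progressively deeper memory power states, each with a longer wakeup latency to cover. The enum below only illustrates that mapping; the real definition lives in the driver headers and the count name here is assumed.

	enum vlv_wm_level {
		VLV_WM_LEVEL_PM2,	/* shallowest state, smallest latency */
		VLV_WM_LEVEL_PM5,	/* deeper state, larger latency to cover */
		VLV_WM_LEVEL_DDR_DVFS,	/* deepest: DDR frequency may also drop */
		NUM_VLV_WM_LEVELS,	/* assumed name for the level count */
	};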
Hence we only allow cxsr @@ -1322,10 +1868,10 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1; for (level = 0; level < wm_state->num_levels; level++) { - const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; - if (!vlv_crtc_wm_is_valid(crtc_state, level)) + if (!vlv_raw_crtc_wm_is_valid(crtc_state, level)) break; for_each_plane_id_on_crtc(crtc, plane_id) { @@ -1539,16 +2085,6 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, } } -static bool is_disabling(int old, int new, int threshold) -{ - return old >= threshold && new < threshold; -} - -static bool is_enabling(int old, int new, int threshold) -{ - return old < threshold && new >= threshold; -} - static void vlv_program_watermarks(struct drm_i915_private *dev_priv) { struct vlv_wm_values *old_wm = &dev_priv->wm.vlv; @@ -1609,65 +2145,6 @@ static void vlv_optimize_watermarks(struct intel_atomic_state *state, mutex_unlock(&dev_priv->wm.wm_mutex); } -#define single_plane_enabled(mask) is_power_of_2(mask) - -static void g4x_update_wm(struct intel_crtc *crtc) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - static const int sr_latency_ns = 12000; - int planea_wm, planeb_wm, cursora_wm, cursorb_wm; - int plane_sr, cursor_sr; - unsigned int enabled = 0; - bool cxsr_enabled; - - if (g4x_compute_wm0(dev_priv, PIPE_A, - &g4x_wm_info, pessimal_latency_ns, - &g4x_cursor_wm_info, pessimal_latency_ns, - &planea_wm, &cursora_wm)) - enabled |= 1 << PIPE_A; - - if (g4x_compute_wm0(dev_priv, PIPE_B, - &g4x_wm_info, pessimal_latency_ns, - &g4x_cursor_wm_info, pessimal_latency_ns, - &planeb_wm, &cursorb_wm)) - enabled |= 1 << PIPE_B; - - if (single_plane_enabled(enabled) && - g4x_compute_srwm(dev_priv, ffs(enabled) - 1, - sr_latency_ns, - &g4x_wm_info, - &g4x_cursor_wm_info, - &plane_sr, &cursor_sr)) { - cxsr_enabled = true; - } else { - cxsr_enabled = false; - intel_set_memory_cxsr(dev_priv, false); - plane_sr = cursor_sr = 0; - } - - DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " - "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", - planea_wm, cursora_wm, - planeb_wm, cursorb_wm, - plane_sr, cursor_sr); - - I915_WRITE(DSPFW1, - FW_WM(plane_sr, SR) | - FW_WM(cursorb_wm, CURSORB) | - FW_WM(planeb_wm, PLANEB) | - FW_WM(planea_wm, PLANEA)); - I915_WRITE(DSPFW2, - (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | - FW_WM(cursora_wm, CURSORA)); - /* HPLL off in SR has some issues on G4x... 
disable it */ - I915_WRITE(DSPFW3, - (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) | - FW_WM(cursor_sr, CURSOR_SR)); - - if (cxsr_enabled) - intel_set_memory_cxsr(dev_priv, true); -} - static void i965_update_wm(struct intel_crtc *unused_crtc) { struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); @@ -1689,14 +2166,10 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) int htotal = adjusted_mode->crtc_htotal; int hdisplay = crtc->config->pipe_src_w; int cpp = fb->format->cpp[0]; - unsigned long line_time_us; int entries; - line_time_us = max(htotal * 1000 / clock, 1); - - /* Use ns/us then divide to preserve precision */ - entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * - cpp * hdisplay; + entries = intel_wm_method2(clock, htotal, + hdisplay, cpp, sr_latency_ns / 100); entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE); srwm = I965_FIFO_SIZE - entries; if (srwm < 0) @@ -1705,13 +2178,14 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n", entries, srwm); - entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * - cpp * crtc->base.cursor->state->crtc_w; + entries = intel_wm_method2(clock, htotal, + crtc->base.cursor->state->crtc_w, 4, + sr_latency_ns / 100); entries = DIV_ROUND_UP(entries, - i965_cursor_wm_info.cacheline_size); - cursor_sr = i965_cursor_wm_info.fifo_size - - (entries + i965_cursor_wm_info.guard_size); + i965_cursor_wm_info.cacheline_size) + + i965_cursor_wm_info.guard_size; + cursor_sr = i965_cursor_wm_info.fifo_size - entries; if (cursor_sr > i965_cursor_wm_info.max_wm) cursor_sr = i965_cursor_wm_info.max_wm; @@ -1848,7 +2322,6 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) int htotal = adjusted_mode->crtc_htotal; int hdisplay = enabled->config->pipe_src_w; int cpp; - unsigned long line_time_us; int entries; if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv)) @@ -1856,11 +2329,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) else cpp = fb->format->cpp[0]; - line_time_us = max(htotal * 1000 / clock, 1); - - /* Use ns/us then divide to preserve precision */ - entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * - cpp * hdisplay; + entries = intel_wm_method2(clock, htotal, hdisplay, cpp, + sr_latency_ns / 100); entries = DIV_ROUND_UP(entries, wm_info->cacheline_size); DRM_DEBUG_KMS("self-refresh entries: %d\n", entries); srwm = wm_info->fifo_size - entries; @@ -1917,34 +2387,31 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) } /* latency must be in 0.1us units. */ -static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency) +static unsigned int ilk_wm_method1(unsigned int pixel_rate, + unsigned int cpp, + unsigned int latency) { - uint64_t ret; - - if (WARN(latency == 0, "Latency value missing\n")) - return UINT_MAX; + unsigned int ret; - ret = (uint64_t) pixel_rate * cpp * latency; - ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2; + ret = intel_wm_method1(pixel_rate, cpp, latency); + ret = DIV_ROUND_UP(ret, 64) + 2; return ret; } /* latency must be in 0.1us units. 
*/ -static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, - uint32_t horiz_pixels, uint8_t cpp, - uint32_t latency) +static unsigned int ilk_wm_method2(unsigned int pixel_rate, + unsigned int htotal, + unsigned int width, + unsigned int cpp, + unsigned int latency) { - uint32_t ret; - - if (WARN(latency == 0, "Latency value missing\n")) - return UINT_MAX; - if (WARN_ON(!pipe_htotal)) - return UINT_MAX; + unsigned int ret; - ret = (latency * pixel_rate) / (pipe_htotal * 10000); - ret = (ret + 1) * horiz_pixels * cpp; + ret = intel_wm_method2(pixel_rate, htotal, + width, cpp, latency); ret = DIV_ROUND_UP(ret, 64) + 2; + return ret; } @@ -3360,26 +3827,27 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, * Return value is provided in 16.16 fixed point form to retain fractional part. * Caller should take care of dividing & rounding off the value. */ -static uint32_t +static uint_fixed_16_16_t skl_plane_downscale_amount(const struct intel_crtc_state *cstate, const struct intel_plane_state *pstate) { struct intel_plane *plane = to_intel_plane(pstate->base.plane); - uint32_t downscale_h, downscale_w; uint32_t src_w, src_h, dst_w, dst_h; + uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; + uint_fixed_16_16_t downscale_h, downscale_w; if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) - return DRM_PLANE_HELPER_NO_SCALING; + return u32_to_fixed_16_16(0); /* n.b., src is 16.16 fixed point, dst is whole integer */ if (plane->id == PLANE_CURSOR) { - src_w = pstate->base.src_w; - src_h = pstate->base.src_h; + src_w = pstate->base.src_w >> 16; + src_h = pstate->base.src_h >> 16; dst_w = pstate->base.crtc_w; dst_h = pstate->base.crtc_h; } else { - src_w = drm_rect_width(&pstate->base.src); - src_h = drm_rect_height(&pstate->base.src); + src_w = drm_rect_width(&pstate->base.src) >> 16; + src_h = drm_rect_height(&pstate->base.src) >> 16; dst_w = drm_rect_width(&pstate->base.dst); dst_h = drm_rect_height(&pstate->base.dst); } @@ -3387,11 +3855,12 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, if (drm_rotation_90_or_270(pstate->base.rotation)) swap(dst_w, dst_h); - downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); - downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); + fp_w_ratio = fixed_16_16_div(src_w, dst_w); + fp_h_ratio = fixed_16_16_div(src_h, dst_h); + downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1)); + downscale_h = max_fixed_16_16(fp_h_ratio, u32_to_fixed_16_16(1)); - /* Provide result in 16.16 fixed point */ - return (uint64_t)downscale_w * downscale_h >> 16; + return mul_fixed16(downscale_w, downscale_h); } static unsigned int @@ -3401,10 +3870,11 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, { struct intel_plane *plane = to_intel_plane(pstate->plane); struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); - uint32_t down_scale_amount, data_rate; + uint32_t data_rate; uint32_t width = 0, height = 0; struct drm_framebuffer *fb; u32 format; + uint_fixed_16_16_t down_scale_amount; if (!intel_pstate->base.visible) return 0; @@ -3438,7 +3908,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate); - return (uint64_t)data_rate * down_scale_amount >> 16; + return mul_round_up_u32_fixed16(data_rate, down_scale_amount); } /* @@ -3587,6 +4057,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, int num_active; unsigned plane_data_rate[I915_MAX_PLANES] = {}; unsigned 
plane_y_data_rate[I915_MAX_PLANES] = {}; + uint16_t total_min_blocks = 0; /* Clear the partitioning for disabled planes. */ memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); @@ -3602,10 +4073,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active); alloc_size = skl_ddb_entry_size(alloc); - if (alloc_size == 0) { - memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); + if (alloc_size == 0) return 0; - } skl_ddb_calc_min(cstate, num_active, minimum, y_minimum); @@ -3616,10 +4085,18 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, */ for_each_plane_id_on_crtc(intel_crtc, plane_id) { - alloc_size -= minimum[plane_id]; - alloc_size -= y_minimum[plane_id]; + total_min_blocks += minimum[plane_id]; + total_min_blocks += y_minimum[plane_id]; } + if (total_min_blocks > alloc_size) { + DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations"); + DRM_DEBUG_KMS("minimum required %d/%d\n", total_min_blocks, + alloc_size); + return -EINVAL; + } + + alloc_size -= total_min_blocks; ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR]; ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; @@ -3698,7 +4175,7 @@ static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, return FP_16_16_MAX; wm_intermediate_val = latency * pixel_rate * cpp; - ret = fixed_16_16_div_round_up_u64(wm_intermediate_val, 1000 * 512); + ret = fixed_16_16_div_u64(wm_intermediate_val, 1000 * 512); return ret; } @@ -3720,12 +4197,33 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate, return ret; } -static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, - struct intel_plane_state *pstate) +static uint_fixed_16_16_t +intel_get_linetime_us(struct intel_crtc_state *cstate) +{ + uint32_t pixel_rate; + uint32_t crtc_htotal; + uint_fixed_16_16_t linetime_us; + + if (!cstate->base.active) + return u32_to_fixed_16_16(0); + + pixel_rate = cstate->pixel_rate; + + if (WARN_ON(pixel_rate == 0)) + return u32_to_fixed_16_16(0); + + crtc_htotal = cstate->base.adjusted_mode.crtc_htotal; + linetime_us = fixed_16_16_div_u64(crtc_htotal * 1000, pixel_rate); + + return linetime_us; +} + +static uint32_t +skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate) { uint64_t adjusted_pixel_rate; - uint64_t downscale_amount; - uint64_t pixel_rate; + uint_fixed_16_16_t downscale_amount; /* Shouldn't reach here on disabled planes... 
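The skl changes above move the downscale and data-rate math onto a uint_fixed_16_16_t type (note the cursor path now shifts src_w/src_h right by 16, since drm source rectangles are 16.16 fixed point while dst is whole pixels). The helper definitions sit outside this hunk; this is a minimal sketch of the representation, and the rounding chosen in each helper is an assumption rather than a quote.

	typedef struct {
		u32 val;	/* 16 integer bits, 16 fractional bits */
	} uint_fixed_16_16_t;

	static inline uint_fixed_16_16_t u32_to_fixed_16_16(u32 val)
	{
		uint_fixed_16_16_t fp = { .val = val << 16 };

		return fp;
	}

	static inline uint_fixed_16_16_t fixed_16_16_div(u32 val, u32 d)
	{
		uint_fixed_16_16_t fp;

		fp.val = DIV_ROUND_UP_ULL((u64)val << 16, d);	/* rounding assumed */

		return fp;
	}

	static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t a,
						     uint_fixed_16_16_t b)
	{
		u64 tmp = (u64)a.val * b.val;	/* 32 fractional bits */
		uint_fixed_16_16_t fp = { .val = (u32)(tmp >> 16) };	/* back to 16 */

		return fp;
	}

	static inline u32 mul_round_up_u32_fixed16(u32 val, uint_fixed_16_16_t mul)
	{
		u64 tmp = (u64)val * mul.val;

		return DIV_ROUND_UP_ULL(tmp, 1 << 16);
	}

With this, a 3840x2160 source on a 1920x1080 plane gives fixed_16_16_div(3840, 1920).val == 2 << 16 per axis, mul_fixed16() of the two ratios is 4 << 16, and mul_round_up_u32_fixed16(data_rate, ...) scales the rate by 4 while keeping the fractional part for non-integer ratios.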
*/ if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) @@ -3738,15 +4236,13 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst adjusted_pixel_rate = cstate->pixel_rate; downscale_amount = skl_plane_downscale_amount(cstate, pstate); - pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; - WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0)); - - return pixel_rate; + return mul_round_up_u32_fixed16(adjusted_pixel_rate, + downscale_amount); } static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, struct intel_crtc_state *cstate, - struct intel_plane_state *intel_pstate, + const struct intel_plane_state *intel_pstate, uint16_t ddb_allocation, int level, uint16_t *out_blocks, /* out */ @@ -3754,8 +4250,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, bool *enabled /* out */) { struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); - struct drm_plane_state *pstate = &intel_pstate->base; - struct drm_framebuffer *fb = pstate->fb; + const struct drm_plane_state *pstate = &intel_pstate->base; + const struct drm_framebuffer *fb = pstate->fb; uint32_t latency = dev_priv->wm.skl_latency[level]; uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t plane_blocks_per_line; @@ -3834,8 +4330,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (y_tiled) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512); - plane_blocks_per_line = - fixed_16_16_div_round_up(interm_pbpl, y_min_scanlines); + plane_blocks_per_line = fixed_16_16_div(interm_pbpl, + y_min_scanlines); } else if (x_tiled) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl); @@ -3856,19 +4352,25 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (y_tiled) { selected_result = max_fixed_16_16(method2, y_tile_minimum); } else { + uint32_t linetime_us; + + linetime_us = fixed_16_16_to_u32_round_up( + intel_get_linetime_us(cstate)); if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && (plane_bytes_per_line / 512 < 1)) selected_result = method2; - else if ((ddb_allocation / + else if ((ddb_allocation && ddb_allocation / fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1) selected_result = min_fixed_16_16(method1, method2); + else if (latency >= linetime_us) + selected_result = min_fixed_16_16(method1, method2); else selected_result = method1; } res_blocks = fixed_16_16_to_u32_round_up(selected_result) + 1; - res_lines = DIV_ROUND_UP(selected_result.val, - plane_blocks_per_line.val); + res_lines = div_round_up_fixed16(selected_result, + plane_blocks_per_line); if (level >= 1 && level <= 7) { if (y_tiled) { @@ -3907,54 +4409,39 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } static int -skl_compute_wm_level(const struct drm_i915_private *dev_priv, - struct skl_ddb_allocation *ddb, - struct intel_crtc_state *cstate, - struct intel_plane *intel_plane, - int level, - struct skl_wm_level *result) +skl_compute_wm_levels(const struct drm_i915_private *dev_priv, + struct skl_ddb_allocation *ddb, + struct intel_crtc_state *cstate, + const struct intel_plane_state *intel_pstate, + struct skl_plane_wm *wm) { - struct drm_atomic_state *state = cstate->base.state; struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); - struct drm_plane *plane = &intel_plane->base; - struct intel_plane_state *intel_pstate = NULL; + struct drm_plane *plane = 
intel_pstate->base.plane; + struct intel_plane *intel_plane = to_intel_plane(plane); uint16_t ddb_blocks; enum pipe pipe = intel_crtc->pipe; + int level, max_level = ilk_wm_max_level(dev_priv); int ret; - if (state) - intel_pstate = - intel_atomic_get_existing_plane_state(state, - intel_plane); - - /* - * Note: If we start supporting multiple pending atomic commits against - * the same planes/CRTC's in the future, plane->state will no longer be - * the correct pre-state to use for the calculations here and we'll - * need to change where we get the 'unchanged' plane data from. - * - * For now this is fine because we only allow one queued commit against - * a CRTC. Even if the plane isn't modified by this transaction and we - * don't have a plane lock, we still have the CRTC's lock, so we know - * that no other transactions are racing with us to update it. - */ - if (!intel_pstate) - intel_pstate = to_intel_plane_state(plane->state); - - WARN_ON(!intel_pstate->base.fb); + if (WARN_ON(!intel_pstate->base.fb)) + return -EINVAL; ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]); - ret = skl_compute_plane_wm(dev_priv, - cstate, - intel_pstate, - ddb_blocks, - level, - &result->plane_res_b, - &result->plane_res_l, - &result->plane_en); - if (ret) - return ret; + for (level = 0; level <= max_level; level++) { + struct skl_wm_level *result = &wm->wm[level]; + + ret = skl_compute_plane_wm(dev_priv, + cstate, + intel_pstate, + ddb_blocks, + level, + &result->plane_res_b, + &result->plane_res_l, + &result->plane_en); + if (ret) + return ret; + } return 0; } @@ -3964,19 +4451,16 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate) { struct drm_atomic_state *state = cstate->base.state; struct drm_i915_private *dev_priv = to_i915(state->dev); - uint32_t pixel_rate; + uint_fixed_16_16_t linetime_us; uint32_t linetime_wm; - if (!cstate->base.active) - return 0; + linetime_us = intel_get_linetime_us(cstate); - pixel_rate = cstate->pixel_rate; - - if (WARN_ON(pixel_rate == 0)) + if (is_fixed16_zero(linetime_us)) return 0; - linetime_wm = DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * - 1000, pixel_rate); + linetime_wm = fixed_16_16_to_u32_round_up(mul_u32_fixed_16_16(8, + linetime_us)); /* Display WA #1135: bxt. 
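intel_get_linetime_us() above is simply crtc_htotal / pixel_rate carried in 16.16 microseconds (pixel_rate is in kHz, hence the * 1000), and skl_compute_linetime_wm() programs eight times that value, rounded up. A plain-integer equivalent of the removed open-coded expression, for illustration with assumed 1080p-like timings:

	/* equivalent of the removed open-coded path:
	 * DIV_ROUND_UP(8 * crtc_htotal * 1000, pixel_rate) */
	static unsigned int linetime_wm_example(unsigned int htotal,
						unsigned int pixel_rate_khz)
	{
		return DIV_ROUND_UP(8 * htotal * 1000, pixel_rate_khz);
	}

	/* e.g. htotal = 2200, pixel_rate = 148500 kHz:
	 * linetime    = 2200 * 1000 / 148500 ~= 14.81 us
	 * linetime_wm = roundup(8 * 14.81)    = 119
	 */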
*/ if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled) @@ -4000,10 +4484,11 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, struct skl_pipe_wm *pipe_wm) { struct drm_device *dev = cstate->base.crtc->dev; + struct drm_crtc_state *crtc_state = &cstate->base; const struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane; + struct drm_plane *plane; + const struct drm_plane_state *pstate; struct skl_plane_wm *wm; - int level, max_level = ilk_wm_max_level(dev_priv); int ret; /* @@ -4012,18 +4497,17 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, */ memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes)); - for_each_intel_plane_mask(&dev_priv->drm, - intel_plane, - cstate->base.plane_mask) { - wm = &pipe_wm->planes[intel_plane->id]; + drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { + const struct intel_plane_state *intel_pstate = + to_intel_plane_state(pstate); + enum plane_id plane_id = to_intel_plane(plane)->id; - for (level = 0; level <= max_level; level++) { - ret = skl_compute_wm_level(dev_priv, ddb, cstate, - intel_plane, level, - &wm->wm[level]); - if (ret) - return ret; - } + wm = &pipe_wm->planes[plane_id]; + + ret = skl_compute_wm_levels(dev_priv, ddb, cstate, + intel_pstate, wm); + if (ret) + return ret; skl_compute_transition_wm(cstate, &wm->trans_wm); } pipe_wm->linetime = skl_compute_linetime_wm(cstate); @@ -4654,6 +5138,32 @@ static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) #define _FW_WM_VLV(value, plane) \ (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT) +static void g4x_read_wm_values(struct drm_i915_private *dev_priv, + struct g4x_wm_values *wm) +{ + uint32_t tmp; + + tmp = I915_READ(DSPFW1); + wm->sr.plane = _FW_WM(tmp, SR); + wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB); + wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB); + wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA); + + tmp = I915_READ(DSPFW2); + wm->fbc_en = tmp & DSPFW_FBC_SR_EN; + wm->sr.fbc = _FW_WM(tmp, FBC_SR); + wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR); + wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB); + wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA); + wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA); + + tmp = I915_READ(DSPFW3); + wm->hpll_en = tmp & DSPFW_HPLL_SR_EN; + wm->sr.cursor = _FW_WM(tmp, CURSOR_SR); + wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR); + wm->hpll.plane = _FW_WM(tmp, HPLL_SR); +} + static void vlv_read_wm_values(struct drm_i915_private *dev_priv, struct vlv_wm_values *wm) { @@ -4730,6 +5240,147 @@ static void vlv_read_wm_values(struct drm_i915_private *dev_priv, #undef _FW_WM #undef _FW_WM_VLV +void g4x_wm_get_hw_state(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct g4x_wm_values *wm = &dev_priv->wm.g4x; + struct intel_crtc *crtc; + + g4x_read_wm_values(dev_priv, wm); + + wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN; + + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct g4x_wm_state *active = &crtc->wm.active.g4x; + struct g4x_pipe_wm *raw; + enum pipe pipe = crtc->pipe; + enum plane_id plane_id; + int level, max_level; + + active->cxsr = wm->cxsr; + active->hpll_en = wm->hpll_en; + active->fbc_en = wm->fbc_en; + + active->sr = wm->sr; + active->hpll = wm->hpll; + + for_each_plane_id_on_crtc(crtc, plane_id) { + active->wm.plane[plane_id] = + wm->pipe[pipe].plane[plane_id]; + } + + if 
(wm->cxsr && wm->hpll_en) + max_level = G4X_WM_LEVEL_HPLL; + else if (wm->cxsr) + max_level = G4X_WM_LEVEL_SR; + else + max_level = G4X_WM_LEVEL_NORMAL; + + level = G4X_WM_LEVEL_NORMAL; + raw = &crtc_state->wm.g4x.raw[level]; + for_each_plane_id_on_crtc(crtc, plane_id) + raw->plane[plane_id] = active->wm.plane[plane_id]; + + if (++level > max_level) + goto out; + + raw = &crtc_state->wm.g4x.raw[level]; + raw->plane[PLANE_PRIMARY] = active->sr.plane; + raw->plane[PLANE_CURSOR] = active->sr.cursor; + raw->plane[PLANE_SPRITE0] = 0; + raw->fbc = active->sr.fbc; + + if (++level > max_level) + goto out; + + raw = &crtc_state->wm.g4x.raw[level]; + raw->plane[PLANE_PRIMARY] = active->hpll.plane; + raw->plane[PLANE_CURSOR] = active->hpll.cursor; + raw->plane[PLANE_SPRITE0] = 0; + raw->fbc = active->hpll.fbc; + + out: + for_each_plane_id_on_crtc(crtc, plane_id) + g4x_raw_plane_wm_set(crtc_state, level, + plane_id, USHRT_MAX); + g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX); + + crtc_state->wm.g4x.optimal = *active; + crtc_state->wm.g4x.intermediate = *active; + + DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n", + pipe_name(pipe), + wm->pipe[pipe].plane[PLANE_PRIMARY], + wm->pipe[pipe].plane[PLANE_CURSOR], + wm->pipe[pipe].plane[PLANE_SPRITE0]); + } + + DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n", + wm->sr.plane, wm->sr.cursor, wm->sr.fbc); + DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n", + wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc); + DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n", + yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en)); +} + +void g4x_wm_sanitize(struct drm_i915_private *dev_priv) +{ + struct intel_plane *plane; + struct intel_crtc *crtc; + + mutex_lock(&dev_priv->wm.wm_mutex); + + for_each_intel_plane(&dev_priv->drm, plane) { + struct intel_crtc *crtc = + intel_get_crtc_for_pipe(dev_priv, plane->pipe); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal; + enum plane_id plane_id = plane->id; + int level; + + if (plane_state->base.visible) + continue; + + for (level = 0; level < 3; level++) { + struct g4x_pipe_wm *raw = + &crtc_state->wm.g4x.raw[level]; + + raw->plane[plane_id] = 0; + wm_state->wm.plane[plane_id] = 0; + } + + if (plane_id == PLANE_PRIMARY) { + for (level = 0; level < 3; level++) { + struct g4x_pipe_wm *raw = + &crtc_state->wm.g4x.raw[level]; + raw->fbc = 0; + } + + wm_state->sr.fbc = 0; + wm_state->hpll.fbc = 0; + wm_state->fbc_en = false; + } + } + + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + crtc_state->wm.g4x.intermediate = + crtc_state->wm.g4x.optimal; + crtc->wm.active.g4x = crtc_state->wm.g4x.optimal; + } + + g4x_program_watermarks(dev_priv); + + mutex_unlock(&dev_priv->wm.wm_mutex); +} + void vlv_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -4792,7 +5443,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) active->cxsr = wm->cxsr; for (level = 0; level < active->num_levels; level++) { - struct vlv_pipe_wm *raw = + struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; active->sr[level].plane = wm->sr.plane; @@ -4852,7 +5503,7 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv) continue; for (level = 0; level < wm_state->num_levels; level++) { - struct 
vlv_pipe_wm *raw = + struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; raw->plane[plane_id] = 0; @@ -8036,6 +8687,12 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->display.initial_watermarks = vlv_initial_watermarks; dev_priv->display.optimize_watermarks = vlv_optimize_watermarks; dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo; + } else if (IS_G4X(dev_priv)) { + g4x_setup_wm_latency(dev_priv); + dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm; + dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm; + dev_priv->display.initial_watermarks = g4x_initial_watermarks; + dev_priv->display.optimize_watermarks = g4x_optimize_watermarks; } else if (IS_PINEVIEW(dev_priv)) { if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), dev_priv->is_ddr3, @@ -8051,8 +8708,6 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->display.update_wm = NULL; } else dev_priv->display.update_wm = pineview_update_wm; - } else if (IS_G4X(dev_priv)) { - dev_priv->display.update_wm = g4x_update_wm; } else if (IS_GEN4(dev_priv)) { dev_priv->display.update_wm = i965_update_wm; } else if (IS_GEN3(dev_priv)) { @@ -8135,9 +8790,9 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val I915_WRITE_FW(GEN6_PCODE_DATA1, 0); I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); - if (intel_wait_for_register_fw(dev_priv, - GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, - 500)) { + if (__intel_wait_for_register_fw(dev_priv, + GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, + 500, 0, NULL)) { DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); return -ETIMEDOUT; } @@ -8180,9 +8835,9 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, I915_WRITE_FW(GEN6_PCODE_DATA1, 0); I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); - if (intel_wait_for_register_fw(dev_priv, - GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, - 500)) { + if (__intel_wait_for_register_fw(dev_priv, + GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, + 500, 0, NULL)) { DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); return -ETIMEDOUT; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 66a2b8b83972..acd1da9b62a3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -39,17 +39,27 @@ */ #define LEGACY_REQUEST_SIZE 200 -static int __intel_ring_space(int head, int tail, int size) +static unsigned int __intel_ring_space(unsigned int head, + unsigned int tail, + unsigned int size) { - int space = head - tail; - if (space <= 0) - space += size; - return space - I915_RING_FREE_SPACE; + /* + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." 
+ */ + GEM_BUG_ON(!is_power_of_2(size)); + return (head - tail - CACHELINE_BYTES) & (size - 1); } -void intel_ring_update_space(struct intel_ring *ring) +unsigned int intel_ring_update_space(struct intel_ring *ring) { - ring->space = __intel_ring_space(ring->head, ring->tail, ring->size); + unsigned int space; + + space = __intel_ring_space(ring->head, ring->emit, ring->size); + + ring->space = space; + return space; } static int @@ -538,9 +548,9 @@ static int init_ring_common(struct intel_engine_cs *engine) I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID); /* If the head is still not zero, the ring is dead */ - if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base), - RING_VALID, RING_VALID, - 50)) { + if (intel_wait_for_register(dev_priv, RING_CTL(engine->mmio_base), + RING_VALID, RING_VALID, + 50)) { DRM_ERROR("%s initialization failed " "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", engine->name, @@ -774,8 +784,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); - assert_ring_tail_valid(request->ring, request->tail); - I915_WRITE_TAIL(request->engine, request->tail); + I915_WRITE_TAIL(request->engine, + intel_ring_set_tail(request->ring, request->tail)); } static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) @@ -1259,6 +1269,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + GEM_BUG_ON(engine->id != RCS); + dev_priv->status_page_dmah = drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); if (!dev_priv->status_page_dmah) @@ -1270,17 +1282,18 @@ static int init_phys_status_page(struct intel_engine_cs *engine) return 0; } -int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias) +int intel_ring_pin(struct intel_ring *ring, + struct drm_i915_private *i915, + unsigned int offset_bias) { - unsigned int flags; - enum i915_map_type map; + enum i915_map_type map = HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC; struct i915_vma *vma = ring->vma; + unsigned int flags; void *addr; int ret; GEM_BUG_ON(ring->vaddr); - map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC; flags = PIN_GLOBAL; if (offset_bias) @@ -1316,11 +1329,23 @@ err: return PTR_ERR(addr); } +void intel_ring_reset(struct intel_ring *ring, u32 tail) +{ + GEM_BUG_ON(!list_empty(&ring->request_list)); + ring->tail = tail; + ring->head = tail; + ring->emit = tail; + intel_ring_update_space(ring); +} + void intel_ring_unpin(struct intel_ring *ring) { GEM_BUG_ON(!ring->vma); GEM_BUG_ON(!ring->vaddr); + /* Discard any unused bytes beyond that submitted to hw. 
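__intel_ring_space() above replaces the old signed, branchy free-space computation with a single mask. That is valid because the ring size is a power of two and unsigned subtraction wraps modulo 2^32, so (head - tail) & (size - 1) is exactly the circular distance; subtracting CACHELINE_BYTES keeps TAIL out of the cacheline holding HEAD, per the BSpec quote. A standalone demonstration with made-up offsets:

	#include <assert.h>
	#include <stdint.h>

	static uint32_t ring_space(uint32_t head, uint32_t tail,
				   uint32_t size, uint32_t reserve)
	{
		assert((size & (size - 1)) == 0); /* power of two, or the mask lies */
		return (head - tail - reserve) & (size - 1);
	}

	int main(void)
	{
		/* unwrapped: head ahead of tail */
		assert(ring_space(4096, 1024, 8192, 64) == 3008);
		/* wrapped: tail ahead of head, unsigned math still lands in range */
		assert(ring_space(1024, 4096, 8192, 64) == 5056);
		return 0;
	}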
*/ + intel_ring_reset(ring, ring->tail); + if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); else @@ -1338,7 +1363,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) obj = i915_gem_object_create_stolen(dev_priv, size); if (!obj) - obj = i915_gem_object_create(dev_priv, size); + obj = i915_gem_object_create_internal(dev_priv, size); if (IS_ERR(obj)) return ERR_CAST(obj); @@ -1369,8 +1394,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) if (!ring) return ERR_PTR(-ENOMEM); - ring->engine = engine; - INIT_LIST_HEAD(&ring->request_list); ring->size = size; @@ -1424,22 +1447,73 @@ static int context_pin(struct i915_gem_context *ctx) PIN_GLOBAL | PIN_HIGH); } -static int intel_ring_context_pin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static struct i915_vma * +alloc_context_vma(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create(i915, engine->context_size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Try to make the context utilize L3 as well as LLC. + * + * On VLV we don't have L3 controls in the PTEs so we + * shouldn't touch the cache level, especially as that + * would make the object snooped which might have a + * negative performance impact. + * + * Snooping is required on non-llc platforms in execlist + * mode, but since all GGTT accesses use PAT entry 0 we + * get snooping anyway regardless of cache_level. + * + * This is only applicable for Ivy Bridge devices since + * later platforms don't have L3 control bits in the PTE. + */ + if (IS_IVYBRIDGE(i915)) { + /* Ignore any error, regard it as a simple optimisation */ + i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); + } + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + +static struct intel_ring * +intel_ring_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; int ret; lockdep_assert_held(&ctx->i915->drm.struct_mutex); - if (ce->pin_count++) - return 0; + if (likely(ce->pin_count++)) + goto out; GEM_BUG_ON(!ce->pin_count); /* no overflow please! 
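intel_ring_context_pin() here allocates the context backing store lazily on the first pin (the alloc_context_vma() call continues just below) and turns every later pin into a plain refcount increment, the likely() fast path. A stripped-down model of that pattern; all names are stand-ins, not driver API:

	#include <assert.h>
	#include <stddef.h>

	struct ctx {
		void *state;
		unsigned int pin_count;
	};

	static char backing;	/* stand-in for the allocated context object */

	static void *ctx_pin(struct ctx *ce)
	{
		if (ce->pin_count++)		/* fast path: already pinned */
			return ce->state;

		if (!ce->state)			/* first pin: allocate lazily */
			ce->state = &backing;

		return ce->state;
	}

	int main(void)
	{
		struct ctx ce = { NULL, 0 };
		void *a = ctx_pin(&ce);
		void *b = ctx_pin(&ce);

		assert(a == b && ce.pin_count == 2);
		return 0;
	}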
*/ + if (!ce->state && engine->context_size) { + struct i915_vma *vma; + + vma = alloc_context_vma(engine); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; + } + + ce->state = vma; + } + if (ce->state) { ret = context_pin(ctx); if (ret) - goto error; + goto err; ce->state->obj->mm.dirty = true; } @@ -1455,11 +1529,14 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, ce->initialised = true; i915_gem_context_get(ctx); - return 0; -error: +out: + /* One ringbuffer to rule them all */ + return engine->buffer; + +err: ce->pin_count = 0; - return ret; + return ERR_PTR(ret); } static void intel_ring_context_unpin(struct intel_engine_cs *engine, @@ -1481,78 +1558,70 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring; - int ret; - - WARN_ON(engine->buffer); + int err; intel_engine_setup_common(engine); - ret = intel_engine_init_common(engine); - if (ret) - goto error; + err = intel_engine_init_common(engine); + if (err) + goto err; + + if (HWS_NEEDS_PHYSICAL(engine->i915)) + err = init_phys_status_page(engine); + else + err = init_status_page(engine); + if (err) + goto err; ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); if (IS_ERR(ring)) { - ret = PTR_ERR(ring); - goto error; - } - - if (HWS_NEEDS_PHYSICAL(dev_priv)) { - WARN_ON(engine->id != RCS); - ret = init_phys_status_page(engine); - if (ret) - goto error; - } else { - ret = init_status_page(engine); - if (ret) - goto error; + err = PTR_ERR(ring); + goto err_hws; } /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE); - if (ret) { - intel_ring_free(ring); - goto error; - } + err = intel_ring_pin(ring, engine->i915, I915_GTT_PAGE_SIZE); + if (err) + goto err_ring; + + GEM_BUG_ON(engine->buffer); engine->buffer = ring; return 0; -error: - intel_engine_cleanup(engine); - return ret; +err_ring: + intel_ring_free(ring); +err_hws: + if (HWS_NEEDS_PHYSICAL(engine->i915)) + cleanup_phys_status_page(engine); + else + cleanup_status_page(engine); +err: + intel_engine_cleanup_common(engine); + return err; } void intel_engine_cleanup(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv; - - dev_priv = engine->i915; + struct drm_i915_private *dev_priv = engine->i915; - if (engine->buffer) { - WARN_ON(INTEL_GEN(dev_priv) > 2 && - (I915_READ_MODE(engine) & MODE_IDLE) == 0); + WARN_ON(INTEL_GEN(dev_priv) > 2 && + (I915_READ_MODE(engine) & MODE_IDLE) == 0); - intel_ring_unpin(engine->buffer); - intel_ring_free(engine->buffer); - engine->buffer = NULL; - } + intel_ring_unpin(engine->buffer); + intel_ring_free(engine->buffer); if (engine->cleanup) engine->cleanup(engine); - if (HWS_NEEDS_PHYSICAL(dev_priv)) { - WARN_ON(engine->id != RCS); + if (HWS_NEEDS_PHYSICAL(dev_priv)) cleanup_phys_status_page(engine); - } else { + else cleanup_status_page(engine); - } intel_engine_cleanup_common(engine); - engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); } @@ -1562,8 +1631,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; + /* Restart from the beginning of the rings for convenience */ for_each_engine(engine, dev_priv, id) - engine->buffer->head = engine->buffer->tail; + intel_ring_reset(engine->buffer, 0); } static int ring_request_alloc(struct drm_i915_gem_request *request) @@ -1578,9 +1648,6 
@@ static int ring_request_alloc(struct drm_i915_gem_request *request) */ request->reserved_space += LEGACY_REQUEST_SIZE; - GEM_BUG_ON(!request->engine->buffer); - request->ring = request->engine->buffer; - cs = intel_ring_begin(request, 0); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1589,7 +1656,8 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) return 0; } -static int wait_for_space(struct drm_i915_gem_request *req, int bytes) +static noinline int wait_for_space(struct drm_i915_gem_request *req, + unsigned int bytes) { struct intel_ring *ring = req->ring; struct drm_i915_gem_request *target; @@ -1597,8 +1665,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) lockdep_assert_held(&req->i915->drm.struct_mutex); - intel_ring_update_space(ring); - if (ring->space >= bytes) + if (intel_ring_update_space(ring) >= bytes) return 0; /* @@ -1613,12 +1680,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) GEM_BUG_ON(!req->reserved_space); list_for_each_entry(target, &ring->request_list, ring_link) { - unsigned space; - /* Would completion of this request free enough space? */ - space = __intel_ring_space(target->postfix, ring->tail, - ring->size); - if (space >= bytes) + if (bytes <= __intel_ring_space(target->postfix, + ring->emit, ring->size)) break; } @@ -1638,59 +1702,64 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) return 0; } -u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) +u32 *intel_ring_begin(struct drm_i915_gem_request *req, + unsigned int num_dwords) { struct intel_ring *ring = req->ring; - int remain_actual = ring->size - ring->tail; - int remain_usable = ring->effective_size - ring->tail; - int bytes = num_dwords * sizeof(u32); - int total_bytes, wait_bytes; - bool need_wrap = false; + const unsigned int remain_usable = ring->effective_size - ring->emit; + const unsigned int bytes = num_dwords * sizeof(u32); + unsigned int need_wrap = 0; + unsigned int total_bytes; u32 *cs; total_bytes = bytes + req->reserved_space; + GEM_BUG_ON(total_bytes > ring->effective_size); - if (unlikely(bytes > remain_usable)) { - /* - * Not enough space for the basic request. So need to flush - * out the remainder and then wait for base + reserved. - */ - wait_bytes = remain_actual + total_bytes; - need_wrap = true; - } else if (unlikely(total_bytes > remain_usable)) { - /* - * The base request will fit but the reserved space - * falls off the end. So we don't need an immediate wrap - * and only need to effectively wait for the reserved - * size space from the start of ringbuffer. - */ - wait_bytes = remain_actual + req->reserved_space; - } else { - /* No wrapping required, just waiting. */ - wait_bytes = total_bytes; + if (unlikely(total_bytes > remain_usable)) { + const int remain_actual = ring->size - ring->emit; + + if (bytes > remain_usable) { + /* + * Not enough space for the basic request. So need to + * flush out the remainder and then wait for + * base + reserved. + */ + total_bytes += remain_actual; + need_wrap = remain_actual | 1; + } else { + /* + * The base request will fit but the reserved space + * falls off the end. So we don't need an immediate + * wrap and only need to effectively wait for the + * reserved size from the start of ringbuffer. 
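The rewritten intel_ring_begin() packs the wrap length and a wrap-needed flag into the single need_wrap variable: ring->emit only ever advances in whole dwords, so remain_actual is a multiple of 4 and bit 0 is free for the flag (remain_actual | 1 on the way in, stripped with &= ~1 just below). A minimal standalone model of the idiom:

	#include <assert.h>

	int main(void)
	{
		unsigned int remain_actual = 0x1a0;	/* dword multiple: bit 0 clear */
		unsigned int need_wrap = remain_actual | 1; /* length and flag in one */

		assert(need_wrap != 0);				/* "do we need to wrap?" */
		assert((need_wrap & ~1u) == remain_actual);	/* recover the length */
		return 0;
	}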
+ */ + total_bytes = req->reserved_space + remain_actual; + } } - if (wait_bytes > ring->space) { - int ret = wait_for_space(req, wait_bytes); + if (unlikely(total_bytes > ring->space)) { + int ret = wait_for_space(req, total_bytes); if (unlikely(ret)) return ERR_PTR(ret); } if (unlikely(need_wrap)) { - GEM_BUG_ON(remain_actual > ring->space); - GEM_BUG_ON(ring->tail + remain_actual > ring->size); + need_wrap &= ~1; + GEM_BUG_ON(need_wrap > ring->space); + GEM_BUG_ON(ring->emit + need_wrap > ring->size); /* Fill the tail with MI_NOOP */ - memset(ring->vaddr + ring->tail, 0, remain_actual); - ring->tail = 0; - ring->space -= remain_actual; + memset(ring->vaddr + ring->emit, 0, need_wrap); + ring->emit = 0; + ring->space -= need_wrap; } - GEM_BUG_ON(ring->tail > ring->size - bytes); - cs = ring->vaddr + ring->tail; - ring->tail += bytes; + GEM_BUG_ON(ring->emit > ring->size - bytes); + GEM_BUG_ON(ring->space < bytes); + cs = ring->vaddr + ring->emit; + GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes)); + ring->emit += bytes; ring->space -= bytes; - GEM_BUG_ON(ring->space < 0); return cs; } @@ -1699,7 +1768,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { int num_dwords = - (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); u32 *cs; if (num_dwords == 0) @@ -1736,11 +1805,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0); /* Wait for the ring not to be idle, i.e. for it to wake up. */ - if (intel_wait_for_register_fw(dev_priv, - GEN6_BSD_SLEEP_PSMI_CONTROL, - GEN6_BSD_SLEEP_INDICATOR, - 0, - 50)) + if (__intel_wait_for_register_fw(dev_priv, + GEN6_BSD_SLEEP_PSMI_CONTROL, + GEN6_BSD_SLEEP_INDICATOR, + 0, + 1000, 0, NULL)) DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); /* Now that the ring is fully powered up, update the tail */ @@ -2182,20 +2251,6 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) return intel_init_ring_buffer(engine); } -/** - * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3) - */ -int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - intel_ring_default_vfuncs(dev_priv, engine); - - engine->emit_flush = gen6_bsd_ring_flush; - - return intel_init_ring_buffer(engine); -} - int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a82a0807f64d..6aa20ac8cde3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -17,17 +17,6 @@ #define CACHELINE_BYTES 64 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t)) -/* - * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use" - * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use" - * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use" - * - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same - * cacheline, the Head Pointer must not be greater than the Tail - * Pointer." 
- */ -#define I915_RING_FREE_SPACE 64 - struct intel_hw_status_page { struct i915_vma *vma; u32 *page_addr; @@ -139,16 +128,15 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; - struct intel_engine_cs *engine; - struct list_head request_list; u32 head; u32 tail; + u32 emit; - int space; - int size; - int effective_size; + u32 space; + u32 size; + u32 effective_size; }; struct i915_gem_context; @@ -189,15 +177,28 @@ enum intel_engine_id { VECS }; +struct i915_priolist { + struct rb_node node; + struct list_head requests; + int priority; +}; + +#define INTEL_ENGINE_CS_MAX_NAME 8 + struct intel_engine_cs { struct drm_i915_private *i915; - const char *name; + char name[INTEL_ENGINE_CS_MAX_NAME]; enum intel_engine_id id; - unsigned int exec_id; + unsigned int uabi_id; unsigned int hw_id; unsigned int guc_id; - u32 mmio_base; + + u8 class; + u8 instance; + u32 context_size; + u32 mmio_base; unsigned int irq_shift; + struct intel_ring *buffer; struct intel_timeline *timeline; @@ -265,8 +266,8 @@ struct intel_engine_cs { void (*set_default_submission)(struct intel_engine_cs *engine); - int (*context_pin)(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); + struct intel_ring *(*context_pin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); void (*context_unpin)(struct intel_engine_cs *engine, struct i915_gem_context *ctx); int (*request_alloc)(struct drm_i915_gem_request *req); @@ -372,9 +373,18 @@ struct intel_engine_cs { /* Execlists */ struct tasklet_struct irq_tasklet; + struct i915_priolist default_priolist; + bool no_priolist; struct execlist_port { - struct drm_i915_gem_request *request; - unsigned int count; + struct drm_i915_gem_request *request_count; +#define EXECLIST_COUNT_BITS 2 +#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) +#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) +#define port_set(p, packed) ((p)->request_count = (packed)) +#define port_isset(p) ((p)->request_count) +#define port_index(p, e) ((p) - (e)->execlist_port) GEM_DEBUG_DECL(u32 context_id); } execlist_port[2]; struct rb_root execlist_queue; @@ -487,7 +497,11 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); -int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias); +int intel_ring_pin(struct intel_ring *ring, + struct drm_i915_private *i915, + unsigned int offset_bias); +void intel_ring_reset(struct intel_ring *ring, u32 tail); +unsigned int intel_ring_update_space(struct intel_ring *ring); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_free(struct intel_ring *ring); @@ -498,7 +512,8 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n); +u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, + unsigned int n); static inline void intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) @@ -511,7 +526,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). 
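The new execlist_port layout stores a 2-bit resubmission count in the low bits of the request pointer via the port_pack()/port_unpack() macros above, relying on the pointed-to structure being at least 4-byte aligned. A self-contained model of the trick (the kernel versions are the ptr_pack_bits() family; the names below are local to this example):

	#include <assert.h>
	#include <stdint.h>

	#define COUNT_BITS 2
	#define COUNT_MASK ((1ul << COUNT_BITS) - 1)

	static void *pack(void *ptr, unsigned long count)
	{
		assert(((uintptr_t)ptr & COUNT_MASK) == 0); /* alignment frees the bits */
		assert(count <= COUNT_MASK);
		return (void *)((uintptr_t)ptr | count);
	}

	static void *unpack(void *packed, unsigned long *count)
	{
		*count = (uintptr_t)packed & COUNT_MASK;
		return (void *)((uintptr_t)packed & ~(uintptr_t)COUNT_MASK);
	}

	int main(void)
	{
		static int request;	/* stand-in for a drm_i915_gem_request */
		unsigned long count;
		void *p = pack(&request, 2);

		assert(unpack(p, &count) == &request);
		assert(count == 2);
		return 0;
	}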
*/ - GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); + GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs); } static inline u32 @@ -538,9 +553,40 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) */ GEM_BUG_ON(!IS_ALIGNED(tail, 8)); GEM_BUG_ON(tail >= ring->size); + + /* + * "Ring Buffer Use" + * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 + * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 + * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + * + * We use ring->head as the last known location of the actual RING_HEAD, + * it may have advanced but in the worst case it is equally the same + * as ring->head and so we should never program RING_TAIL to advance + * into the same cacheline as ring->head. + */ +#define cacheline(a) round_down(a, CACHELINE_BYTES) + GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && + tail < ring->head); +#undef cacheline } -void intel_ring_update_space(struct intel_ring *ring); +static inline unsigned int +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) +{ + /* Whilst writes to the tail are strictly order, there is no + * serialisation between readers and the writers. The tail may be + * read by i915_gem_request_retire() just as it is being updated + * by execlists, as although the breadcrumb is complete, the context + * switch hasn't been seen. + */ + assert_ring_tail_valid(ring, tail); + ring->tail = tail; + return tail; +} void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); @@ -551,7 +597,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine); int intel_init_render_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); -int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine); int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); @@ -652,7 +697,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); -void intel_engine_enable_signaling(struct drm_i915_gem_request *request); +void intel_engine_enable_signaling(struct drm_i915_gem_request *request, + bool wakeup); void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) @@ -685,6 +731,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); +void intel_engines_mark_idle(struct drm_i915_private *i915); void intel_engines_reset_default_submission(struct drm_i915_private *i915); #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 816a6f5a3fd9..6cc181203135 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -107,11 +107,6 @@ struct intel_sdvo { bool color_range_auto; /** - * HDMI user specified aspect ratio - */ - enum hdmi_picture_aspect aspect_ratio; - - /** * This is set if we're going to treat the device as TV-out. 
* * While we have these nice friendly flags for output types that ought @@ -1186,7 +1181,7 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, /* Set user selected PAR to incoming mode's member */ if (intel_sdvo->is_hdmi) - adjusted_mode->picture_aspect_ratio = intel_sdvo->aspect_ratio; + adjusted_mode->picture_aspect_ratio = conn_state->picture_aspect_ratio; return true; } @@ -2067,19 +2062,7 @@ intel_sdvo_set_property(struct drm_connector *connector, } if (property == connector->dev->mode_config.aspect_ratio_property) { - switch (val) { - case DRM_MODE_PICTURE_ASPECT_NONE: - intel_sdvo->aspect_ratio = HDMI_PICTURE_ASPECT_NONE; - break; - case DRM_MODE_PICTURE_ASPECT_4_3: - intel_sdvo->aspect_ratio = HDMI_PICTURE_ASPECT_4_3; - break; - case DRM_MODE_PICTURE_ASPECT_16_9: - intel_sdvo->aspect_ratio = HDMI_PICTURE_ASPECT_16_9; - break; - default: - return -EINVAL; - } + connector->state->picture_aspect_ratio = val; goto done; } @@ -2418,7 +2401,7 @@ intel_sdvo_add_hdmi_properties(struct intel_sdvo *intel_sdvo, intel_sdvo->color_range_auto = true; } intel_attach_aspect_ratio_property(&connector->base.base); - intel_sdvo->aspect_ratio = HDMI_PICTURE_ASPECT_NONE; + connector->base.base.state->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE; } static struct intel_sdvo_connector *intel_sdvo_connector_alloc(void) @@ -2892,11 +2875,10 @@ static bool intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo, BUILD_BUG_ON(sizeof(enhancements) != 2); - enhancements.response = 0; - intel_sdvo_get_value(intel_sdvo, - SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS, - &enhancements, sizeof(enhancements)); - if (enhancements.response == 0) { + if (!intel_sdvo_get_value(intel_sdvo, + SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS, + &enhancements, sizeof(enhancements)) || + enhancements.response == 0) { DRM_DEBUG_KMS("No enhancement is supported\n"); return true; } diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 8c87c717c7cd..c4bf19364e49 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -210,16 +210,14 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work } static void -skl_update_plane(struct drm_plane *drm_plane, +skl_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_device *dev = drm_plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(drm_plane); - struct drm_framebuffer *fb = plane_state->base.fb; - enum plane_id plane_id = intel_plane->id; - enum pipe pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; u32 plane_ctl = plane_state->ctl; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 surf_addr = plane_state->main.offset; @@ -288,13 +286,11 @@ skl_update_plane(struct drm_plane *drm_plane, } static void -skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) +skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) { - struct drm_device *dev = dplane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(dplane); - enum plane_id plane_id = intel_plane->id; - enum pipe pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum 
plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -308,10 +304,10 @@ skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) } static void -chv_update_csc(struct intel_plane *intel_plane, uint32_t format) +chv_update_csc(struct intel_plane *plane, uint32_t format) { - struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev); - enum plane_id plane_id = intel_plane->id; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum plane_id plane_id = plane->id; /* Seems RGB data bypasses the CSC always */ if (!format_is_yuv(format)) @@ -398,10 +394,10 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, if (fb->modifier == I915_FORMAT_MOD_X_TILED) sprctl |= SP_TILED; - if (rotation & DRM_ROTATE_180) + if (rotation & DRM_MODE_ROTATE_180) sprctl |= SP_ROTATE_180; - if (rotation & DRM_REFLECT_X) + if (rotation & DRM_MODE_REFLECT_X) sprctl |= SP_MIRROR; if (key->flags & I915_SET_COLORKEY_SOURCE) @@ -411,16 +407,14 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, } static void -vlv_update_plane(struct drm_plane *dplane, +vlv_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_device *dev = dplane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(dplane); - struct drm_framebuffer *fb = plane_state->base.fb; - enum pipe pipe = intel_plane->pipe; - enum plane_id plane_id = intel_plane->id; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + enum pipe pipe = plane->pipe; + enum plane_id plane_id = plane->id; u32 sprctl = plane_state->ctl; u32 sprsurf_offset = plane_state->main.offset; u32 linear_offset; @@ -442,7 +436,7 @@ vlv_update_plane(struct drm_plane *dplane, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) - chv_update_csc(intel_plane, fb->format->format); + chv_update_csc(plane, fb->format->format); if (key->flags) { I915_WRITE_FW(SPKEYMINVAL(pipe, plane_id), key->min_value); @@ -469,13 +463,11 @@ vlv_update_plane(struct drm_plane *dplane, } static void -vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) +vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) { - struct drm_device *dev = dplane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(dplane); - enum pipe pipe = intel_plane->pipe; - enum plane_id plane_id = intel_plane->id; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + enum plane_id plane_id = plane->id; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -533,7 +525,7 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, if (fb->modifier == I915_FORMAT_MOD_X_TILED) sprctl |= SPRITE_TILED; - if (rotation & DRM_ROTATE_180) + if (rotation & DRM_MODE_ROTATE_180) sprctl |= SPRITE_ROTATE_180; if (key->flags & I915_SET_COLORKEY_DESTINATION) @@ -545,15 +537,13 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, } static void -ivb_update_plane(struct drm_plane *plane, +ivb_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_device *dev = plane->dev; - struct 
drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - struct drm_framebuffer *fb = plane_state->base.fb; - enum pipe pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + enum pipe pipe = plane->pipe; u32 sprctl = plane_state->ctl, sprscale = 0; u32 sprsurf_offset = plane_state->main.offset; u32 linear_offset; @@ -600,7 +590,7 @@ ivb_update_plane(struct drm_plane *plane, I915_WRITE_FW(SPRLINOFF(pipe), linear_offset); I915_WRITE_FW(SPRSIZE(pipe), (crtc_h << 16) | crtc_w); - if (intel_plane->can_scale) + if (plane->can_scale) I915_WRITE_FW(SPRSCALE(pipe), sprscale); I915_WRITE_FW(SPRCTL(pipe), sprctl); I915_WRITE_FW(SPRSURF(pipe), @@ -611,19 +601,17 @@ ivb_update_plane(struct drm_plane *plane, } static void -ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) +ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - int pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); I915_WRITE_FW(SPRCTL(pipe), 0); /* Can't leave the scaler enabled... */ - if (intel_plane->can_scale) + if (plane->can_scale) I915_WRITE_FW(SPRSCALE(pipe), 0); I915_WRITE_FW(SPRSURF(pipe), 0); @@ -632,7 +620,7 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state, +static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = @@ -674,7 +662,7 @@ static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state, if (fb->modifier == I915_FORMAT_MOD_X_TILED) dvscntr |= DVS_TILED; - if (rotation & DRM_ROTATE_180) + if (rotation & DRM_MODE_ROTATE_180) dvscntr |= DVS_ROTATE_180; if (key->flags & I915_SET_COLORKEY_DESTINATION) @@ -686,15 +674,13 @@ static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state, } static void -ilk_update_plane(struct drm_plane *plane, +g4x_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - struct drm_framebuffer *fb = plane_state->base.fb; - int pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + enum pipe pipe = plane->pipe; u32 dvscntr = plane_state->ctl, dvsscale = 0; u32 dvssurf_offset = plane_state->main.offset; u32 linear_offset; @@ -747,12 +733,10 @@ ilk_update_plane(struct drm_plane *plane, } static void -ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) +g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - int pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; unsigned long irqflags; 
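	/* Editorial note, not part of the original patch: the plane
	 * update/disable hooks run under uncore.lock and use the raw
	 * I915_WRITE_FW() accessors, which skip per-register forcewake
	 * bookkeeping, so the whole MMIO sequence for a plane lands
	 * atomically inside the vblank-evasion window opened by
	 * intel_pipe_update_start()/intel_pipe_update_end(). */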
spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -768,14 +752,12 @@ ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) } static int -intel_check_sprite_plane(struct drm_plane *plane, +intel_check_sprite_plane(struct intel_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state) { - struct drm_i915_private *dev_priv = to_i915(plane->dev); - struct drm_crtc *crtc = state->base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_plane *intel_plane = to_intel_plane(plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_framebuffer *fb = state->base.fb; int crtc_x, crtc_y; unsigned int crtc_w, crtc_h; @@ -797,7 +779,7 @@ intel_check_sprite_plane(struct drm_plane *plane, } /* Don't modify another pipe's plane */ - if (intel_plane->pipe != intel_crtc->pipe) { + if (plane->pipe != crtc->pipe) { DRM_DEBUG_KMS("Wrong plane <-> crtc mapping\n"); return -EINVAL; } @@ -814,16 +796,16 @@ intel_check_sprite_plane(struct drm_plane *plane, if (state->ckey.flags == I915_SET_COLORKEY_NONE) { can_scale = 1; min_scale = 1; - max_scale = skl_max_scale(intel_crtc, crtc_state); + max_scale = skl_max_scale(crtc, crtc_state); } else { can_scale = 0; min_scale = DRM_PLANE_HELPER_NO_SCALING; max_scale = DRM_PLANE_HELPER_NO_SCALING; } } else { - can_scale = intel_plane->can_scale; - max_scale = intel_plane->max_downscale << 16; - min_scale = intel_plane->can_scale ? 1 : (1 << 16); + can_scale = plane->can_scale; + max_scale = plane->max_downscale << 16; + min_scale = plane->can_scale ? 1 : (1 << 16); } /* @@ -967,7 +949,7 @@ intel_check_sprite_plane(struct drm_plane *plane, if (ret) return ret; - state->ctl = ilk_sprite_ctl(crtc_state, state); + state->ctl = g4x_sprite_ctl(crtc_state, state); } return 0; @@ -1027,7 +1009,7 @@ out: return ret; } -static const uint32_t ilk_plane_formats[] = { +static const uint32_t g4x_plane_formats[] = { DRM_FORMAT_XRGB8888, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, @@ -1131,29 +1113,29 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->can_scale = true; intel_plane->max_downscale = 16; - intel_plane->update_plane = ilk_update_plane; - intel_plane->disable_plane = ilk_disable_plane; + intel_plane->update_plane = g4x_update_plane; + intel_plane->disable_plane = g4x_disable_plane; if (IS_GEN6(dev_priv)) { plane_formats = snb_plane_formats; num_plane_formats = ARRAY_SIZE(snb_plane_formats); } else { - plane_formats = ilk_plane_formats; - num_plane_formats = ARRAY_SIZE(ilk_plane_formats); + plane_formats = g4x_plane_formats; + num_plane_formats = ARRAY_SIZE(g4x_plane_formats); } } if (INTEL_GEN(dev_priv) >= 9) { supported_rotations = - DRM_ROTATE_0 | DRM_ROTATE_90 | - DRM_ROTATE_180 | DRM_ROTATE_270; + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; } else if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) { supported_rotations = - DRM_ROTATE_0 | DRM_ROTATE_180 | - DRM_REFLECT_X; + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180 | + DRM_MODE_REFLECT_X; } else { supported_rotations = - DRM_ROTATE_0 | DRM_ROTATE_180; + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180; } intel_plane->pipe = pipe; @@ -1180,7 +1162,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, goto fail; drm_plane_create_rotation_property(&intel_plane->base, - DRM_ROTATE_0, + DRM_MODE_ROTATE_0, supported_rotations); drm_plane_helper_add(&intel_plane->base, &intel_plane_helper_funcs); diff --git 
a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index e077c2a9e694..784df024e230 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -48,41 +48,6 @@ struct intel_tv { struct intel_encoder base; int type; - const char *tv_format; - int margin[4]; - u32 save_TV_H_CTL_1; - u32 save_TV_H_CTL_2; - u32 save_TV_H_CTL_3; - u32 save_TV_V_CTL_1; - u32 save_TV_V_CTL_2; - u32 save_TV_V_CTL_3; - u32 save_TV_V_CTL_4; - u32 save_TV_V_CTL_5; - u32 save_TV_V_CTL_6; - u32 save_TV_V_CTL_7; - u32 save_TV_SC_CTL_1, save_TV_SC_CTL_2, save_TV_SC_CTL_3; - - u32 save_TV_CSC_Y; - u32 save_TV_CSC_Y2; - u32 save_TV_CSC_U; - u32 save_TV_CSC_U2; - u32 save_TV_CSC_V; - u32 save_TV_CSC_V2; - u32 save_TV_CLR_KNOBS; - u32 save_TV_CLR_LEVEL; - u32 save_TV_WIN_POS; - u32 save_TV_WIN_SIZE; - u32 save_TV_FILTER_CTL_1; - u32 save_TV_FILTER_CTL_2; - u32 save_TV_FILTER_CTL_3; - - u32 save_TV_H_LUMA[60]; - u32 save_TV_H_CHROMA[60]; - u32 save_TV_V_LUMA[43]; - u32 save_TV_V_CHROMA[43]; - - u32 save_TV_DAC; - u32 save_TV_CTL; }; struct video_levels { @@ -873,32 +838,18 @@ intel_disable_tv(struct intel_encoder *encoder, I915_WRITE(TV_CTL, I915_READ(TV_CTL) & ~TV_ENC_ENABLE); } -static const struct tv_mode * -intel_tv_mode_lookup(const char *tv_format) +static const struct tv_mode *intel_tv_mode_find(struct drm_connector_state *conn_state) { - int i; - - for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { - const struct tv_mode *tv_mode = &tv_modes[i]; + int format = conn_state->tv.mode; - if (!strcmp(tv_format, tv_mode->name)) - return tv_mode; - } - return NULL; -} - -static const struct tv_mode * -intel_tv_mode_find(struct intel_tv *intel_tv) -{ - return intel_tv_mode_lookup(intel_tv->tv_format); + return &tv_modes[format]; } static enum drm_mode_status intel_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; if (mode->clock > max_dotclk) @@ -925,8 +876,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_tv *intel_tv = enc_to_tv(encoder); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); if (!tv_mode) return false; @@ -1032,7 +982,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); u32 tv_ctl; u32 scctl1, scctl2, scctl3; int i, j; @@ -1135,12 +1085,12 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, else ysize = 2*tv_mode->nbr_end + 1; - xpos += intel_tv->margin[TV_MARGIN_LEFT]; - ypos += intel_tv->margin[TV_MARGIN_TOP]; - xsize -= (intel_tv->margin[TV_MARGIN_LEFT] + - intel_tv->margin[TV_MARGIN_RIGHT]); - ysize -= (intel_tv->margin[TV_MARGIN_TOP] + - intel_tv->margin[TV_MARGIN_BOTTOM]); + xpos += conn_state->tv.margins.left; + ypos += conn_state->tv.margins.top; + xsize -= (conn_state->tv.margins.left + + conn_state->tv.margins.right); + ysize -= (conn_state->tv.margins.top + + 
conn_state->tv.margins.bottom); I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos); I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize); @@ -1288,7 +1238,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, static void intel_tv_find_better_format(struct drm_connector *connector) { struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int i; if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == @@ -1304,9 +1254,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector) break; } - intel_tv->tv_format = tv_mode->name; - drm_object_property_set_value(&connector->base, - connector->dev->mode_config.tv_mode_property, i); + connector->state->tv.mode = i; } /** @@ -1347,16 +1295,15 @@ intel_tv_detect(struct drm_connector *connector, connector_status_connected; } else status = connector_status_unknown; - } else - return connector->status; - if (status != connector_status_connected) - return status; - - intel_tv->type = type; - intel_tv_find_better_format(connector); + if (status == connector_status_connected) { + intel_tv->type = type; + intel_tv_find_better_format(connector); + } - return connector_status_connected; + return status; + } else + return connector->status; } static const struct input_res { @@ -1376,12 +1323,9 @@ static const struct input_res { * Choose preferred mode according to line number of TV format */ static void -intel_tv_chose_preferred_modes(struct drm_connector *connector, +intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode, struct drm_display_mode *mode_ptr) { - struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); - if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; else if (tv_mode->nbr_end > 480) { @@ -1404,8 +1348,7 @@ static int intel_tv_get_modes(struct drm_connector *connector) { struct drm_display_mode *mode_ptr; - struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int j, count = 0; u64 tmp; @@ -1448,7 +1391,7 @@ intel_tv_get_modes(struct drm_connector *connector) mode_ptr->clock = (int) tmp; mode_ptr->type = DRM_MODE_TYPE_DRIVER; - intel_tv_chose_preferred_modes(connector, mode_ptr); + intel_tv_choose_preferred_modes(tv_mode, mode_ptr); drm_mode_probed_add(connector, mode_ptr); count++; } @@ -1463,74 +1406,47 @@ intel_tv_destroy(struct drm_connector *connector) kfree(connector); } - -static int -intel_tv_set_property(struct drm_connector *connector, struct drm_property *property, - uint64_t val) -{ - struct drm_device *dev = connector->dev; - struct intel_tv *intel_tv = intel_attached_tv(connector); - struct drm_crtc *crtc = intel_tv->base.base.crtc; - int ret = 0; - bool changed = false; - - ret = drm_object_property_set_value(&connector->base, property, val); - if (ret < 0) - goto out; - - if (property == dev->mode_config.tv_left_margin_property && - intel_tv->margin[TV_MARGIN_LEFT] != val) { - intel_tv->margin[TV_MARGIN_LEFT] = val; - changed = true; - } else if (property == dev->mode_config.tv_right_margin_property && - intel_tv->margin[TV_MARGIN_RIGHT] != val) { - intel_tv->margin[TV_MARGIN_RIGHT] = val; - changed = true; - } else if (property == dev->mode_config.tv_top_margin_property && - intel_tv->margin[TV_MARGIN_TOP] != val) { - 
intel_tv->margin[TV_MARGIN_TOP] = val; - changed = true; - } else if (property == dev->mode_config.tv_bottom_margin_property && - intel_tv->margin[TV_MARGIN_BOTTOM] != val) { - intel_tv->margin[TV_MARGIN_BOTTOM] = val; - changed = true; - } else if (property == dev->mode_config.tv_mode_property) { - if (val >= ARRAY_SIZE(tv_modes)) { - ret = -EINVAL; - goto out; - } - if (!strcmp(intel_tv->tv_format, tv_modes[val].name)) - goto out; - - intel_tv->tv_format = tv_modes[val].name; - changed = true; - } else { - ret = -EINVAL; - goto out; - } - - if (changed && crtc) - intel_crtc_restore_mode(crtc); -out: - return ret; -} - static const struct drm_connector_funcs intel_tv_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_tv_destroy, - .set_property = intel_tv_set_property, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, }; +static int intel_tv_atomic_check(struct drm_connector *connector, + struct drm_connector_state *new_state) +{ + struct drm_crtc_state *new_crtc_state; + struct drm_connector_state *old_state; + + if (!new_state->crtc) + return 0; + + old_state = drm_atomic_get_old_connector_state(new_state->state, connector); + new_crtc_state = drm_atomic_get_new_crtc_state(new_state->state, new_state->crtc); + + if (old_state->tv.mode != new_state->tv.mode || + old_state->tv.margins.left != new_state->tv.margins.left || + old_state->tv.margins.right != new_state->tv.margins.right || + old_state->tv.margins.top != new_state->tv.margins.top || + old_state->tv.margins.bottom != new_state->tv.margins.bottom) { + /* Force a modeset. */ + + new_crtc_state->connectors_changed = true; + } + + return 0; +} + static const struct drm_connector_helper_funcs intel_tv_connector_helper_funcs = { .detect_ctx = intel_tv_detect, .mode_valid = intel_tv_mode_valid, .get_modes = intel_tv_get_modes, + .atomic_check = intel_tv_atomic_check, }; static const struct drm_encoder_funcs intel_tv_enc_funcs = { @@ -1548,6 +1464,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) u32 tv_dac_on, tv_dac_off, save_tv_dac; const char *tv_format_names[ARRAY_SIZE(tv_modes)]; int i, initial_mode = 0; + struct drm_connector_state *state; if ((I915_READ(TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED) return; @@ -1593,6 +1510,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_encoder = &intel_tv->base; connector = &intel_connector->base; + state = connector->state; /* The documentation, for the older chipsets at least, recommends * using a polling method rather than hotplug detection for TVs. 
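(Editorial aside, not part of the patch: with the TV settings now stored in drm_connector_state, consumers read the committed state rather than driver-private fields. A minimal sketch of that pattern, reusing the tv_modes[] table and the margins layout from the hunks above; the helper name apply_tv_margins() is hypothetical:)

static const struct tv_mode *
apply_tv_margins(const struct drm_connector_state *conn_state,
		 int *xpos, int *ypos, int *xsize, int *ysize)
{
	/* tv.mode is now an index into the driver's tv_modes[] table */
	const struct tv_mode *tv_mode = &tv_modes[conn_state->tv.mode];

	/* The user-set margins offset the window origin and shrink its size */
	*xpos += conn_state->tv.margins.left;
	*ypos += conn_state->tv.margins.top;
	*xsize -= conn_state->tv.margins.left + conn_state->tv.margins.right;
	*ysize -= conn_state->tv.margins.top + conn_state->tv.margins.bottom;

	return tv_mode;
}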
@@ -1630,12 +1548,12 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_tv->type = DRM_MODE_CONNECTOR_Unknown; /* BIOS margin values */ - intel_tv->margin[TV_MARGIN_LEFT] = 54; - intel_tv->margin[TV_MARGIN_TOP] = 36; - intel_tv->margin[TV_MARGIN_RIGHT] = 46; - intel_tv->margin[TV_MARGIN_BOTTOM] = 37; + state->tv.margins.left = 54; + state->tv.margins.top = 36; + state->tv.margins.right = 46; + state->tv.margins.bottom = 37; - intel_tv->tv_format = tv_modes[initial_mode].name; + state->tv.mode = initial_mode; drm_connector_helper_add(connector, &intel_tv_connector_helper_funcs); connector->interlace_allowed = false; @@ -1649,17 +1567,17 @@ intel_tv_init(struct drm_i915_private *dev_priv) tv_format_names); drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property, - initial_mode); + state->tv.mode); drm_object_attach_property(&connector->base, dev->mode_config.tv_left_margin_property, - intel_tv->margin[TV_MARGIN_LEFT]); + state->tv.margins.left); drm_object_attach_property(&connector->base, dev->mode_config.tv_top_margin_property, - intel_tv->margin[TV_MARGIN_TOP]); + state->tv.margins.top); drm_object_attach_property(&connector->base, dev->mode_config.tv_right_margin_property, - intel_tv->margin[TV_MARGIN_RIGHT]); + state->tv.margins.right); drm_object_attach_property(&connector->base, dev->mode_config.tv_bottom_margin_property, - intel_tv->margin[TV_MARGIN_BOTTOM]); + state->tv.margins.bottom); } diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index c117424f1f50..7a7b07de28a3 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -94,12 +94,22 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) i915.enable_guc_submission = HAS_GUC_SCHED(dev_priv); } +static void guc_write_irq_trigger(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); +} + void intel_uc_init_early(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; + intel_guc_ct_init_early(&guc->ct); + mutex_init(&guc->send_mutex); - guc->send = intel_guc_send_mmio; + guc->send = intel_guc_send_nop; + guc->notify = guc_write_irq_trigger; } static void fetch_uc_fw(struct drm_i915_private *dev_priv, @@ -252,13 +262,81 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv) __intel_uc_fw_fini(&dev_priv->huc.fw); } +static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) +{ + GEM_BUG_ON(!guc->send_regs.base); + GEM_BUG_ON(!guc->send_regs.count); + GEM_BUG_ON(i >= guc->send_regs.count); + + return _MMIO(guc->send_regs.base + 4 * i); +} + +static void guc_init_send_regs(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + enum forcewake_domains fw_domains = 0; + unsigned int i; + + guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); + guc->send_regs.count = SOFT_SCRATCH_COUNT - 1; + + for (i = 0; i < guc->send_regs.count; i++) { + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + guc_send_reg(guc, i), + FW_REG_READ | FW_REG_WRITE); + } + guc->send_regs.fw_domains = fw_domains; +} + +static void guc_capture_load_err_log(struct intel_guc *guc) +{ + if (!guc->log.vma || i915.guc_log_level < 0) + return; + + if (!guc->load_err_log) + guc->load_err_log = i915_gem_object_get(guc->log.vma->obj); + + return; +} + +static void guc_free_load_err_log(struct intel_guc *guc) +{ + if (guc->load_err_log) + i915_gem_object_put(guc->load_err_log); +} + +static int 
guc_enable_communication(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + guc_init_send_regs(guc); + + if (HAS_GUC_CT(dev_priv)) + return intel_guc_enable_ct(guc); + + guc->send = intel_guc_send_mmio; + return 0; +} + +static void guc_disable_communication(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + if (HAS_GUC_CT(dev_priv)) + intel_guc_disable_ct(guc); + + guc->send = intel_guc_send_nop; +} + int intel_uc_init_hw(struct drm_i915_private *dev_priv) { + struct intel_guc *guc = &dev_priv->guc; int ret, attempts; if (!i915.enable_guc_loading) return 0; + guc_disable_communication(guc); gen9_reset_guc_interrupts(dev_priv); /* We need to notify the guc whenever we change the GGTT */ @@ -274,6 +352,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_guc; } + /* init WOPCM */ + I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); + I915_WRITE(DMA_GUC_WOPCM_OFFSET, + GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC); + /* WaEnableuKernelHeaderValidFix:skl */ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ if (IS_GEN9(dev_priv)) @@ -301,7 +384,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) /* Did we succeed or run out of retries? */ if (ret) - goto err_submission; + goto err_log_capture; + + ret = guc_enable_communication(guc); + if (ret) + goto err_log_capture; intel_guc_auth_huc(dev_priv); if (i915.enable_guc_submission) { @@ -325,7 +412,10 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) * marks the GPU as wedged until reset). */ err_interrupts: + guc_disable_communication(guc); gen9_disable_guc_interrupts(dev_priv); +err_log_capture: + guc_capture_load_err_log(guc); err_submission: if (i915.enable_guc_submission) i915_guc_submission_fini(dev_priv); @@ -351,25 +441,25 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) if (!i915.enable_guc_loading) return; - if (i915.enable_guc_submission) { + guc_free_load_err_log(&dev_priv->guc); + + if (i915.enable_guc_submission) + i915_guc_submission_disable(dev_priv); + + guc_disable_communication(&dev_priv->guc); + + if (i915.enable_guc_submission) { gen9_disable_guc_interrupts(dev_priv); i915_guc_submission_fini(dev_priv); } + i915_ggtt_disable_guc(dev_priv); } -/* - * Read GuC command/status register (SOFT_SCRATCH_0) - * Return true if it contains a response rather than a command - */ -static bool guc_recv(struct intel_guc *guc, u32 *status) +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - u32 val = I915_READ(SOFT_SCRATCH(0)); - *status = val; - return INTEL_GUC_RECV_IS_RESPONSE(val); + WARN(1, "Unexpected send: action=%#x\n", *action); + return -ENODEV; } /* @@ -382,30 +472,33 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) int i; int ret; - if (WARN_ON(len < 1 || len > 15)) - return -EINVAL; + GEM_BUG_ON(!len); + GEM_BUG_ON(len > guc->send_regs.count); - mutex_lock(&guc->send_mutex); - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER); + /* If CT is available, we expect to use MMIO only during init/fini */ + GEM_BUG_ON(HAS_GUC_CT(dev_priv) && + *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && + *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); - dev_priv->guc.action_count += 1; - dev_priv->guc.action_cmd = action[0]; + mutex_lock(&guc->send_mutex); + intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); for (i = 0; i < len; i++) - 
I915_WRITE(SOFT_SCRATCH(i), action[i]); + I915_WRITE(guc_send_reg(guc, i), action[i]); - POSTING_READ(SOFT_SCRATCH(i - 1)); + POSTING_READ(guc_send_reg(guc, i - 1)); - I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); + intel_guc_notify(guc); /* - * Fast commands should complete in less than 10us, so sample quickly - * up to that length of time, then switch to a slower sleep-wait loop. - * No inte_guc_send command should ever take longer than 10ms. + * No GuC command should ever take longer than 10ms. + * Fast commands should still complete in 10us. */ - ret = wait_for_us(guc_recv(guc, &status), 10); - if (ret) - ret = wait_for(guc_recv(guc, &status), 10); + ret = __intel_wait_for_register_fw(dev_priv, + guc_send_reg(guc, 0), + INTEL_GUC_RECV_MASK, + INTEL_GUC_RECV_MASK, + 10, 10, &status); if (status != INTEL_GUC_STATUS_SUCCESS) { /* * Either the GuC explicitly returned an error (which @@ -418,13 +511,9 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" " ret=%d status=0x%08X response=0x%08X\n", action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); - - dev_priv->guc.action_fail += 1; - dev_priv->guc.action_err = ret; } - dev_priv->guc.action_status = status; - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER); + intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); mutex_unlock(&guc->send_mutex); return ret; diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 4b7f73aeddac..69daf4c01cd0 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -27,7 +27,7 @@ #include "intel_guc_fwif.h" #include "i915_guc_reg.h" #include "intel_ringbuffer.h" - +#include "intel_guc_ct.h" #include "i915_vma.h" struct drm_i915_gem_request; @@ -59,12 +59,6 @@ struct drm_i915_gem_request; * available in the work queue (note, the queue is shared, * not per-engine). It is OK for this to be nonzero, but * it should not be huge! - * q_fail: failed to enqueue a work item. This should never happen, - * because we check for space beforehand. - * b_fail: failed to ring the doorbell. This should never happen, unless - * somehow the hardware misbehaves, or maybe if the GuC firmware - * crashes? We probably need to reset the GPU to recover. 
- * retcode: errno from last guc_submit() */ struct i915_guc_client { struct i915_vma *vma; @@ -87,8 +81,6 @@ struct i915_guc_client { uint32_t wq_tail; uint32_t wq_rsvd; uint32_t no_wq_space; - uint32_t b_fail; - int retcode; /* Per-engine counts of GuC submissions */ uint64_t submissions[I915_NUM_ENGINES]; @@ -181,6 +173,10 @@ struct intel_guc_log { struct intel_guc { struct intel_uc_fw fw; struct intel_guc_log log; + struct intel_guc_ct ct; + + /* Log snapshot if GuC errors during load */ + struct drm_i915_gem_object *load_err_log; /* intel_guc_recv interrupt related state */ bool interrupts_enabled; @@ -195,21 +191,21 @@ struct intel_guc { DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); uint32_t db_cacheline; /* Cyclic counter mod pagesize */ - /* Action status & statistics */ - uint64_t action_count; /* Total commands issued */ - uint32_t action_cmd; /* Last command word */ - uint32_t action_status; /* Last return status */ - uint32_t action_fail; /* Total number of failures */ - int32_t action_err; /* Last error code */ - - uint64_t submissions[I915_NUM_ENGINES]; - uint32_t last_seqno[I915_NUM_ENGINES]; + /* GuC's FW specific registers used in MMIO send */ + struct { + u32 base; + unsigned int count; + enum forcewake_domains fw_domains; + } send_regs; /* To serialize the intel_guc_send actions */ struct mutex send_mutex; /* GuC's FW specific send function */ int (*send)(struct intel_guc *guc, const u32 *data, u32 len); + + /* GuC's FW specific notify function */ + void (*notify)(struct intel_guc *guc); }; struct intel_huc { @@ -227,12 +223,19 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); int intel_guc_sample_forcewake(struct intel_guc *guc); +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); + static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { return guc->send(guc, action, len); } +static inline void intel_guc_notify(struct intel_guc *guc) +{ + guc->notify(guc); +} + /* intel_guc_loader.c */ int intel_guc_select_fw(struct intel_guc *guc); int intel_guc_init_hw(struct intel_guc *guc); @@ -266,7 +269,7 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) /* intel_huc.c */ void intel_huc_select_fw(struct intel_huc *huc); -int intel_huc_init_hw(struct intel_huc *huc); +void intel_huc_init_hw(struct intel_huc *huc); void intel_guc_auth_huc(struct drm_i915_private *dev_priv); #endif diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6d1ea26b2493..47d7ee1b5d86 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -29,6 +29,7 @@ #include <linux/pm_runtime.h> #define FORCEWAKE_ACK_TIMEOUT_MS 50 +#define GT_FIFO_TIMEOUT_MS 10 #define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32((dev_priv__), (reg__)) @@ -172,22 +173,6 @@ static void fw_domains_get_with_thread_status(struct drm_i915_private *dev_priv, __gen6_gt_wait_for_thread_c0(dev_priv); } -static void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv) -{ - u32 gtfifodbg; - - gtfifodbg = __raw_i915_read32(dev_priv, GTFIFODBG); - if (WARN(gtfifodbg, "GT wake FIFO error 0x%x\n", gtfifodbg)) - __raw_i915_write32(dev_priv, GTFIFODBG, gtfifodbg); -} - -static void fw_domains_put_with_fifo(struct drm_i915_private *dev_priv, - enum forcewake_domains fw_domains) -{ - 
fw_domains_put(dev_priv, fw_domains); - gen6_gt_check_fifodbg(dev_priv); -} - static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv) { u32 count = __raw_i915_read32(dev_priv, GTFIFOCTL); @@ -195,30 +180,27 @@ static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv) return count & GT_FIFO_FREE_ENTRIES_MASK; } -static int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) +static void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) { - int ret = 0; + u32 n; /* On VLV, FIFO will be shared by both SW and HW. * So, we need to read the FREE_ENTRIES every time */ if (IS_VALLEYVIEW(dev_priv)) - dev_priv->uncore.fifo_count = fifo_free_entries(dev_priv); - - if (dev_priv->uncore.fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) { - int loop = 500; - u32 fifo = fifo_free_entries(dev_priv); - - while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) { - udelay(10); - fifo = fifo_free_entries(dev_priv); + n = fifo_free_entries(dev_priv); + else + n = dev_priv->uncore.fifo_count; + + if (n <= GT_FIFO_NUM_RESERVED_ENTRIES) { + if (wait_for_atomic((n = fifo_free_entries(dev_priv)) > + GT_FIFO_NUM_RESERVED_ENTRIES, + GT_FIFO_TIMEOUT_MS)) { + DRM_DEBUG("GT_FIFO timeout, entries: %u\n", n); + return; } - if (WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES)) - ++ret; - dev_priv->uncore.fifo_count = fifo; } - dev_priv->uncore.fifo_count--; - return ret; + dev_priv->uncore.fifo_count = n - 1; } static enum hrtimer_restart @@ -232,6 +214,9 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) assert_rpm_device_not_suspended(dev_priv); + if (xchg(&domain->active, false)) + return HRTIMER_RESTART; + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (WARN_ON(domain->wake_count == 0)) domain->wake_count++; @@ -262,6 +247,7 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, active_domains = 0; for_each_fw_domain(domain, dev_priv, tmp) { + smp_store_mb(domain->active, false); if (hrtimer_cancel(&domain->timer) == 0) continue; @@ -384,15 +370,35 @@ vlv_check_for_unclaimed_mmio(struct drm_i915_private *dev_priv) } static bool +gen6_check_for_fifo_debug(struct drm_i915_private *dev_priv) +{ + u32 fifodbg; + + fifodbg = __raw_i915_read32(dev_priv, GTFIFODBG); + + if (unlikely(fifodbg)) { + DRM_DEBUG_DRIVER("GTFIFODBG = 0x%08x\n", fifodbg); + __raw_i915_write32(dev_priv, GTFIFODBG, fifodbg); + } + + return fifodbg; +} + +static bool check_for_unclaimed_mmio(struct drm_i915_private *dev_priv) { + bool ret = false; + if (HAS_FPGA_DBG_UNCLAIMED(dev_priv)) - return fpga_check_for_unclaimed_mmio(dev_priv); + ret |= fpga_check_for_unclaimed_mmio(dev_priv); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - return vlv_check_for_unclaimed_mmio(dev_priv); + ret |= vlv_check_for_unclaimed_mmio(dev_priv); - return false; + if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) + ret |= gen6_check_for_fifo_debug(dev_priv); + + return ret; } static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, @@ -404,11 +410,6 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, if (check_for_unclaimed_mmio(dev_priv)) DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n"); - /* clear out old GT FIFO errors */ - if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) - __raw_i915_write32(dev_priv, GTFIFODBG, - __raw_i915_read32(dev_priv, GTFIFODBG)); - /* WaDisableShadowRegForCpd:chv */ if (IS_CHERRYVIEW(dev_priv)) { __raw_i915_write32(dev_priv, GTFIFOCTL, @@ -454,9 +455,12 @@ static void __intel_uncore_forcewake_get(struct 
drm_i915_private *dev_priv, fw_domains &= dev_priv->uncore.fw_domains; - for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) - if (domain->wake_count++) + for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) { + if (domain->wake_count++) { fw_domains &= ~domain->mask; + domain->active = true; + } + } if (fw_domains) dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); @@ -521,8 +525,10 @@ static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, if (WARN_ON(domain->wake_count == 0)) continue; - if (--domain->wake_count) + if (--domain->wake_count) { + domain->active = true; continue; + } fw_domain_arm_timer(domain); } @@ -804,6 +810,18 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv, __unclaimed_reg_debug(dev_priv, reg, read, before); } +enum decoupled_power_domain { + GEN9_DECOUPLED_PD_BLITTER = 0, + GEN9_DECOUPLED_PD_RENDER, + GEN9_DECOUPLED_PD_MEDIA, + GEN9_DECOUPLED_PD_ALL +}; + +enum decoupled_ops { + GEN9_DECOUPLED_OP_WRITE = 0, + GEN9_DECOUPLED_OP_READ +}; + static const enum decoupled_power_domain fw2dpd_domain[] = { GEN9_DECOUPLED_PD_RENDER, GEN9_DECOUPLED_PD_BLITTER, @@ -1047,15 +1065,10 @@ __gen2_write(32) #define __gen6_write(x) \ static void \ gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ - u32 __fifo_ret = 0; \ GEN6_WRITE_HEADER; \ - if (NEEDS_FORCE_WAKE(offset)) { \ - __fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \ - } \ + if (NEEDS_FORCE_WAKE(offset)) \ + __gen6_gt_wait_for_fifo(dev_priv); \ __raw_i915_write##x(dev_priv, reg, val); \ - if (unlikely(__fifo_ret)) { \ - gen6_gt_check_fifodbg(dev_priv); \ - } \ GEN6_WRITE_FOOTER; \ } @@ -1108,19 +1121,19 @@ __gen6_write(32) #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define ASSIGN_WRITE_MMIO_VFUNCS(x) \ +#define ASSIGN_WRITE_MMIO_VFUNCS(i915, x) \ do { \ - dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ - dev_priv->uncore.funcs.mmio_writew = x##_write16; \ - dev_priv->uncore.funcs.mmio_writel = x##_write32; \ + (i915)->uncore.funcs.mmio_writeb = x##_write8; \ + (i915)->uncore.funcs.mmio_writew = x##_write16; \ + (i915)->uncore.funcs.mmio_writel = x##_write32; \ } while (0) -#define ASSIGN_READ_MMIO_VFUNCS(x) \ +#define ASSIGN_READ_MMIO_VFUNCS(i915, x) \ do { \ - dev_priv->uncore.funcs.mmio_readb = x##_read8; \ - dev_priv->uncore.funcs.mmio_readw = x##_read16; \ - dev_priv->uncore.funcs.mmio_readl = x##_read32; \ - dev_priv->uncore.funcs.mmio_readq = x##_read64; \ + (i915)->uncore.funcs.mmio_readb = x##_read8; \ + (i915)->uncore.funcs.mmio_readw = x##_read16; \ + (i915)->uncore.funcs.mmio_readl = x##_read32; \ + (i915)->uncore.funcs.mmio_readq = x##_read64; \ } while (0) @@ -1190,11 +1203,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) FORCEWAKE_MEDIA_GEN9, FORCEWAKE_ACK_MEDIA_GEN9); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get; - if (!IS_CHERRYVIEW(dev_priv)) - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; - else - dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_VLV, FORCEWAKE_ACK_VLV); fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA, @@ -1202,11 +1211,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - if 
(IS_HASWELL(dev_priv)) - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; - else - dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_MT, FORCEWAKE_ACK_HSW); } else if (IS_IVYBRIDGE(dev_priv)) { @@ -1223,8 +1228,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) */ dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; /* We need to init first for ECOBUS access and then * determine later if we want to reinit, in case of MT access is @@ -1242,7 +1246,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) spin_lock_irq(&dev_priv->uncore.lock); fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER); ecobus = __raw_i915_read32(dev_priv, ECOBUS); - fw_domains_put_with_fifo(dev_priv, FORCEWAKE_RENDER); + fw_domains_put(dev_priv, FORCEWAKE_RENDER); spin_unlock_irq(&dev_priv->uncore.lock); if (!(ecobus & FORCEWAKE_MT_ENABLE)) { @@ -1254,8 +1258,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) } else if (IS_GEN6(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE, FORCEWAKE_ACK); } @@ -1310,34 +1313,34 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) i915_pmic_bus_access_notifier; if (IS_GEN(dev_priv, 2, 4) || intel_vgpu_active(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(gen2); - ASSIGN_READ_MMIO_VFUNCS(gen2); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen2); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen2); } else if (IS_GEN5(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(gen5); - ASSIGN_READ_MMIO_VFUNCS(gen5); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen5); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen5); } else if (IS_GEN(dev_priv, 6, 7)) { - ASSIGN_WRITE_MMIO_VFUNCS(gen6); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen6); if (IS_VALLEYVIEW(dev_priv)) { ASSIGN_FW_DOMAINS_TABLE(__vlv_fw_ranges); - ASSIGN_READ_MMIO_VFUNCS(fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); } else { - ASSIGN_READ_MMIO_VFUNCS(gen6); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6); } } else if (IS_GEN8(dev_priv)) { if (IS_CHERRYVIEW(dev_priv)) { ASSIGN_FW_DOMAINS_TABLE(__chv_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(fwtable); - ASSIGN_READ_MMIO_VFUNCS(fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); } else { - ASSIGN_WRITE_MMIO_VFUNCS(gen8); - ASSIGN_READ_MMIO_VFUNCS(gen6); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen8); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6); } } else { ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(fwtable); - ASSIGN_READ_MMIO_VFUNCS(fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); if (HAS_DECOUPLED_MMIO(dev_priv)) { dev_priv->uncore.funcs.mmio_readl = gen9_decoupled_read32; @@ -1353,8 +1356,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); } -#undef ASSIGN_WRITE_MMIO_VFUNCS -#undef ASSIGN_READ_MMIO_VFUNCS void intel_uncore_fini(struct drm_i915_private *dev_priv) { @@ -1435,9 +1436,39 @@ out: return ret; } -static int i915_reset_complete(struct pci_dev *pdev) 
+static void gen3_stop_rings(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) { + const u32 base = engine->mmio_base; + const i915_reg_t mode = RING_MI_MODE(base); + + I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING)); + if (intel_wait_for_register_fw(dev_priv, + mode, + MODE_IDLE, + MODE_IDLE, + 500)) + DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", + engine->name); + + I915_WRITE_FW(RING_CTL(base), 0); + I915_WRITE_FW(RING_HEAD(base), 0); + I915_WRITE_FW(RING_TAIL(base), 0); + + /* Check acts as a post */ + if (I915_READ_FW(RING_HEAD(base)) != 0) + DRM_DEBUG_DRIVER("%s: ring head not parked\n", + engine->name); + } +} + +static bool i915_reset_complete(struct pci_dev *pdev) { u8 gdrst; + pci_read_config_byte(pdev, I915_GDRST, &gdrst); return (gdrst & GRDOM_RESET_STATUS) == 0; } @@ -1448,15 +1479,16 @@ static int i915_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask /* assert reset for at least 20 usec */ pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - udelay(20); + usleep_range(50, 200); pci_write_config_byte(pdev, I915_GDRST, 0); return wait_for(i915_reset_complete(pdev), 500); } -static int g4x_reset_complete(struct pci_dev *pdev) +static bool g4x_reset_complete(struct pci_dev *pdev) { u8 gdrst; + pci_read_config_byte(pdev, I915_GDRST, &gdrst); return (gdrst & GRDOM_RESET_ENABLE) == 0; } @@ -1464,6 +1496,10 @@ static int g33_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { struct pci_dev *pdev = dev_priv->drm.pdev; + + /* Stop engines before we reset; see g4x_do_reset() below for why. */ + gen3_stop_rings(dev_priv); + + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); return wait_for(g4x_reset_complete(pdev), 500); } @@ -1473,29 +1509,41 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { struct pci_dev *pdev = dev_priv->drm.pdev; int ret; - pci_write_config_byte(pdev, I915_GDRST, - GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); - if (ret) - return ret; - /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); + I915_WRITE(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); POSTING_READ(VDECCLK_GATE_D); + /* We stop engines, otherwise we might get a failed reset and a + * dead gpu (on elk). 
+ * WaMediaResetMainRingCleanup:ctg,elk (presumably) + */ + gen3_stop_rings(dev_priv); + pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); ret = wait_for(g4x_reset_complete(pdev), 500); - if (ret) - return ret; + if (ret) { + DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + goto out; + } - /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_RENDER | GRDOM_RESET_ENABLE); + ret = wait_for(g4x_reset_complete(pdev), 500); + if (ret) { + DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + goto out; + } +out: pci_write_config_byte(pdev, I915_GDRST, 0); - return 0; + I915_WRITE(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ(VDECCLK_GATE_D); + + return ret; } static int ironlake_do_reset(struct drm_i915_private *dev_priv, @@ -1503,41 +1551,51 @@ { int ret; - I915_WRITE(ILK_GDSR, - ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); + I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); ret = intel_wait_for_register(dev_priv, ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, 500); - if (ret) - return ret; + if (ret) { + DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + goto out; + } - I915_WRITE(ILK_GDSR, - ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); + I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); ret = intel_wait_for_register(dev_priv, ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, 500); - if (ret) - return ret; + if (ret) { + DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + goto out; + } +out: I915_WRITE(ILK_GDSR, 0); - - return 0; + POSTING_READ(ILK_GDSR); + return ret; } /* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, u32 hw_domain_mask) { + int err; + /* GEN6_GDRST is not in the gt power well, no need to check * for fifo space for the write or forcewake the chip for * the read */ __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask); - /* Spin waiting for the device to ack the reset requests */ - return intel_wait_for_register_fw(dev_priv, + /* Wait for the device to ack the reset requests */ + err = intel_wait_for_register_fw(dev_priv, GEN6_GDRST, hw_domain_mask, 0, 500); + if (err) + DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n", + hw_domain_mask); + + return err; } /** @@ -1585,19 +1643,23 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, } /** - * intel_wait_for_register_fw - wait until register matches expected state + * __intel_wait_for_register_fw - wait until register matches expected state * @dev_priv: the i915 device * @reg: the register to read * @mask: mask to apply to register value * @value: expected value - * @timeout_ms: timeout in millisecond + * @fast_timeout_us: fast timeout in microseconds for atomic/tight wait + * @slow_timeout_ms: slow timeout in milliseconds + * @out_value: optional placeholder to hold the register value * * This routine waits until the target register @reg contains the expected * @value after applying the @mask, i.e. it waits until :: * * (I915_READ_FW(reg) & mask) == value * - * Otherwise, the wait will timeout after @timeout_ms milliseconds. + * Otherwise, the wait will time out after @slow_timeout_ms milliseconds. + * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us + * must not be larger than 20,000 microseconds. 
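 * As an illustration (editorial addition, mirroring the GuC MMIO send
 * path elsewhere in this series): poll a register atomically for up to
 * 10us, then sleep-wait for up to 10ms, capturing the final value:
 *
 *	ret = __intel_wait_for_register_fw(dev_priv,
 *					   guc_send_reg(guc, 0),
 *					   INTEL_GUC_RECV_MASK,
 *					   INTEL_GUC_RECV_MASK,
 *					   10, 10, &status);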
* * Note that this routine assumes the caller holds forcewake asserted; it is * not suitable for very long waits. See intel_wait_for_register() if you * @@ -1606,16 +1668,31 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, * * Returns 0 if the register matches the desired condition, or -ETIMEDOUT. */ -int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, - i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned long timeout_ms) -{ -#define done ((I915_READ_FW(reg) & mask) == value) - int ret = wait_for_us(done, 2); - if (ret) - ret = wait_for(done, timeout_ms); +int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, + i915_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, + u32 *out_value) +{ + u32 uninitialized_var(reg_value); +#define done (((reg_value = I915_READ_FW(reg)) & mask) == value) + int ret; + + /* Catch any overuse of this function */ + might_sleep_if(slow_timeout_ms); + GEM_BUG_ON(fast_timeout_us > 20000); + + ret = -ETIMEDOUT; + if (fast_timeout_us && fast_timeout_us <= 20000) + ret = _wait_for_atomic(done, fast_timeout_us, 0); + if (ret && slow_timeout_ms) + ret = wait_for(done, slow_timeout_ms); + + if (out_value) + *out_value = reg_value; + return ret; #undef done } @@ -1639,18 +1716,26 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, */ int intel_wait_for_register(struct drm_i915_private *dev_priv, i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned long timeout_ms) + u32 mask, + u32 value, + unsigned int timeout_ms) { - unsigned fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); int ret; - intel_uncore_forcewake_get(dev_priv, fw); - ret = wait_for_us((I915_READ_FW(reg) & mask) == value, 2); - intel_uncore_forcewake_put(dev_priv, fw); + might_sleep(); + + spin_lock_irq(&dev_priv->uncore.lock); + intel_uncore_forcewake_get__locked(dev_priv, fw); + + ret = __intel_wait_for_register_fw(dev_priv, + reg, mask, value, + 2, 0, NULL); + + intel_uncore_forcewake_put__locked(dev_priv, fw); + spin_unlock_irq(&dev_priv->uncore.lock); + if (ret) ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value, timeout_ms); @@ -1658,7 +1743,7 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv, return ret; } -static int gen8_request_engine_reset(struct intel_engine_cs *engine) +static int gen8_reset_engine_start(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; int ret; @@ -1677,7 +1762,7 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine) return ret; } -static void gen8_unrequest_engine_reset(struct intel_engine_cs *engine) +static void gen8_reset_engine_cancel(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1692,14 +1777,14 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv, unsigned int tmp; for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - if (gen8_request_engine_reset(engine)) + if (gen8_reset_engine_start(engine)) goto not_ready; return gen6_reset_engines(dev_priv, engine_mask); not_ready: for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - gen8_unrequest_engine_reset(engine); + gen8_reset_engine_cancel(engine); return -EIO; } @@ -1730,8 +1815,11 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv) int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { reset_func reset; + int retry; int ret; + might_sleep(); + + reset = 
intel_get_gpu_reset(dev_priv); if (reset == NULL) return -ENODEV; @@ -1740,7 +1828,13 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) * request may be dropped and never completes (causing -EIO). */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - ret = reset(dev_priv, engine_mask); + for (retry = 0; retry < 3; retry++) { + ret = reset(dev_priv, engine_mask); + if (ret != -ETIMEDOUT) + break; + + cond_resched(); + } intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; @@ -1754,17 +1848,12 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv) int intel_guc_reset(struct drm_i915_private *dev_priv) { int ret; - unsigned long irqflags; if (!HAS_GUC(dev_priv)) return -EINVAL; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; @@ -1873,5 +1962,6 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_uncore.c" #include "selftests/intel_uncore.c" #endif diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h new file mode 100644 index 000000000000..5f90278da461 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -0,0 +1,170 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef __INTEL_UNCORE_H__ +#define __INTEL_UNCORE_H__ + +struct drm_i915_private; + +enum forcewake_domain_id { + FW_DOMAIN_ID_RENDER = 0, + FW_DOMAIN_ID_BLITTER, + FW_DOMAIN_ID_MEDIA, + + FW_DOMAIN_ID_COUNT +}; + +enum forcewake_domains { + FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), + FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), + FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), + FORCEWAKE_ALL = (FORCEWAKE_RENDER | + FORCEWAKE_BLITTER | + FORCEWAKE_MEDIA) +}; + +struct intel_uncore_funcs { + void (*force_wake_get)(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); + void (*force_wake_put)(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); + + uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + + void (*mmio_writeb)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint8_t val, bool trace); + void (*mmio_writew)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint16_t val, bool trace); + void (*mmio_writel)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint32_t val, bool trace); +}; + +struct intel_forcewake_range { + u32 start; + u32 end; + + enum forcewake_domains domains; +}; + +struct intel_uncore { + spinlock_t lock; /** lock is also taken in irq contexts. */ + + const struct intel_forcewake_range *fw_domains_table; + unsigned int fw_domains_table_entries; + + struct notifier_block pmic_bus_access_nb; + struct intel_uncore_funcs funcs; + + unsigned int fifo_count; + + enum forcewake_domains fw_domains; + enum forcewake_domains fw_domains_active; + + u32 fw_set; + u32 fw_clear; + u32 fw_reset; + + struct intel_uncore_forcewake_domain { + enum forcewake_domain_id id; + enum forcewake_domains mask; + unsigned int wake_count; + bool active; + struct hrtimer timer; + i915_reg_t reg_set; + i915_reg_t reg_ack; + } fw_domain[FW_DOMAIN_ID_COUNT]; + + int unclaimed_mmio_check; +}; + +/* Iterate over initialised fw domains */ +#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \ + for (tmp__ = (mask__); \ + tmp__ ? 
(domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;) + +#define for_each_fw_domain(domain__, dev_priv__, tmp__) \ + for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__) + + +void intel_uncore_sanitize(struct drm_i915_private *dev_priv); +void intel_uncore_init(struct drm_i915_private *dev_priv); +bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv); +bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv); +void intel_uncore_fini(struct drm_i915_private *dev_priv); +void intel_uncore_suspend(struct drm_i915_private *dev_priv); +void intel_uncore_resume_early(struct drm_i915_private *dev_priv); + +u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); +void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); +const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id); + +enum forcewake_domains +intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, + i915_reg_t reg, unsigned int op); +#define FW_REG_READ (1) +#define FW_REG_WRITE (2) + +void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); +void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); +/* Like above but the caller must manage the uncore.lock itself. + * Must be used with I915_READ_FW and friends. + */ +void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); +void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); + +int intel_wait_for_register(struct drm_i915_private *dev_priv, + i915_reg_t reg, + u32 mask, + u32 value, + unsigned int timeout_ms); +int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, + i915_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, + u32 *out_value); +static inline +int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, + i915_reg_t reg, + u32 mask, + u32 value, + unsigned int timeout_ms) +{ + return __intel_wait_for_register_fw(dev_priv, reg, mask, value, + 2, timeout_ms, NULL); +} + +#endif /* !__INTEL_UNCORE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index f08d0179b3df..95d4aebc0181 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -138,10 +138,7 @@ static int wc_set(struct drm_i915_gem_object *obj, typeof(v) *map; int err; - /* XXX GTT write followed by WC write go missing */ - i915_gem_object_flush_gtt_write_domain(obj); - - err = i915_gem_object_set_to_gtt_domain(obj, true); + err = i915_gem_object_set_to_wc_domain(obj, true); if (err) return err; @@ -162,10 +159,7 @@ static int wc_get(struct drm_i915_gem_object *obj, typeof(v) map; int err; - /* XXX WC write followed by GTT write go missing */ - i915_gem_object_flush_gtt_write_domain(obj); - - err = i915_gem_object_set_to_gtt_domain(obj, false); + err = i915_gem_object_set_to_wc_domain(obj, false); if (err) return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 1afb8b06e3e1..12b85b3278cd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -320,7 +320,7 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj) 
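/*
 * Editorial sketch, not part of the patch: the __locked forcewake variants
 * declared in intel_uncore.h above are meant for callers that already hold
 * uncore.lock and pair them with the raw I915_READ_FW()/I915_WRITE_FW()
 * accessors (assumed here from i915_drv.h). A minimal illustration; the
 * register choice and helper name are hypothetical, and the irqsave form
 * follows the header's note that uncore.lock is also taken in irq context:
 *
 *	static u32 sample_reg_fw(struct drm_i915_private *dev_priv, i915_reg_t reg)
 *	{
 *		unsigned long flags;
 *		u32 val;
 *
 *		spin_lock_irqsave(&dev_priv->uncore.lock, flags);
 *		intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_RENDER);
 *		val = I915_READ_FW(reg);
 *		intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_RENDER);
 *		spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 *
 *		return val;
 *	}
 */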
 static int igt_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_object *obj = NULL;
 	struct drm_file *file;
 	IGT_TIMEOUT(end_time);
 	LIST_HEAD(objects);
@@ -359,7 +359,7 @@ static int igt_ctx_exec(void *arg)
 		}
 
 		for_each_engine(engine, i915, id) {
-			if (dw == 0) {
+			if (!obj) {
 				obj = create_test_object(ctx, file, &objects);
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
@@ -376,8 +376,10 @@ static int igt_ctx_exec(void *arg)
 				goto out_unlock;
 			}
 
-			if (++dw == max_dwords(obj))
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
 				dw = 0;
+			}
 			ndwords++;
 		}
 		ncontexts++;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
index 817bef74bbcb..d15cc9d3a5cd 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
@@ -271,6 +271,105 @@ err_obj:
 	return err;
 }
 
+static int igt_dmabuf_export_kmap(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct dma_buf *dmabuf;
+	void *ptr;
+	int err;
+
+	obj = i915_gem_object_create(i915, 2*PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
+	i915_gem_object_put(obj);
+	if (IS_ERR(dmabuf)) {
+		err = PTR_ERR(dmabuf);
+		pr_err("i915_gem_prime_export failed with err=%d\n", err);
+		return err;
+	}
+
+	ptr = dma_buf_kmap(dmabuf, 0);
+	if (!ptr) {
+		pr_err("dma_buf_kmap failed\n");
+		err = -ENOMEM;
+		goto err;
+	}
+
+	if (memchr_inv(ptr, 0, PAGE_SIZE)) {
+		dma_buf_kunmap(dmabuf, 0, ptr);
+		pr_err("Exported page[0] not initialised to zero!\n");
+		err = -EINVAL;
+		goto err;
+	}
+
+	memset(ptr, 0xc5, PAGE_SIZE);
+	dma_buf_kunmap(dmabuf, 0, ptr);
+
+	ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(ptr)) {
+		err = PTR_ERR(ptr);
+		pr_err("i915_gem_object_pin_map failed with err=%d\n", err);
+		goto err;
+	}
+	memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE);
+	i915_gem_object_unpin_map(obj);
+
+	ptr = dma_buf_kmap(dmabuf, 1);
+	if (!ptr) {
+		pr_err("dma_buf_kmap failed\n");
+		err = -ENOMEM;
+		goto err;
+	}
+
+	if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) {
+		dma_buf_kunmap(dmabuf, 1, ptr);
+		pr_err("Exported page[1] not set to 0xaa!\n");
+		err = -EINVAL;
+		goto err;
+	}
+
+	memset(ptr, 0xc5, PAGE_SIZE);
+	dma_buf_kunmap(dmabuf, 1, ptr);
+
+	ptr = dma_buf_kmap(dmabuf, 0);
+	if (!ptr) {
+		pr_err("dma_buf_kmap failed\n");
+		err = -ENOMEM;
+		goto err;
+	}
+	if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) {
+		dma_buf_kunmap(dmabuf, 0, ptr);
+		pr_err("Exported page[0] did not retain 0xc5!\n");
+		err = -EINVAL;
+		goto err;
+	}
+	dma_buf_kunmap(dmabuf, 0, ptr);
+
+	ptr = dma_buf_kmap(dmabuf, 2);
+	if (ptr) {
+		pr_err("Erroneously kmapped beyond the end of the object!\n");
+		dma_buf_kunmap(dmabuf, 2, ptr);
+		err = -EINVAL;
+		goto err;
+	}
+
+	ptr = dma_buf_kmap(dmabuf, -1);
+	if (ptr) {
+		pr_err("Erroneously kmapped before the start of the object!\n");
+		dma_buf_kunmap(dmabuf, -1, ptr);
+		err = -EINVAL;
+		goto err;
+	}
+
+	err = 0;
+err:
+	dma_buf_put(dmabuf);
+	return err;
+}
+
 int i915_gem_dmabuf_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
@@ -279,6 +378,7 @@ int i915_gem_dmabuf_mock_selftests(void)
 		SUBTEST(igt_dmabuf_import),
 		SUBTEST(igt_dmabuf_import_ownership),
 		SUBTEST(igt_dmabuf_export_vmap),
+		SUBTEST(igt_dmabuf_export_kmap),
 	};
 	struct drm_i915_private *i915;
 	int err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index
67d82bf1407f..8f011c447e41 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -266,7 +266,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) continue; - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); @@ -545,7 +545,9 @@ static int igt_mmap_offset_exhaustion(void *arg) } mutex_lock(&i915->drm.struct_mutex); + intel_runtime_pm_get(i915); err = make_obj_busy(obj); + intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 98b7aac41eec..6664cb2eb0b8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -580,7 +580,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) if (err) goto err; - err = i915_gem_object_set_to_gtt_domain(obj, true); + err = i915_gem_object_set_to_wc_domain(obj, true); if (err) goto err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c new file mode 100644 index 000000000000..7a44dab631b8 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c @@ -0,0 +1,299 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *
+ */
+
+#include "../i915_selftest.h"
+#include "i915_random.h"
+
+#include "mock_gem_device.h"
+#include "mock_timeline.h"
+
+struct __igt_sync {
+	const char *name;
+	u32 seqno;
+	bool expected;
+	bool set;
+};
+
+static int __igt_sync(struct intel_timeline *tl,
+		      u64 ctx,
+		      const struct __igt_sync *p,
+		      const char *name)
+{
+	int ret;
+
+	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
+		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
+		       name, p->name, ctx, p->seqno, yesno(p->expected));
+		return -EINVAL;
+	}
+
+	if (p->set) {
+		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int igt_sync(void *arg)
+{
+	const struct __igt_sync pass[] = {
+		{ "unset", 0, false, false },
+		{ "new", 0, false, true },
+		{ "0a", 0, true, true },
+		{ "1a", 1, false, true },
+		{ "1b", 1, true, true },
+		{ "0b", 0, true, false },
+		{ "2a", 2, false, true },
+		{ "4", 4, false, true },
+		{ "INT_MAX", INT_MAX, false, true },
+		{ "INT_MAX-1", INT_MAX-1, true, false },
+		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
+		{ "INT_MAX", INT_MAX, true, false },
+		{ "UINT_MAX", UINT_MAX, false, true },
+		{ "wrap", 0, false, true },
+		{ "unwrap", UINT_MAX, true, false },
+		{},
+	}, *p;
+	struct intel_timeline *tl;
+	int order, offset;
+	int ret;
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
+
+	for (p = pass; p->name; p++) {
+		for (order = 1; order < 64; order++) {
+			for (offset = -1; offset <= (order > 1); offset++) {
+				u64 ctx = BIT_ULL(order) + offset;
+
+				ret = __igt_sync(tl, ctx, p, "1");
+				if (ret)
+					goto out;
+			}
+		}
+	}
+	mock_timeline_destroy(tl);
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
+
+	for (order = 1; order < 64; order++) {
+		for (offset = -1; offset <= (order > 1); offset++) {
+			u64 ctx = BIT_ULL(order) + offset;
+
+			for (p = pass; p->name; p++) {
+				ret = __igt_sync(tl, ctx, p, "2");
+				if (ret)
+					goto out;
+			}
+		}
+	}
+
+out:
+	mock_timeline_destroy(tl);
+	return ret;
+}
+
+static unsigned int random_engine(struct rnd_state *rnd)
+{
+	return ((u64)prandom_u32_state(rnd) * I915_NUM_ENGINES) >> 32;
+}
+
+static int bench_sync(void *arg)
+{
+	struct rnd_state prng;
+	struct intel_timeline *tl;
+	unsigned long end_time, count;
+	u64 prng32_1M;
+	ktime_t kt;
+	int order, last_order;
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
+
+	/* Lookups from cache are very fast and so the random number generation
+	 * and the loop itself become a significant factor in the per-iteration
+	 * timings. We compensate by measuring the overhead of the prng and
+	 * subtracting it from the reported results.
+ */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 x; + + /* Make sure the compiler doesn't optimise away the prng call */ + WRITE_ONCE(x, prandom_u32_state(&prng)); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_debug("%s: %lu random evaluations, %lluns/prng\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); + + /* Benchmark (only) setting random context ids */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u64 id = i915_prandom_u64_state(&prng); + + __intel_timeline_sync_set(tl, id, 0); + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu random insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + prandom_seed_state(&prng, i915_selftest.random_seed); + end_time = count; + kt = ktime_get(); + while (end_time--) { + u64 id = i915_prandom_u64_state(&prng); + + if (!__intel_timeline_sync_is_later(tl, id, 0)) { + mock_timeline_destroy(tl); + pr_err("Lookup of %llu failed\n", id); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu random lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_destroy(tl); + cond_resched(); + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + /* Benchmark setting the first N (in order) contexts */ + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + __intel_timeline_sync_set(tl, count++, 0); + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + end_time = count; + kt = ktime_get(); + while (end_time--) { + if (!__intel_timeline_sync_is_later(tl, end_time, 0)) { + pr_err("Lookup of %lu failed\n", end_time); + mock_timeline_destroy(tl); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_destroy(tl); + cond_resched(); + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + /* Benchmark searching for a random context id and maybe changing it */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 id = random_engine(&prng); + u32 seqno = prandom_u32_state(&prng); + + if (!__intel_timeline_sync_is_later(tl, id, seqno)) + __intel_timeline_sync_set(tl, id, seqno); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_destroy(tl); + cond_resched(); + + /* Benchmark searching for a known context id and changing the seqno */ + for (last_order = 1, order = 1; order < 32; + ({ int tmp = last_order; last_order = order; order += 
tmp; })) { + unsigned int mask = BIT(order) - 1; + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + /* Without assuming too many details of the underlying + * implementation, try to identify its phase-changes + * (if any)! + */ + u64 id = (u64)(count & mask) << order; + + __intel_timeline_sync_is_later(tl, id, 0); + __intel_timeline_sync_set(tl, id, 0); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", + __func__, count, order, + (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_destroy(tl); + cond_resched(); + } + + return 0; +} + +int i915_gem_timeline_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_sync), + SUBTEST(bench_sync), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index be9a9ebf5692..fc74687501ba 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -9,9 +9,12 @@ * Tests are executed in order by igt/drv_selftest */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ +selftest(fence, i915_sw_fence_mock_selftests) selftest(scatterlist, scatterlist_mock_selftests) +selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) +selftest(timelines, i915_gem_timeline_mock_selftests) selftest(requests, i915_gem_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index c17c83c30637..d044bf9a6feb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -30,6 +30,17 @@ #include "i915_random.h" +u64 i915_prandom_u64_state(struct rnd_state *rnd) +{ + u64 x; + + x = prandom_u32_state(rnd); + x <<= 32; + x |= prandom_u32_state(rnd); + + return x; +} + static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) { return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index b9c334ce6cd9..6c9379871384 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -41,6 +41,8 @@ #define I915_RND_SUBSTATE(name__, parent__) \ struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__))) +u64 i915_prandom_u64_state(struct rnd_state *rnd); + unsigned int *i915_random_order(unsigned int count, struct rnd_state *state); void i915_random_reorder(unsigned int *order, diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c new file mode 100644 index 000000000000..19d145d6bf52 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c @@ -0,0 +1,582 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/completion.h> +#include <linux/delay.h> + +#include "../i915_selftest.h" + +static int __i915_sw_fence_call +fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + switch (state) { + case FENCE_COMPLETE: + break; + + case FENCE_FREE: + /* Leave the fence for the caller to free it after testing */ + break; + } + + return NOTIFY_DONE; +} + +static struct i915_sw_fence *alloc_fence(void) +{ + struct i915_sw_fence *fence; + + fence = kmalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return NULL; + + i915_sw_fence_init(fence, fence_notify); + return fence; +} + +static void free_fence(struct i915_sw_fence *fence) +{ + i915_sw_fence_fini(fence); + kfree(fence); +} + +static int __test_self(struct i915_sw_fence *fence) +{ + if (i915_sw_fence_done(fence)) + return -EINVAL; + + i915_sw_fence_commit(fence); + if (!i915_sw_fence_done(fence)) + return -EINVAL; + + i915_sw_fence_wait(fence); + if (!i915_sw_fence_done(fence)) + return -EINVAL; + + return 0; +} + +static int test_self(void *arg) +{ + struct i915_sw_fence *fence; + int ret; + + /* Test i915_sw_fence signaling and completion testing */ + fence = alloc_fence(); + if (!fence) + return -ENOMEM; + + ret = __test_self(fence); + + free_fence(fence); + return ret; +} + +static int test_dag(void *arg) +{ + struct i915_sw_fence *A, *B, *C; + int ret = -EINVAL; + + /* Test detection of cycles within the i915_sw_fence graphs */ + if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG)) + return 0; + + A = alloc_fence(); + if (!A) + return -ENOMEM; + + if (i915_sw_fence_await_sw_fence_gfp(A, A, GFP_KERNEL) != -EINVAL) { + pr_err("recursive cycle not detected (AA)\n"); + goto err_A; + } + + B = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_A; + } + + i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL); + if (i915_sw_fence_await_sw_fence_gfp(B, A, GFP_KERNEL) != -EINVAL) { + pr_err("single depth cycle not detected (BAB)\n"); + goto err_B; + } + + C = alloc_fence(); + if (!C) { + ret = -ENOMEM; + goto err_B; + } + + if (i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL) == -EINVAL) { + pr_err("invalid cycle detected\n"); + goto err_C; + } + if (i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL) != -EINVAL) { + pr_err("single depth cycle not detected (CBC)\n"); + goto err_C; + } + if (i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL) != -EINVAL) { + pr_err("cycle not detected (BA, CB, AC)\n"); + goto err_C; + } + if (i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL) == -EINVAL) { + pr_err("invalid cycle detected\n"); + goto err_C; + } + + i915_sw_fence_commit(A); + i915_sw_fence_commit(B); + i915_sw_fence_commit(C); + + ret = 0; + if (!i915_sw_fence_done(C)) { + pr_err("fence C not 
done\n"); + ret = -EINVAL; + } + if (!i915_sw_fence_done(B)) { + pr_err("fence B not done\n"); + ret = -EINVAL; + } + if (!i915_sw_fence_done(A)) { + pr_err("fence A not done\n"); + ret = -EINVAL; + } +err_C: + free_fence(C); +err_B: + free_fence(B); +err_A: + free_fence(A); + return ret; +} + +static int test_AB(void *arg) +{ + struct i915_sw_fence *A, *B; + int ret; + + /* Test i915_sw_fence (A) waiting on an event source (B) */ + A = alloc_fence(); + if (!A) + return -ENOMEM; + B = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_A; + } + + ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL); + if (ret < 0) + goto err_B; + if (ret == 0) { + pr_err("Incorrectly reported fence A was complete before await\n"); + ret = -EINVAL; + goto err_B; + } + + ret = -EINVAL; + i915_sw_fence_commit(A); + if (i915_sw_fence_done(A)) + goto err_B; + + i915_sw_fence_commit(B); + if (!i915_sw_fence_done(B)) { + pr_err("Fence B is not done\n"); + goto err_B; + } + + if (!i915_sw_fence_done(A)) { + pr_err("Fence A is not done\n"); + goto err_B; + } + + ret = 0; +err_B: + free_fence(B); +err_A: + free_fence(A); + return ret; +} + +static int test_ABC(void *arg) +{ + struct i915_sw_fence *A, *B, *C; + int ret; + + /* Test a chain of fences, A waits on B who waits on C */ + A = alloc_fence(); + if (!A) + return -ENOMEM; + + B = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_A; + } + + C = alloc_fence(); + if (!C) { + ret = -ENOMEM; + goto err_B; + } + + ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL); + if (ret < 0) + goto err_C; + if (ret == 0) { + pr_err("Incorrectly reported fence B was complete before await\n"); + goto err_C; + } + + ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL); + if (ret < 0) + goto err_C; + if (ret == 0) { + pr_err("Incorrectly reported fence C was complete before await\n"); + goto err_C; + } + + ret = -EINVAL; + i915_sw_fence_commit(A); + if (i915_sw_fence_done(A)) { + pr_err("Fence A completed early\n"); + goto err_C; + } + + i915_sw_fence_commit(B); + if (i915_sw_fence_done(B)) { + pr_err("Fence B completed early\n"); + goto err_C; + } + + if (i915_sw_fence_done(A)) { + pr_err("Fence A completed early (after signaling B)\n"); + goto err_C; + } + + i915_sw_fence_commit(C); + + ret = 0; + if (!i915_sw_fence_done(C)) { + pr_err("Fence C not done\n"); + ret = -EINVAL; + } + if (!i915_sw_fence_done(B)) { + pr_err("Fence B not done\n"); + ret = -EINVAL; + } + if (!i915_sw_fence_done(A)) { + pr_err("Fence A not done\n"); + ret = -EINVAL; + } +err_C: + free_fence(C); +err_B: + free_fence(B); +err_A: + free_fence(A); + return ret; +} + +static int test_AB_C(void *arg) +{ + struct i915_sw_fence *A, *B, *C; + int ret = -EINVAL; + + /* Test multiple fences (AB) waiting on a single event (C) */ + A = alloc_fence(); + if (!A) + return -ENOMEM; + + B = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_A; + } + + C = alloc_fence(); + if (!C) { + ret = -ENOMEM; + goto err_B; + } + + ret = i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL); + if (ret < 0) + goto err_C; + if (ret == 0) { + ret = -EINVAL; + goto err_C; + } + + ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL); + if (ret < 0) + goto err_C; + if (ret == 0) { + ret = -EINVAL; + goto err_C; + } + + i915_sw_fence_commit(A); + i915_sw_fence_commit(B); + + ret = 0; + if (i915_sw_fence_done(A)) { + pr_err("Fence A completed early\n"); + ret = -EINVAL; + } + + if (i915_sw_fence_done(B)) { + pr_err("Fence B completed early\n"); + ret = -EINVAL; + } + + i915_sw_fence_commit(C); + if 
(!i915_sw_fence_done(C)) { + pr_err("Fence C not done\n"); + ret = -EINVAL; + } + + if (!i915_sw_fence_done(B)) { + pr_err("Fence B not done\n"); + ret = -EINVAL; + } + + if (!i915_sw_fence_done(A)) { + pr_err("Fence A not done\n"); + ret = -EINVAL; + } + +err_C: + free_fence(C); +err_B: + free_fence(B); +err_A: + free_fence(A); + return ret; +} + +static int test_C_AB(void *arg) +{ + struct i915_sw_fence *A, *B, *C; + int ret; + + /* Test multiple event sources (A,B) for a single fence (C) */ + A = alloc_fence(); + if (!A) + return -ENOMEM; + + B = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_A; + } + + C = alloc_fence(); + if (!C) { + ret = -ENOMEM; + goto err_B; + } + + ret = i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL); + if (ret < 0) + goto err_C; + if (ret == 0) { + ret = -EINVAL; + goto err_C; + } + + ret = i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL); + if (ret < 0) + goto err_C; + if (ret == 0) { + ret = -EINVAL; + goto err_C; + } + + ret = 0; + i915_sw_fence_commit(C); + if (i915_sw_fence_done(C)) + ret = -EINVAL; + + i915_sw_fence_commit(A); + i915_sw_fence_commit(B); + + if (!i915_sw_fence_done(A)) { + pr_err("Fence A not done\n"); + ret = -EINVAL; + } + + if (!i915_sw_fence_done(B)) { + pr_err("Fence B not done\n"); + ret = -EINVAL; + } + + if (!i915_sw_fence_done(C)) { + pr_err("Fence C not done\n"); + ret = -EINVAL; + } + +err_C: + free_fence(C); +err_B: + free_fence(B); +err_A: + free_fence(A); + return ret; +} + +static int test_chain(void *arg) +{ + int nfences = 4096; + struct i915_sw_fence **fences; + int ret, i; + + /* Test a long chain of fences */ + fences = kmalloc_array(nfences, sizeof(*fences), GFP_KERNEL); + if (!fences) + return -ENOMEM; + + for (i = 0; i < nfences; i++) { + fences[i] = alloc_fence(); + if (!fences[i]) { + nfences = i; + ret = -ENOMEM; + goto err; + } + + if (i > 0) { + ret = i915_sw_fence_await_sw_fence_gfp(fences[i], + fences[i - 1], + GFP_KERNEL); + if (ret < 0) { + nfences = i + 1; + goto err; + } + + i915_sw_fence_commit(fences[i]); + } + } + + ret = 0; + for (i = nfences; --i; ) { + if (i915_sw_fence_done(fences[i])) { + if (ret == 0) + pr_err("Fence[%d] completed early\n", i); + ret = -EINVAL; + } + } + i915_sw_fence_commit(fences[0]); + for (i = 0; ret == 0 && i < nfences; i++) { + if (!i915_sw_fence_done(fences[i])) { + pr_err("Fence[%d] is not done\n", i); + ret = -EINVAL; + } + } + +err: + for (i = 0; i < nfences; i++) + free_fence(fences[i]); + kfree(fences); + return ret; +} + +struct task_ipc { + struct work_struct work; + struct completion started; + struct i915_sw_fence *in, *out; + int value; +}; + +static void task_ipc(struct work_struct *work) +{ + struct task_ipc *ipc = container_of(work, typeof(*ipc), work); + + complete(&ipc->started); + + i915_sw_fence_wait(ipc->in); + smp_store_mb(ipc->value, 1); + i915_sw_fence_commit(ipc->out); +} + +static int test_ipc(void *arg) +{ + struct task_ipc ipc; + int ret = 0; + + /* Test use of i915_sw_fence as an interprocess signaling mechanism */ + ipc.in = alloc_fence(); + if (!ipc.in) + return -ENOMEM; + ipc.out = alloc_fence(); + if (!ipc.out) { + ret = -ENOMEM; + goto err_in; + } + + /* use a completion to avoid chicken-and-egg testing */ + init_completion(&ipc.started); + + ipc.value = 0; + INIT_WORK_ONSTACK(&ipc.work, task_ipc); + schedule_work(&ipc.work); + + wait_for_completion(&ipc.started); + + usleep_range(1000, 2000); + if (READ_ONCE(ipc.value)) { + pr_err("worker updated value before i915_sw_fence was signaled\n"); + ret = -EINVAL; + } + + 
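/*
 * Editorial note, not part of the patch: the ordering contract exercised by
 * test_ipc() is that task_ipc() publishes ipc->value with smp_store_mb()
 * before committing ipc->out, while this thread reads it with READ_ONCE().
 * The usleep_range() above merely widens the window in which a buggy early
 * update could be observed; the commit of ipc.in below is the actual release
 * point for the worker.
 */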
i915_sw_fence_commit(ipc.in); + i915_sw_fence_wait(ipc.out); + + if (!READ_ONCE(ipc.value)) { + pr_err("worker signaled i915_sw_fence before value was posted\n"); + ret = -EINVAL; + } + + flush_work(&ipc.work); + destroy_work_on_stack(&ipc.work); + free_fence(ipc.out); +err_in: + free_fence(ipc.in); + return ret; +} + +int i915_sw_fence_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(test_self), + SUBTEST(test_dag), + SUBTEST(test_AB), + SUBTEST(test_ABC), + SUBTEST(test_AB_C), + SUBTEST(test_C_AB), + SUBTEST(test_chain), + SUBTEST(test_ipc), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c new file mode 100644 index 000000000000..bcab3d00a785 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c @@ -0,0 +1,616 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "../i915_selftest.h" +#include "i915_random.h" + +static char * +__sync_print(struct i915_syncmap *p, + char *buf, unsigned long *sz, + unsigned int depth, + unsigned int last, + unsigned int idx) +{ + unsigned long len; + unsigned int i, X; + + if (depth) { + unsigned int d; + + for (d = 0; d < depth - 1; d++) { + if (last & BIT(depth - d - 1)) + len = scnprintf(buf, *sz, "| "); + else + len = scnprintf(buf, *sz, " "); + buf += len; + *sz -= len; + } + len = scnprintf(buf, *sz, "%x-> ", idx); + buf += len; + *sz -= len; + } + + /* We mark bits after the prefix as "X" */ + len = scnprintf(buf, *sz, "0x%016llx", p->prefix << p->height << SHIFT); + buf += len; + *sz -= len; + X = (p->height + SHIFT) / 4; + scnprintf(buf - X, *sz + X, "%*s", X, "XXXXXXXXXXXXXXXXX"); + + if (!p->height) { + for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) { + len = scnprintf(buf, *sz, " %x:%x,", + i, __sync_seqno(p)[i]); + buf += len; + *sz -= len; + } + buf -= 1; + *sz += 1; + } + + len = scnprintf(buf, *sz, "\n"); + buf += len; + *sz -= len; + + if (p->height) { + for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) { + buf = __sync_print(__sync_child(p)[i], buf, sz, + depth + 1, + last << 1 | !!(p->bitmap >> (i + 1)), + i); + } + } + + return buf; +} + +static bool +i915_syncmap_print_to_buf(struct i915_syncmap *p, char *buf, unsigned long sz) +{ + if (!p) + return false; + + while (p->parent) + p = p->parent; + + __sync_print(p, buf, &sz, 0, 1, 0); + return true; +} + +static int check_syncmap_free(struct i915_syncmap **sync) +{ + i915_syncmap_free(sync); + if (*sync) { + pr_err("sync not cleared after free\n"); + return -EINVAL; + } + + return 0; +} + +static int dump_syncmap(struct i915_syncmap *sync, int err) +{ + char *buf; + + if (!err) + return check_syncmap_free(&sync); + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + goto skip; + + if (i915_syncmap_print_to_buf(sync, buf, PAGE_SIZE)) + pr_err("%s", buf); + + kfree(buf); + +skip: + i915_syncmap_free(&sync); + return err; +} + +static int igt_syncmap_init(void *arg) +{ + struct i915_syncmap *sync = (void *)~0ul; + + /* + * Cursory check that we can initialise a random pointer and transform + * it into the root pointer of a syncmap. 
+ */ + + i915_syncmap_init(&sync); + return check_syncmap_free(&sync); +} + +static int check_seqno(struct i915_syncmap *leaf, unsigned int idx, u32 seqno) +{ + if (leaf->height) { + pr_err("%s: not a leaf, height is %d\n", + __func__, leaf->height); + return -EINVAL; + } + + if (__sync_seqno(leaf)[idx] != seqno) { + pr_err("%s: seqno[%d], found %x, expected %x\n", + __func__, idx, __sync_seqno(leaf)[idx], seqno); + return -EINVAL; + } + + return 0; +} + +static int check_one(struct i915_syncmap **sync, u64 context, u32 seqno) +{ + int err; + + err = i915_syncmap_set(sync, context, seqno); + if (err) + return err; + + if ((*sync)->height) { + pr_err("Inserting first context=%llx did not return leaf (height=%d, prefix=%llx\n", + context, (*sync)->height, (*sync)->prefix); + return -EINVAL; + } + + if ((*sync)->parent) { + pr_err("Inserting first context=%llx created branches!\n", + context); + return -EINVAL; + } + + if (hweight32((*sync)->bitmap) != 1) { + pr_err("First bitmap does not contain a single entry, found %x (count=%d)!\n", + (*sync)->bitmap, hweight32((*sync)->bitmap)); + return -EINVAL; + } + + err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno); + if (err) + return err; + + if (!i915_syncmap_is_later(sync, context, seqno)) { + pr_err("Lookup of first context=%llx/seqno=%x failed!\n", + context, seqno); + return -EINVAL; + } + + return 0; +} + +static int igt_syncmap_one(void *arg) +{ + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + struct i915_syncmap *sync; + unsigned long max = 1; + int err; + + /* + * Check that inserting a new id, creates a leaf and only that leaf. + */ + + i915_syncmap_init(&sync); + + do { + u64 context = i915_prandom_u64_state(&prng); + unsigned long loop; + + err = check_syncmap_free(&sync); + if (err) + goto out; + + for (loop = 0; loop <= max; loop++) { + err = check_one(&sync, context, + prandom_u32_state(&prng)); + if (err) + goto out; + } + max++; + } while (!__igt_timeout(end_time, NULL)); + pr_debug("%s: Completed %lu single insertions\n", + __func__, max * (max - 1) / 2); +out: + return dump_syncmap(sync, err); +} + +static int check_leaf(struct i915_syncmap **sync, u64 context, u32 seqno) +{ + int err; + + err = i915_syncmap_set(sync, context, seqno); + if (err) + return err; + + if ((*sync)->height) { + pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx\n", + context, (*sync)->height, (*sync)->prefix); + return -EINVAL; + } + + if (hweight32((*sync)->bitmap) != 1) { + pr_err("First entry into leaf (context=%llx) does not contain a single entry, found %x (count=%d)!\n", + context, (*sync)->bitmap, hweight32((*sync)->bitmap)); + return -EINVAL; + } + + err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno); + if (err) + return err; + + if (!i915_syncmap_is_later(sync, context, seqno)) { + pr_err("Lookup of first entry context=%llx/seqno=%x failed!\n", + context, seqno); + return -EINVAL; + } + + return 0; +} + +static int igt_syncmap_join_above(void *arg) +{ + struct i915_syncmap *sync; + unsigned int pass, order; + int err; + + i915_syncmap_init(&sync); + + /* + * When we have a new id that doesn't fit inside the existing tree, + * we need to add a new layer above. + * + * 1: 0x00000001 + * 2: 0x00000010 + * 3: 0x00000100 + * 4: 0x00001000 + * ... + * Each pass the common prefix shrinks and we have to insert a join. + * Each join will only contain two branches, the latest of which + * is always a leaf. + * + * If we then reuse the same set of contexts, we expect to build an + * identical tree. 
+ */ + for (pass = 0; pass < 3; pass++) { + for (order = 0; order < 64; order += SHIFT) { + u64 context = BIT_ULL(order); + struct i915_syncmap *join; + + err = check_leaf(&sync, context, 0); + if (err) + goto out; + + join = sync->parent; + if (!join) /* very first insert will have no parents */ + continue; + + if (!join->height) { + pr_err("Parent with no height!\n"); + err = -EINVAL; + goto out; + } + + if (hweight32(join->bitmap) != 2) { + pr_err("Join does not have 2 children: %x (%d)\n", + join->bitmap, hweight32(join->bitmap)); + err = -EINVAL; + goto out; + } + + if (__sync_child(join)[__sync_branch_idx(join, context)] != sync) { + pr_err("Leaf misplaced in parent!\n"); + err = -EINVAL; + goto out; + } + } + } +out: + return dump_syncmap(sync, err); +} + +static int igt_syncmap_join_below(void *arg) +{ + struct i915_syncmap *sync; + unsigned int step, order, idx; + int err; + + i915_syncmap_init(&sync); + + /* + * Check that we can split a compacted branch by replacing it with + * a join. + */ + for (step = 0; step < KSYNCMAP; step++) { + for (order = 64 - SHIFT; order > 0; order -= SHIFT) { + u64 context = step * BIT_ULL(order); + + err = i915_syncmap_set(&sync, context, 0); + if (err) + goto out; + + if (sync->height) { + pr_err("Inserting context=%llx (order=%d, step=%d) did not return leaf (height=%d, prefix=%llx\n", + context, order, step, sync->height, sync->prefix); + err = -EINVAL; + goto out; + } + } + } + + for (step = 0; step < KSYNCMAP; step++) { + for (order = SHIFT; order < 64; order += SHIFT) { + u64 context = step * BIT_ULL(order); + + if (!i915_syncmap_is_later(&sync, context, 0)) { + pr_err("1: context %llx (order=%d, step=%d) not found\n", + context, order, step); + err = -EINVAL; + goto out; + } + + for (idx = 1; idx < KSYNCMAP; idx++) { + if (i915_syncmap_is_later(&sync, context + idx, 0)) { + pr_err("1: context %llx (order=%d, step=%d) should not exist\n", + context + idx, order, step); + err = -EINVAL; + goto out; + } + } + } + } + + for (order = SHIFT; order < 64; order += SHIFT) { + for (step = 0; step < KSYNCMAP; step++) { + u64 context = step * BIT_ULL(order); + + if (!i915_syncmap_is_later(&sync, context, 0)) { + pr_err("2: context %llx (order=%d, step=%d) not found\n", + context, order, step); + err = -EINVAL; + goto out; + } + } + } + +out: + return dump_syncmap(sync, err); +} + +static int igt_syncmap_neighbours(void *arg) +{ + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + struct i915_syncmap *sync; + int err; + + /* + * Each leaf holds KSYNCMAP seqno. Check that when we create KSYNCMAP + * neighbouring ids, they all fit into the same leaf. 
+	 */
+
+	i915_syncmap_init(&sync);
+	do {
+		u64 context = i915_prandom_u64_state(&prng) & ~MASK;
+		unsigned int idx;
+
+		if (i915_syncmap_is_later(&sync, context, 0)) /* Skip repeats */
+			continue;
+
+		for (idx = 0; idx < KSYNCMAP; idx++) {
+			err = i915_syncmap_set(&sync, context + idx, 0);
+			if (err)
+				goto out;
+
+			if (sync->height) {
+				pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx\n",
+				       context, sync->height, sync->prefix);
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (sync->bitmap != BIT(idx + 1) - 1) {
+				pr_err("Inserting neighbouring context=0x%llx+%d, did not fit into the same leaf bitmap=%x (%d), expected %lx (%d)\n",
+				       context, idx,
+				       sync->bitmap, hweight32(sync->bitmap),
+				       BIT(idx + 1) - 1, idx + 1);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+	} while (!__igt_timeout(end_time, NULL));
+out:
+	return dump_syncmap(sync, err);
+}
+
+static int igt_syncmap_compact(void *arg)
+{
+	struct i915_syncmap *sync;
+	unsigned int idx, order;
+	int err;
+
+	i915_syncmap_init(&sync);
+
+	/*
+	 * Syncmaps are "space efficient" compressed radix trees - any
+	 * branch with only one child is skipped and replaced by the child.
+	 *
+	 * If we construct a tree with ids that are neighbouring at a non-zero
+	 * height, we form a join but each child of that join is directly a
+	 * leaf holding the single id.
+	 */
+	for (order = SHIFT; order < 64; order += SHIFT) {
+		err = check_syncmap_free(&sync);
+		if (err)
+			goto out;
+
+		/* Create neighbours in the parent */
+		for (idx = 0; idx < KSYNCMAP; idx++) {
+			u64 context = idx * BIT_ULL(order) + idx;
+
+			err = i915_syncmap_set(&sync, context, 0);
+			if (err)
+				goto out;
+
+			if (sync->height) {
+				pr_err("Inserting context=%llx (order=%d, idx=%d) did not return leaf (height=%d, prefix=%llx\n",
+				       context, order, idx,
+				       sync->height, sync->prefix);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+		sync = sync->parent;
+		if (sync->parent) {
+			pr_err("Parent (join) of last leaf was not the sync!\n");
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (sync->height != order) {
+			pr_err("Join does not have the expected height, found %d, expected %d\n",
+			       sync->height, order);
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (sync->bitmap != BIT(KSYNCMAP) - 1) {
+			pr_err("Join is not full, found %x (%d) expected %lx (%d)\n",
+			       sync->bitmap, hweight32(sync->bitmap),
+			       BIT(KSYNCMAP) - 1, KSYNCMAP);
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* Each of our children should be a leaf */
+		for (idx = 0; idx < KSYNCMAP; idx++) {
+			struct i915_syncmap *leaf = __sync_child(sync)[idx];
+
+			if (leaf->height) {
+				pr_err("Child %d is not a leaf!\n", idx);
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (leaf->parent != sync) {
+				pr_err("Child %d is not attached to us!\n",
+				       idx);
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (!is_power_of_2(leaf->bitmap)) {
+				pr_err("Child %d holds more than one id, found %x (%d)\n",
+				       idx, leaf->bitmap, hweight32(leaf->bitmap));
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (leaf->bitmap != BIT(idx)) {
+				pr_err("Child %d has wrong seqno idx, found %d, expected %d\n",
+				       idx, ilog2(leaf->bitmap), idx);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+	}
+out:
+	return dump_syncmap(sync, err);
+}
+
+static int igt_syncmap_random(void *arg)
+{
+	I915_RND_STATE(prng);
+	IGT_TIMEOUT(end_time);
+	struct i915_syncmap *sync;
+	unsigned long count, phase, i;
+	u32 seqno;
+	int err;
+
+	i915_syncmap_init(&sync);
+
+	/*
+	 * Having tried to test the individual operations within i915_syncmap,
+	 * run a smoketest exploring the entire u64 space with random
+	 * insertions.
+ */ + + count = 0; + phase = jiffies + HZ/100 + 1; + do { + u64 context = i915_prandom_u64_state(&prng); + + err = i915_syncmap_set(&sync, context, 0); + if (err) + goto out; + + count++; + } while (!time_after(jiffies, phase)); + seqno = 0; + + phase = 0; + do { + I915_RND_STATE(ctx); + u32 last_seqno = seqno; + bool expect; + + seqno = prandom_u32_state(&prng); + expect = seqno_later(last_seqno, seqno); + + for (i = 0; i < count; i++) { + u64 context = i915_prandom_u64_state(&ctx); + + if (i915_syncmap_is_later(&sync, context, seqno) != expect) { + pr_err("context=%llu, last=%u this=%u did not match expectation (%d)\n", + context, last_seqno, seqno, expect); + err = -EINVAL; + goto out; + } + + err = i915_syncmap_set(&sync, context, seqno); + if (err) + goto out; + } + + phase++; + } while (!__igt_timeout(end_time, NULL)); + pr_debug("Completed %lu passes, each of %lu contexts\n", phase, count); +out: + return dump_syncmap(sync, err); +} + +int i915_syncmap_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_syncmap_init), + SUBTEST(igt_syncmap_one), + SUBTEST(igt_syncmap_join_above), + SUBTEST(igt_syncmap_join_below), + SUBTEST(igt_syncmap_neighbours), + SUBTEST(igt_syncmap_compact), + SUBTEST(igt_syncmap_random), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 19860a372d90..7276194c04f7 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -117,7 +117,7 @@ static int igt_random_insert_remove(void *arg) mock_engine_reset(engine); - waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + waiters = kvmalloc_array(count, sizeof(*waiters), GFP_TEMPORARY); if (!waiters) goto out_engines; @@ -169,7 +169,7 @@ out_order: out_bitmap: kfree(bitmap); out_waiters: - drm_free_large(waiters); + kvfree(waiters); out_engines: mock_engine_flush(engine); return err; @@ -187,7 +187,7 @@ static int igt_insert_complete(void *arg) mock_engine_reset(engine); - waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + waiters = kvmalloc_array(count, sizeof(*waiters), GFP_TEMPORARY); if (!waiters) goto out_engines; @@ -254,7 +254,7 @@ static int igt_insert_complete(void *arg) out_bitmap: kfree(bitmap); out_waiters: - drm_free_large(waiters); + kvfree(waiters); out_engines: mock_engine_flush(engine); return err; @@ -368,7 +368,7 @@ static int igt_wakeup(void *arg) mock_engine_reset(engine); - waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + waiters = kvmalloc_array(count, sizeof(*waiters), GFP_TEMPORARY); if (!waiters) goto out_engines; @@ -454,7 +454,7 @@ out_waiters: put_task_struct(waiters[n].tsk); } - drm_free_large(waiters); + kvfree(waiters); out_engines: mock_engine_flush(engine); return err; diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 0ad624a1db90..5b18a2dc19a8 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -52,11 +52,12 @@ static void hw_delay_complete(unsigned long data) spin_unlock(&engine->hw_lock); } -static int mock_context_pin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static struct intel_ring * +mock_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { i915_gem_context_get(ctx); - return 0; + return engine->buffer; } static void mock_context_unpin(struct 
intel_engine_cs *engine, @@ -72,7 +73,6 @@ static int mock_request_alloc(struct drm_i915_gem_request *request) INIT_LIST_HEAD(&mock->link); mock->delay = 0; - request->ring = request->engine->buffer; return 0; } @@ -112,7 +112,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - ring->engine = engine; ring->size = sz; ring->effective_size = sz; ring->vaddr = (void *)(ring + 1); @@ -141,7 +140,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* minimal engine setup for requests */ engine->base.i915 = i915; - engine->base.name = name; + snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id++; engine->base.status_page.page_addr = (void *)(engine + 1); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 9f24c5da3f8d..627e2aa09766 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -30,6 +30,7 @@ #include "mock_gem_device.h" #include "mock_gem_object.h" #include "mock_gtt.h" +#include "mock_uncore.h" void mock_device_flush(struct drm_i915_private *i915) { @@ -73,6 +74,7 @@ static void mock_device_release(struct drm_device *dev) destroy_workqueue(i915->wq); + kmem_cache_destroy(i915->priorities); kmem_cache_destroy(i915->dependencies); kmem_cache_destroy(i915->requests); kmem_cache_destroy(i915->vmas); @@ -119,6 +121,7 @@ struct drm_i915_private *mock_gem_device(void) goto err; device_initialize(&pdev->dev); + pdev->class = PCI_BASE_CLASS_DISPLAY << 16; pdev->dev.release = release_dev; dev_set_name(&pdev->dev, "mock"); dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); @@ -143,6 +146,7 @@ struct drm_i915_private *mock_gem_device(void) mkwrite_device_info(i915)->gen = -1; spin_lock_init(&i915->mm.object_stat_lock); + mock_uncore_init(i915); init_waitqueue_head(&i915->gpu_error.wait_queue); init_waitqueue_head(&i915->gpu_error.reset_queue); @@ -184,12 +188,16 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->dependencies) goto err_requests; + i915->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); + if (!i915->priorities) + goto err_dependencies; + mutex_lock(&i915->drm.struct_mutex); INIT_LIST_HEAD(&i915->gt.timelines); err = i915_gem_timeline_init__global(i915); if (err) { mutex_unlock(&i915->drm.struct_mutex); - goto err_dependencies; + goto err_priorities; } mock_init_ggtt(i915); @@ -209,6 +217,8 @@ struct drm_i915_private *mock_gem_device(void) err_engine: for_each_engine(engine, i915, id) mock_engine_free(engine); +err_priorities: + kmem_cache_destroy(i915->priorities); err_dependencies: kmem_cache_destroy(i915->dependencies); err_requests: diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c new file mode 100644 index 000000000000..47b1f47c5812 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -0,0 +1,45 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_timeline.h" + +struct intel_timeline *mock_timeline(u64 context) +{ + static struct lock_class_key class; + struct intel_timeline *tl; + + tl = kzalloc(sizeof(*tl), GFP_KERNEL); + if (!tl) + return NULL; + + __intel_timeline_init(tl, NULL, context, &class, "mock"); + + return tl; +} + +void mock_timeline_destroy(struct intel_timeline *tl) +{ + __intel_timeline_fini(tl); + kfree(tl); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h new file mode 100644 index 000000000000..c27ff4639b8b --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h @@ -0,0 +1,33 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef __MOCK_TIMELINE__ +#define __MOCK_TIMELINE__ + +#include "../i915_gem_timeline.h" + +struct intel_timeline *mock_timeline(u64 context); +void mock_timeline_destroy(struct intel_timeline *tl); + +#endif /* !__MOCK_TIMELINE__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c new file mode 100644 index 000000000000..8ef14c7e5e38 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c @@ -0,0 +1,46 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_uncore.h" + +#define __nop_write(x) \ +static void \ +nop_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { } +__nop_write(8) +__nop_write(16) +__nop_write(32) + +#define __nop_read(x) \ +static u##x \ +nop_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { return 0; } +__nop_read(8) +__nop_read(16) +__nop_read(32) +__nop_read(64) + +void mock_uncore_init(struct drm_i915_private *i915) +{ + ASSIGN_WRITE_MMIO_VFUNCS(i915, nop); + ASSIGN_READ_MMIO_VFUNCS(i915, nop); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.h b/drivers/gpu/drm/i915/selftests/mock_uncore.h new file mode 100644 index 000000000000..d79aa3ca4d51 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_UNCORE_H +#define __MOCK_UNCORE_H + +void mock_uncore_init(struct drm_i915_private *i915); + +#endif /* !__MOCK_UNCORE_H */ diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index d63e853a0300..49546222c6d3 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -273,7 +273,7 @@ void ipu_plane_state_reset(struct drm_plane *plane) if (ipu_state) { ipu_state->base.plane = plane; - ipu_state->base.rotation = DRM_ROTATE_0; + ipu_state->base.rotation = DRM_MODE_ROTATE_0; } plane->state = &ipu_state->base; diff --git a/drivers/gpu/drm/mga/Makefile b/drivers/gpu/drm/mga/Makefile index 60684785c203..49e972c2f787 100644 --- a/drivers/gpu/drm/mga/Makefile +++ b/drivers/gpu/drm/mga/Makefile @@ -2,7 +2,6 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -ccflags-y := -Iinclude/drm mga-y := mga_drv.o mga_dma.o mga_state.o mga_warp.o mga_irq.o mga-$(CONFIG_COMPAT) += mga_ioc32.o diff --git a/drivers/gpu/drm/mgag200/Makefile b/drivers/gpu/drm/mgag200/Makefile index a9a0300f09fc..3d91d1d6c45d 100644 --- a/drivers/gpu/drm/mgag200/Makefile +++ b/drivers/gpu/drm/mgag200/Makefile @@ -1,4 +1,3 @@ -ccflags-y := -Iinclude/drm mgag200-y := mgag200_main.o mgag200_mode.o mgag200_cursor.o \ mgag200_drv.o mgag200_fb.o mgag200_i2c.o mgag200_ttm.o diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c index 565a217b46f2..3e7e1cd31395 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ttm.c +++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c @@ -26,8 +26,9 @@ * Authors: Dave Airlie <airlied@redhat.com> */ #include <drm/drmP.h> +#include <drm/ttm/ttm_page_alloc.h> + #include "mgag200_drv.h" -#include <ttm/ttm_page_alloc.h> static inline struct mga_device * mgag200_bdev(struct ttm_bo_device *bd) diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 5241ac8803ba..33008fa1be9b 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -1,4 +1,4 @@ -ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/msm +ccflags-y := -Idrivers/gpu/drm/msm ccflags-$(CONFIG_DRM_MSM_DSI) += -Idrivers/gpu/drm/msm/dsi msm-y := \ diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h index 32369975d155..9e6017387efb 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.h +++ b/drivers/gpu/drm/msm/dsi/dsi.h @@ -17,9 +17,9 @@ #include <linux/of_platform.h> #include <linux/platform_device.h> -#include "drm_crtc.h" -#include "drm_mipi_dsi.h" -#include "drm_panel.h" +#include <drm/drm_crtc.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_panel.h> #include "msm_drv.h" diff --git a/drivers/gpu/drm/msm/edp/edp.h b/drivers/gpu/drm/msm/edp/edp.h index ba5bedde5241..e0f5818ec9ca 100644 --- a/drivers/gpu/drm/msm/edp/edp.h +++ b/drivers/gpu/drm/msm/edp/edp.h @@ -18,9 +18,9 @@ #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/platform_device.h> +#include <drm/drm_crtc.h> +#include <drm/drm_dp_helper.h> -#include "drm_crtc.h" -#include "drm_dp_helper.h" #include "msm_drv.h" #define edp_read(offset) msm_readl((offset)) diff --git a/drivers/gpu/drm/msm/edp/edp_ctrl.c b/drivers/gpu/drm/msm/edp/edp_ctrl.c index 
149bfe7ddd82..e32a4a4f3797 100644 --- a/drivers/gpu/drm/msm/edp/edp_ctrl.c +++ b/drivers/gpu/drm/msm/edp/edp_ctrl.c @@ -14,10 +14,10 @@ #include <linux/clk.h> #include <linux/gpio/consumer.h> #include <linux/regulator/consumer.h> +#include <drm/drm_crtc.h> +#include <drm/drm_dp_helper.h> +#include <drm/drm_edid.h> -#include "drm_crtc.h" -#include "drm_dp_helper.h" -#include "drm_edid.h" #include "edp.h" #include "edp.xml.h" diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index f29194a74a19..698e514203c6 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -15,12 +15,12 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "mdp4_kms.h" - +#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_flip_work.h> #include <drm/drm_mode.h> -#include "drm_crtc.h" -#include "drm_crtc_helper.h" -#include "drm_flip_work.h" + +#include "mdp4_kms.h" struct mdp4_crtc { struct drm_crtc base; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c index 106f0e772595..6a1ebdace391 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c @@ -17,10 +17,10 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "mdp4_kms.h" +#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> -#include "drm_crtc.h" -#include "drm_crtc_helper.h" +#include "mdp4_kms.h" struct mdp4_dsi_encoder { struct drm_encoder base; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c index 24258e3025e3..ba8e587f734b 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c @@ -15,11 +15,10 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "mdp4_kms.h" - -#include "drm_crtc.h" -#include "drm_crtc_helper.h" +#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> +#include "mdp4_kms.h" struct mdp4_dtv_encoder { struct drm_encoder base; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h index 62712ca164ee..c413779d488a 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h @@ -18,12 +18,14 @@ #ifndef __MDP4_KMS_H__ #define __MDP4_KMS_H__ +#include <drm/drm_panel.h> + #include "msm_drv.h" #include "msm_kms.h" #include "mdp/mdp_kms.h" #include "mdp4.xml.h" -#include "drm_panel.h" +struct device_node; struct mdp4_kms { struct mdp_kms base; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c index a06b064f86c1..4a645926edb7 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c @@ -16,10 +16,10 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "mdp4_kms.h" +#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> -#include "drm_crtc.h" -#include "drm_crtc_helper.h" +#include "mdp4_kms.h" struct mdp4_lcdc_encoder { struct drm_encoder base; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c index 8dafc7bdba48..aa7402e03f67 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c @@ -11,10 +11,10 @@ * GNU General Public License for more details. 
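A recurring pattern across these msm hunks (and the qxl/r128/radeon ones later in the series): with ccflags-y := -Iinclude/drm dropped from the Makefiles, DRM headers can no longer be found by bare name and must be included via their canonical <drm/...> paths. Schematically, with an illustrative pair of headers:

    /* before: only resolvable with -Iinclude/drm in ccflags-y */
    #include "drm_crtc.h"
    #include "drm_crtc_helper.h"

    /* after: explicit paths, no special include flags required */
    #include <drm/drm_crtc.h>
    #include <drm/drm_crtc_helper.h>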
*/ -#include "mdp5_kms.h" +#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> -#include "drm_crtc.h" -#include "drm_crtc_helper.h" +#include "mdp5_kms.h" static struct mdp5_kms *get_kms(struct drm_encoder *encoder) { diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 9217e0d6e93e..0764a6498110 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -16,13 +16,13 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "mdp5_kms.h" - #include <linux/sort.h> #include <drm/drm_mode.h> -#include "drm_crtc.h" -#include "drm_crtc_helper.h" -#include "drm_flip_work.h" +#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_flip_work.h> + +#include "mdp5_kms.h" #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c index c2ab0f033031..97f3294fbfc6 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c @@ -16,10 +16,10 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "mdp5_kms.h" +#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> -#include "drm_crtc.h" -#include "drm_crtc_helper.h" +#include "mdp5_kms.h" static struct mdp5_kms *get_kms(struct drm_encoder *encoder) { diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index d3d6b4cae1e6..e2b3346ead48 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -527,31 +527,28 @@ static struct drm_encoder *get_encoder_from_crtc(struct drm_crtc *crtc) return NULL; } -static int mdp5_get_scanoutpos(struct drm_device *dev, unsigned int pipe, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode) +static bool mdp5_get_scanoutpos(struct drm_device *dev, unsigned int pipe, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) { struct msm_drm_private *priv = dev->dev_private; struct drm_crtc *crtc; struct drm_encoder *encoder; int line, vsw, vbp, vactive_start, vactive_end, vfp_end; - int ret = 0; crtc = priv->crtcs[pipe]; if (!crtc) { DRM_ERROR("Invalid crtc %d\n", pipe); - return 0; + return false; } encoder = get_encoder_from_crtc(crtc); if (!encoder) { DRM_ERROR("no encoder found for crtc %d\n", pipe); - return 0; + return false; } - ret |= DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE; - vsw = mode->crtc_vsync_end - mode->crtc_vsync_start; vbp = mode->crtc_vtotal - mode->crtc_vsync_end; @@ -575,10 +572,8 @@ static int mdp5_get_scanoutpos(struct drm_device *dev, unsigned int pipe, if (line < vactive_start) { line -= vactive_start; - ret |= DRM_SCANOUTPOS_IN_VBLANK; } else if (line > vactive_end) { line = line - vfp_end - vactive_start; - ret |= DRM_SCANOUTPOS_IN_VBLANK; } else { line -= vactive_start; } @@ -589,31 +584,7 @@ static int mdp5_get_scanoutpos(struct drm_device *dev, unsigned int pipe, if (etime) *etime = ktime_get(); - return ret; -} - -static int mdp5_get_vblank_timestamp(struct drm_device *dev, unsigned int pipe, - int *max_error, - struct timeval *vblank_time, - unsigned flags) -{ - struct msm_drm_private *priv = dev->dev_private; - struct drm_crtc *crtc; - - if (pipe < 0 || pipe >= priv->num_crtcs) { - DRM_ERROR("Invalid crtc %d\n", pipe); - return -EINVAL; - } - - crtc = priv->crtcs[pipe]; - if 
(!crtc) { - DRM_ERROR("Invalid crtc %d\n", pipe); - return -EINVAL; - } - - return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, - vblank_time, flags, - &crtc->mode); + return true; } static u32 mdp5_get_vblank_counter(struct drm_device *dev, unsigned int pipe) @@ -725,7 +696,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) dev->mode_config.max_width = 0xffff; dev->mode_config.max_height = 0xffff; - dev->driver->get_vblank_timestamp = mdp5_get_vblank_timestamp; + dev->driver->get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos; dev->driver->get_scanout_position = mdp5_get_scanoutpos; dev->driver->get_vblank_counter = mdp5_get_vblank_counter; dev->max_vblank_count = 0xffffffff; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index a38c5fe6cc19..5e7d9af4cba8 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -67,11 +67,11 @@ static void mdp5_plane_install_rotation_property(struct drm_device *dev, struct drm_plane *plane) { drm_plane_create_rotation_property(plane, - DRM_ROTATE_0, - DRM_ROTATE_0 | - DRM_ROTATE_180 | - DRM_REFLECT_X | - DRM_REFLECT_Y); + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | + DRM_MODE_ROTATE_180 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); } /* helper to install properties which are common to planes and crtcs */ @@ -369,14 +369,14 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state, caps |= MDP_PIPE_CAP_SCALE; rotation = drm_rotation_simplify(state->rotation, - DRM_ROTATE_0 | - DRM_REFLECT_X | - DRM_REFLECT_Y); + DRM_MODE_ROTATE_0 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); - if (rotation & DRM_REFLECT_X) + if (rotation & DRM_MODE_REFLECT_X) caps |= MDP_PIPE_CAP_HFLIP; - if (rotation & DRM_REFLECT_Y) + if (rotation & DRM_MODE_REFLECT_Y) caps |= MDP_PIPE_CAP_VFLIP; if (plane->type == DRM_PLANE_TYPE_CURSOR) @@ -970,11 +970,11 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, DBG("scale config = %x", config); rotation = drm_rotation_simplify(pstate->rotation, - DRM_ROTATE_0 | - DRM_REFLECT_X | - DRM_REFLECT_Y); - hflip = !!(rotation & DRM_REFLECT_X); - vflip = !!(rotation & DRM_REFLECT_Y); + DRM_MODE_ROTATE_0 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); + hflip = !!(rotation & DRM_MODE_REFLECT_X); + vflip = !!(rotation & DRM_MODE_REFLECT_Y); spin_lock_irqsave(&mdp5_plane->pipe_lock, flags); diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index 5cf165c9c3a9..ba2733a95a4f 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -15,12 +15,12 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> + #include "msm_drv.h" #include "msm_kms.h" -#include "drm_crtc.h" -#include "drm_crtc_helper.h" - struct msm_framebuffer { struct drm_framebuffer base; const struct msm_format *format; diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 951e40faf6e8..feea8ba4e05b 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -15,10 +15,10 @@ * this program. If not, see <http://www.gnu.org/licenses/>. 
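The mdp5 hunks above (and the nouveau ones below) are two sides of the same core API change: ->get_scanout_position() now returns bool and takes an in_vblank_irq flag instead of composing DRM_SCANOUTPOS_* result bits, which in turn lets drivers point ->get_vblank_timestamp straight at the generic drm_calc_vbltimestamp_from_scanoutpos helper. A skeleton of a converted hook; the driver prefix is illustrative:

    static bool foo_get_scanoutpos(struct drm_device *dev, unsigned int pipe,
                                   bool in_vblank_irq, int *vpos, int *hpos,
                                   ktime_t *stime, ktime_t *etime,
                                   const struct drm_display_mode *mode)
    {
            if (stime)
                    *stime = ktime_get();   /* sample before reading position */

            /* read *vpos / *hpos from hardware; the core now derives the
             * vblank status itself, so no DRM_SCANOUTPOS_IN_VBLANK here */

            if (etime)
                    *etime = ktime_get();   /* sample after */

            return true;                    /* false == position unavailable */
    }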
*/ -#include "msm_drv.h" +#include <drm/drm_crtc.h> +#include <drm/drm_fb_helper.h> -#include "drm_crtc.h" -#include "drm_fb_helper.h" +#include "msm_drv.h" #include "msm_gem.h" extern int msm_gem_mmap_obj(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 68e509b3b9e4..465dab942afa 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -50,13 +50,13 @@ static struct page **get_pages_vram(struct drm_gem_object *obj, struct page **p; int ret, i; - p = drm_malloc_ab(npages, sizeof(struct page *)); + p = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); ret = drm_mm_insert_node(&priv->vram.mm, msm_obj->vram_node, npages); if (ret) { - drm_free_large(p); + kvfree(p); return ERR_PTR(ret); } @@ -127,7 +127,7 @@ static void put_pages(struct drm_gem_object *obj) drm_gem_put_pages(obj, msm_obj->pages, true, false); else { drm_mm_remove_node(msm_obj->vram_node); - drm_free_large(msm_obj->pages); + kvfree(msm_obj->pages); } msm_obj->pages = NULL; @@ -707,7 +707,7 @@ void msm_gem_free_object(struct drm_gem_object *obj) * ours, just free the array we allocated: */ if (msm_obj->pages) - drm_free_large(msm_obj->pages); + kvfree(msm_obj->pages); drm_prime_gem_destroy(obj, msm_obj->sgt); } else { @@ -863,7 +863,7 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, msm_obj = to_msm_bo(obj); msm_obj->sgt = sgt; - msm_obj->pages = drm_malloc_ab(npages, sizeof(struct page *)); + msm_obj->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (!msm_obj->pages) { ret = -ENOMEM; goto fail; diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild index fde6e3656636..2e9ce53ae3a8 100644 --- a/drivers/gpu/drm/nouveau/Kbuild +++ b/drivers/gpu/drm/nouveau/Kbuild @@ -1,4 +1,3 @@ -ccflags-y := -Iinclude/drm ccflags-y += -I$(src)/include ccflags-y += -I$(src)/include/nvkm ccflags-y += -I$(src)/nvkm diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 549763f5e17d..8d1df5678eaa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -98,7 +98,7 @@ calc(int blanks, int blanke, int total, int line) return line; } -static int +static bool nouveau_display_scanoutpos_head(struct drm_crtc *crtc, int *vpos, int *hpos, ktime_t *stime, ktime_t *etime) { @@ -111,16 +111,16 @@ nouveau_display_scanoutpos_head(struct drm_crtc *crtc, int *vpos, int *hpos, }; struct nouveau_display *disp = nouveau_display(crtc->dev); struct drm_vblank_crtc *vblank = &crtc->dev->vblank[drm_crtc_index(crtc)]; - int ret, retry = 20; + int retry = 20; + bool ret = false; do { ret = nvif_mthd(&disp->disp, 0, &args, sizeof(args)); if (ret != 0) - return 0; + return false; if (args.scan.vline) { - ret |= DRM_SCANOUTPOS_ACCURATE; - ret |= DRM_SCANOUTPOS_VALID; + ret = true; break; } @@ -133,14 +133,12 @@ nouveau_display_scanoutpos_head(struct drm_crtc *crtc, int *vpos, int *hpos, if (stime) *stime = ns_to_ktime(args.scan.time[0]); if (etime) *etime = ns_to_ktime(args.scan.time[1]); - if (*vpos < 0) - ret |= DRM_SCANOUTPOS_IN_VBLANK; return ret; } -int +bool nouveau_display_scanoutpos(struct drm_device *dev, unsigned int pipe, - unsigned int flags, int *vpos, int *hpos, + bool in_vblank_irq, int *vpos, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode) { @@ -153,28 +151,7 @@ nouveau_display_scanoutpos(struct drm_device *dev, unsigned int pipe, } } - return 0; -} 
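The msm_gem.c hunks above swap the DRM-specific drm_malloc_ab()/drm_free_large() pair for the core kvmalloc_array()/kvfree() helpers, which attempt a kmalloc first and transparently fall back to vmalloc for large page arrays. The idiom in isolation, as a sketch rather than driver code:

    struct page **pages;

    pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
    if (!pages)
            return ERR_PTR(-ENOMEM);

    /* ... fill and use the page array ... */

    kvfree(pages);  /* frees kmalloc- or vmalloc-backed memory alike */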
- -int -nouveau_display_vblstamp(struct drm_device *dev, unsigned int pipe, - int *max_error, struct timeval *time, unsigned flags) -{ - struct drm_crtc *crtc; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if (nouveau_crtc(crtc)->index == pipe) { - struct drm_display_mode *mode; - if (drm_drv_uses_atomic_modeset(dev)) - mode = &crtc->state->adjusted_mode; - else - mode = &crtc->hwmode; - return drm_calc_vbltimestamp_from_scanoutpos(dev, - pipe, max_error, time, flags, mode); - } - } - - return -EINVAL; + return false; } static void diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index e1d772d39488..201aec2ea5b8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -68,11 +68,9 @@ int nouveau_display_suspend(struct drm_device *dev, bool runtime); void nouveau_display_resume(struct drm_device *dev, bool runtime); int nouveau_display_vblank_enable(struct drm_device *, unsigned int); void nouveau_display_vblank_disable(struct drm_device *, unsigned int); -int nouveau_display_scanoutpos(struct drm_device *, unsigned int, - unsigned int, int *, int *, ktime_t *, - ktime_t *, const struct drm_display_mode *); -int nouveau_display_vblstamp(struct drm_device *, unsigned int, int *, - struct timeval *, unsigned); +bool nouveau_display_scanoutpos(struct drm_device *, unsigned int, + bool, int *, int *, ktime_t *, + ktime_t *, const struct drm_display_mode *); int nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 36268e1802b5..6844372366d3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -29,8 +29,8 @@ #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> -#include "drmP.h" -#include "drm_crtc_helper.h" +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> #include <core/gpuobj.h> #include <core/option.h> @@ -881,7 +881,7 @@ done: } static void -nouveau_drm_preclose(struct drm_device *dev, struct drm_file *fpriv) +nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv) { struct nouveau_cli *cli = nouveau_cli(fpriv); struct nouveau_drm *drm = nouveau_drm(dev); @@ -897,12 +897,6 @@ nouveau_drm_preclose(struct drm_device *dev, struct drm_file *fpriv) list_del(&cli->head); mutex_unlock(&drm->client.mutex); -} - -static void -nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv) -{ - struct nouveau_cli *cli = nouveau_cli(fpriv); nouveau_cli_fini(cli); kfree(cli); pm_runtime_mark_last_busy(dev->dev); @@ -974,7 +968,6 @@ driver_stub = { .load = nouveau_drm_load, .unload = nouveau_drm_unload, .open = nouveau_drm_open, - .preclose = nouveau_drm_preclose, .postclose = nouveau_drm_postclose, .lastclose = nouveau_vga_lastclose, @@ -985,7 +978,7 @@ driver_stub = { .enable_vblank = nouveau_display_vblank_enable, .disable_vblank = nouveau_display_vblank_disable, .get_scanout_position = nouveau_display_scanoutpos, - .get_vblank_timestamp = nouveau_display_vblstamp, + .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, .ioctls = nouveau_ioctls, .num_ioctls = ARRAY_SIZE(nouveau_ioctls), diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index eadec2f49ad3..aaa25641fed6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -43,7 +43,7 @@ #include 
<nvif/device.h> #include <nvif/ioctl.h> -#include <drmP.h> +#include <drm/drmP.h> #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 13e5cc5f07fe..999c35a25498 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -28,7 +28,7 @@ #include "nouveau_ttm.h" #include "nouveau_gem.h" -#include "drm_legacy.h" +#include <drm/drm_legacy.h> #include <core/tegra.h> diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index a7663249b3ba..9303daa79aba 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -1033,7 +1033,7 @@ nv50_wndw_reset(struct drm_plane *plane) plane->funcs->atomic_destroy_state(plane, plane->state); plane->state = &asyw->state; plane->state->plane = plane; - plane->state->rotation = DRM_ROTATE_0; + plane->state->rotation = DRM_MODE_ROTATE_0; } static void diff --git a/drivers/gpu/drm/omapdrm/Makefile b/drivers/gpu/drm/omapdrm/Makefile index 48b7b750c05c..b391be7ecb6c 100644 --- a/drivers/gpu/drm/omapdrm/Makefile +++ b/drivers/gpu/drm/omapdrm/Makefile @@ -6,7 +6,6 @@ obj-y += dss/ obj-y += displays/ -ccflags-y := -Iinclude/drm omapdrm-y := omap_drv.o \ omap_irq.o \ omap_debugfs.o \ diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index e1f47f0b3ccf..663e930a7b0f 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -577,7 +577,7 @@ static void dev_lastclose(struct drm_device *dev) drm_object_property_set_value(&crtc->base, crtc->primary->rotation_property, - DRM_ROTATE_0); + DRM_MODE_ROTATE_0); } for (i = 0; i < priv->num_planes; i++) { @@ -588,7 +588,7 @@ static void dev_lastclose(struct drm_device *dev) drm_object_property_set_value(&plane->base, plane->rotation_property, - DRM_ROTATE_0); + DRM_MODE_ROTATE_0); } if (priv->fbdev) { diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 29dc677dd4d3..5ca0537bb427 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -167,30 +167,30 @@ void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, uint32_t w = win->src_w; uint32_t h = win->src_h; - switch (win->rotation & DRM_ROTATE_MASK) { + switch (win->rotation & DRM_MODE_ROTATE_MASK) { default: dev_err(fb->dev->dev, "invalid rotation: %02x", (uint32_t)win->rotation); /* fallthru to default to no rotation */ case 0: - case DRM_ROTATE_0: + case DRM_MODE_ROTATE_0: orient = 0; break; - case DRM_ROTATE_90: + case DRM_MODE_ROTATE_90: orient = MASK_XY_FLIP | MASK_X_INVERT; break; - case DRM_ROTATE_180: + case DRM_MODE_ROTATE_180: orient = MASK_X_INVERT | MASK_Y_INVERT; break; - case DRM_ROTATE_270: + case DRM_MODE_ROTATE_270: orient = MASK_XY_FLIP | MASK_Y_INVERT; break; } - if (win->rotation & DRM_REFLECT_X) + if (win->rotation & DRM_MODE_REFLECT_X) orient ^= MASK_X_INVERT; - if (win->rotation & DRM_REFLECT_Y) + if (win->rotation & DRM_MODE_REFLECT_Y) orient ^= MASK_Y_INVERT; /* adjust x,y offset for flip/invert: */ @@ -205,9 +205,9 @@ void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, info->rotation_type = OMAP_DSS_ROT_TILER; info->screen_width = omap_gem_tiled_stride(plane->bo, orient); } else { - switch (win->rotation & DRM_ROTATE_MASK) { + switch (win->rotation & DRM_MODE_ROTATE_MASK) { case 0: - case DRM_ROTATE_0: + case DRM_MODE_ROTATE_0: /* OK */ break; diff --git 
a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index 9168154d749e..d3d6818c68f8 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -141,7 +141,7 @@ static void omap_plane_atomic_disable(struct drm_plane *plane, struct omap_plane_state *omap_state = to_omap_plane_state(plane->state); struct omap_plane *omap_plane = to_omap_plane(plane); - plane->state->rotation = DRM_ROTATE_0; + plane->state->rotation = DRM_MODE_ROTATE_0; omap_state->zorder = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : omap_plane->id; @@ -177,7 +177,7 @@ static int omap_plane_atomic_check(struct drm_plane *plane, if (state->crtc_y + state->crtc_h > crtc_state->adjusted_mode.vdisplay) return -EINVAL; - if (state->rotation != DRM_ROTATE_0 && + if (state->rotation != DRM_MODE_ROTATE_0 && !omap_framebuffer_supports_rotation(state->fb)) return -EINVAL; @@ -213,15 +213,15 @@ void omap_plane_install_properties(struct drm_plane *plane, if (priv->has_dmm) { if (!plane->rotation_property) drm_plane_create_rotation_property(plane, - DRM_ROTATE_0, - DRM_ROTATE_0 | DRM_ROTATE_90 | - DRM_ROTATE_180 | DRM_ROTATE_270 | - DRM_REFLECT_X | DRM_REFLECT_Y); + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270 | + DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y); /* Attach the rotation property also to the crtc object */ if (plane->rotation_property && obj != &plane->base) drm_object_attach_property(obj, plane->rotation_property, - DRM_ROTATE_0); + DRM_MODE_ROTATE_0); } drm_object_attach_property(obj, priv->zorder_prop, 0); @@ -273,7 +273,7 @@ static void omap_plane_reset(struct drm_plane *plane) */ omap_state->zorder = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : omap_plane->id; - omap_state->base.rotation = DRM_ROTATE_0; + omap_state->base.rotation = DRM_MODE_ROTATE_0; plane->state = &omap_state->base; plane->state->plane = plane; diff --git a/drivers/gpu/drm/pl111/Kconfig b/drivers/gpu/drm/pl111/Kconfig new file mode 100644 index 000000000000..309f4fd52de7 --- /dev/null +++ b/drivers/gpu/drm/pl111/Kconfig @@ -0,0 +1,13 @@ +config DRM_PL111 + tristate "DRM Support for PL111 CLCD Controller" + depends on DRM + depends on ARM || ARM64 || COMPILE_TEST + depends on COMMON_CLK + select DRM_KMS_HELPER + select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER + select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE + help + Choose this option for DRM support for the PL111 CLCD controller. + If M is selected the module will be called pl111_drm. + diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile new file mode 100644 index 000000000000..59483d610ef5 --- /dev/null +++ b/drivers/gpu/drm/pl111/Makefile @@ -0,0 +1,7 @@ +pl111_drm-y += pl111_connector.o \ + pl111_display.o \ + pl111_drv.o + +pl111_drm-$(CONFIG_DEBUG_FS) += pl111_debugfs.o + +obj-$(CONFIG_DRM_PL111) += pl111_drm.o diff --git a/drivers/gpu/drm/pl111/pl111_connector.c b/drivers/gpu/drm/pl111/pl111_connector.c new file mode 100644 index 000000000000..3f213d7e7692 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_connector.c @@ -0,0 +1,127 @@ +/* + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. 
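The DRM_ROTATE_*/DRM_REFLECT_* to DRM_MODE_ROTATE_*/DRM_MODE_REFLECT_* rename running through the ipuv3, mdp5, nouveau and omap hunks is mechanical; the consumer pattern itself is unchanged. As in the mdp5_plane hunk, reduced to a sketch:

    unsigned int rotation;
    bool hflip, vflip;

    /* fold an arbitrary requested rotation into what the hw supports */
    rotation = drm_rotation_simplify(state->rotation,
                                     DRM_MODE_ROTATE_0 |
                                     DRM_MODE_REFLECT_X |
                                     DRM_MODE_REFLECT_Y);

    hflip = !!(rotation & DRM_MODE_REFLECT_X);
    vflip = !!(rotation & DRM_MODE_REFLECT_Y);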
+ * + * Parts of this file were based on sources as follows: + * + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie> + * Copyright (C) 2011 Texas Instruments + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms of + * such GNU licence. + * + */ + +/** + * pl111_drm_connector.c + * Implementation of the connector functions for PL111 DRM + */ +#include <linux/amba/clcd-regs.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> + +#include <drm/drmP.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_of.h> +#include <drm/drm_panel.h> + +#include "pl111_drm.h" + +static void pl111_connector_destroy(struct drm_connector *connector) +{ + struct pl111_drm_connector *pl111_connector = + to_pl111_connector(connector); + + if (pl111_connector->panel) + drm_panel_detach(pl111_connector->panel); + + drm_connector_unregister(connector); + drm_connector_cleanup(connector); +} + +static enum drm_connector_status pl111_connector_detect(struct drm_connector + *connector, bool force) +{ + struct pl111_drm_connector *pl111_connector = + to_pl111_connector(connector); + + return (pl111_connector->panel ? + connector_status_connected : + connector_status_disconnected); +} + +static int pl111_connector_helper_get_modes(struct drm_connector *connector) +{ + struct pl111_drm_connector *pl111_connector = + to_pl111_connector(connector); + + if (!pl111_connector->panel) + return 0; + + return drm_panel_get_modes(pl111_connector->panel); +} + +const struct drm_connector_funcs connector_funcs = { + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = pl111_connector_destroy, + .detect = pl111_connector_detect, + .dpms = drm_atomic_helper_connector_dpms, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +const struct drm_connector_helper_funcs connector_helper_funcs = { + .get_modes = pl111_connector_helper_get_modes, +}; + +/* Walks the OF graph to find the panel node and then asks DRM to look + * up the panel. 
+ */ +static struct drm_panel *pl111_get_panel(struct device *dev) +{ + struct device_node *endpoint, *panel_node; + struct device_node *np = dev->of_node; + struct drm_panel *panel; + + endpoint = of_graph_get_next_endpoint(np, NULL); + if (!endpoint) { + dev_err(dev, "no endpoint to fetch panel\n"); + return NULL; + } + + /* don't proceed if we have an endpoint but no panel_node tied to it */ + panel_node = of_graph_get_remote_port_parent(endpoint); + of_node_put(endpoint); + if (!panel_node) { + dev_err(dev, "no valid panel node\n"); + return NULL; + } + + panel = of_drm_find_panel(panel_node); + of_node_put(panel_node); + + return panel; +} + +int pl111_connector_init(struct drm_device *dev) +{ + struct pl111_drm_dev_private *priv = dev->dev_private; + struct pl111_drm_connector *pl111_connector = &priv->connector; + struct drm_connector *connector = &pl111_connector->connector; + + drm_connector_init(dev, connector, &connector_funcs, + DRM_MODE_CONNECTOR_DPI); + drm_connector_helper_add(connector, &connector_helper_funcs); + + pl111_connector->panel = pl111_get_panel(dev->dev); + if (pl111_connector->panel) + drm_panel_attach(pl111_connector->panel, connector); + + return 0; +} + diff --git a/drivers/gpu/drm/pl111/pl111_debugfs.c b/drivers/gpu/drm/pl111/pl111_debugfs.c new file mode 100644 index 000000000000..0d9dee199b2c --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_debugfs.c @@ -0,0 +1,55 @@ +/* + * Copyright © 2017 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/amba/clcd-regs.h> +#include <linux/seq_file.h> +#include <drm/drm_debugfs.h> +#include <drm/drmP.h> +#include "pl111_drm.h" + +#define REGDEF(reg) { reg, #reg } +static const struct { + u32 reg; + const char *name; +} pl111_reg_defs[] = { + REGDEF(CLCD_TIM0), + REGDEF(CLCD_TIM1), + REGDEF(CLCD_TIM2), + REGDEF(CLCD_TIM3), + REGDEF(CLCD_UBAS), + REGDEF(CLCD_PL111_CNTL), + REGDEF(CLCD_PL111_IENB), +}; + +int pl111_debugfs_regs(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct pl111_drm_dev_private *priv = dev->dev_private; + int i; + + for (i = 0; i < ARRAY_SIZE(pl111_reg_defs); i++) { + seq_printf(m, "%s (0x%04x): 0x%08x\n", + pl111_reg_defs[i].name, pl111_reg_defs[i].reg, + readl(priv->regs + pl111_reg_defs[i].reg)); + } + + return 0; +} + +static const struct drm_info_list pl111_debugfs_list[] = { + {"regs", pl111_debugfs_regs, 0}, +}; + +int +pl111_debugfs_init(struct drm_minor *minor) +{ + return drm_debugfs_create_files(pl111_debugfs_list, + ARRAY_SIZE(pl111_debugfs_list), + minor->debugfs_root, minor); +} diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c new file mode 100644 index 000000000000..3e0a4fa73ddb --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_display.c @@ -0,0 +1,476 @@ +/* + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. 
+ * + * Parts of this file were based on sources as follows: + * + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie> + * Copyright (C) 2011 Texas Instruments + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms of + * such GNU licence. + * + */ + +#include <linux/amba/clcd-regs.h> +#include <linux/clk.h> +#include <linux/version.h> +#include <linux/dma-buf.h> +#include <linux/of_graph.h> + +#include <drm/drmP.h> +#include <drm/drm_panel.h> +#include <drm/drm_gem_cma_helper.h> +#include <drm/drm_fb_cma_helper.h> + +#include "pl111_drm.h" + +irqreturn_t pl111_irq(int irq, void *data) +{ + struct pl111_drm_dev_private *priv = data; + u32 irq_stat; + irqreturn_t status = IRQ_NONE; + + irq_stat = readl(priv->regs + CLCD_PL111_MIS); + + if (!irq_stat) + return IRQ_NONE; + + if (irq_stat & CLCD_IRQ_NEXTBASE_UPDATE) { + drm_crtc_handle_vblank(&priv->pipe.crtc); + + status = IRQ_HANDLED; + } + + /* Clear the interrupt once done */ + writel(irq_stat, priv->regs + CLCD_PL111_ICR); + + return status; +} + +static u32 pl111_get_fb_offset(struct drm_plane_state *pstate) +{ + struct drm_framebuffer *fb = pstate->fb; + struct drm_gem_cma_object *obj = drm_fb_cma_get_gem_obj(fb, 0); + + return (obj->paddr + + fb->offsets[0] + + fb->format->cpp[0] * pstate->src_x + + fb->pitches[0] * pstate->src_y); +} + +static int pl111_display_check(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *pstate, + struct drm_crtc_state *cstate) +{ + const struct drm_display_mode *mode = &cstate->mode; + struct drm_framebuffer *old_fb = pipe->plane.state->fb; + struct drm_framebuffer *fb = pstate->fb; + + if (mode->hdisplay % 16) + return -EINVAL; + + if (fb) { + u32 offset = pl111_get_fb_offset(pstate); + + /* FB base address must be dword aligned. */ + if (offset & 3) + return -EINVAL; + + /* There's no pitch register -- the mode's hdisplay + * controls it. + */ + if (fb->pitches[0] != mode->hdisplay * fb->format->cpp[0]) + return -EINVAL; + + /* We can't change the FB format in a flicker-free + * manner (and only update it during CRTC enable). 
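Since the PL111 has no x/y panning registers, pl111_get_fb_offset() above folds the plane's source offset into the scanout base address itself. A worked example under assumed values only, with XRGB8888 (cpp = 4), a pitch of 4096 bytes, src_x = 16 and src_y = 8:

    /* assumed values, for illustration only */
    offset = paddr + fb->offsets[0]
           + 4 * 16         /* cpp * src_x   =    64 bytes */
           + 4096 * 8;      /* pitch * src_y = 32768 bytes */
    /* scanout therefore starts 32832 bytes into the buffer */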
+ */ + if (old_fb && old_fb->format != fb->format) + cstate->mode_changed = true; + } + + return 0; +} + +static void pl111_display_enable(struct drm_simple_display_pipe *pipe, + struct drm_crtc_state *cstate) +{ + struct drm_crtc *crtc = &pipe->crtc; + struct drm_plane *plane = &pipe->plane; + struct drm_device *drm = crtc->dev; + struct pl111_drm_dev_private *priv = drm->dev_private; + const struct drm_display_mode *mode = &cstate->mode; + struct drm_framebuffer *fb = plane->state->fb; + struct drm_connector *connector = &priv->connector.connector; + u32 cntl; + u32 ppl, hsw, hfp, hbp; + u32 lpp, vsw, vfp, vbp; + u32 cpl, tim2; + int ret; + + ret = clk_set_rate(priv->clk, mode->clock * 1000); + if (ret) { + dev_err(drm->dev, + "Failed to set pixel clock rate to %d: %d\n", + mode->clock * 1000, ret); + } + + clk_prepare_enable(priv->clk); + + ppl = (mode->hdisplay / 16) - 1; + hsw = mode->hsync_end - mode->hsync_start - 1; + hfp = mode->hsync_start - mode->hdisplay - 1; + hbp = mode->htotal - mode->hsync_end - 1; + + lpp = mode->vdisplay - 1; + vsw = mode->vsync_end - mode->vsync_start - 1; + vfp = mode->vsync_start - mode->vdisplay; + vbp = mode->vtotal - mode->vsync_end; + + cpl = mode->hdisplay - 1; + + writel((ppl << 2) | + (hsw << 8) | + (hfp << 16) | + (hbp << 24), + priv->regs + CLCD_TIM0); + writel(lpp | + (vsw << 10) | + (vfp << 16) | + (vbp << 24), + priv->regs + CLCD_TIM1); + + spin_lock(&priv->tim2_lock); + + tim2 = readl(priv->regs + CLCD_TIM2); + tim2 &= (TIM2_BCD | TIM2_PCD_LO_MASK | TIM2_PCD_HI_MASK); + + if (mode->flags & DRM_MODE_FLAG_NHSYNC) + tim2 |= TIM2_IHS; + + if (mode->flags & DRM_MODE_FLAG_NVSYNC) + tim2 |= TIM2_IVS; + + if (connector->display_info.bus_flags & DRM_BUS_FLAG_DE_LOW) + tim2 |= TIM2_IOE; + + if (connector->display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE) + tim2 |= TIM2_IPC; + + tim2 |= cpl << 16; + writel(tim2, priv->regs + CLCD_TIM2); + spin_unlock(&priv->tim2_lock); + + writel(0, priv->regs + CLCD_TIM3); + + drm_panel_prepare(priv->connector.panel); + + /* Enable and Power Up */ + cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1); + + /* Note that the hardware's format reader takes 'r' from + * the low bit, while DRM formats list channels from high bit + * to low bit as you read left to right.
+ */ + switch (fb->format->format) { + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_XBGR8888: + cntl |= CNTL_LCDBPP24; + break; + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_XRGB8888: + cntl |= CNTL_LCDBPP24 | CNTL_BGR; + break; + case DRM_FORMAT_BGR565: + cntl |= CNTL_LCDBPP16_565; + break; + case DRM_FORMAT_RGB565: + cntl |= CNTL_LCDBPP16_565 | CNTL_BGR; + break; + case DRM_FORMAT_ABGR1555: + case DRM_FORMAT_XBGR1555: + cntl |= CNTL_LCDBPP16; + break; + case DRM_FORMAT_ARGB1555: + case DRM_FORMAT_XRGB1555: + cntl |= CNTL_LCDBPP16 | CNTL_BGR; + break; + case DRM_FORMAT_ABGR4444: + case DRM_FORMAT_XBGR4444: + cntl |= CNTL_LCDBPP16_444; + break; + case DRM_FORMAT_ARGB4444: + case DRM_FORMAT_XRGB4444: + cntl |= CNTL_LCDBPP16_444 | CNTL_BGR; + break; + default: + WARN_ONCE(true, "Unknown FB format 0x%08x\n", + fb->format->format); + break; + } + + writel(cntl, priv->regs + CLCD_PL111_CNTL); + + drm_panel_enable(priv->connector.panel); + + drm_crtc_vblank_on(crtc); +} + +void pl111_display_disable(struct drm_simple_display_pipe *pipe) +{ + struct drm_crtc *crtc = &pipe->crtc; + struct drm_device *drm = crtc->dev; + struct pl111_drm_dev_private *priv = drm->dev_private; + + drm_crtc_vblank_off(crtc); + + drm_panel_disable(priv->connector.panel); + + /* Disable and Power Down */ + writel(0, priv->regs + CLCD_PL111_CNTL); + + drm_panel_unprepare(priv->connector.panel); + + clk_disable_unprepare(priv->clk); +} + +static void pl111_display_update(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *old_pstate) +{ + struct drm_crtc *crtc = &pipe->crtc; + struct drm_device *drm = crtc->dev; + struct pl111_drm_dev_private *priv = drm->dev_private; + struct drm_pending_vblank_event *event = crtc->state->event; + struct drm_plane *plane = &pipe->plane; + struct drm_plane_state *pstate = plane->state; + struct drm_framebuffer *fb = pstate->fb; + + if (fb) { + u32 addr = pl111_get_fb_offset(pstate); + + writel(addr, priv->regs + CLCD_UBAS); + } + + if (event) { + crtc->state->event = NULL; + + spin_lock_irq(&crtc->dev->event_lock); + if (crtc->state->active && drm_crtc_vblank_get(crtc) == 0) + drm_crtc_arm_vblank_event(crtc, event); + else + drm_crtc_send_vblank_event(crtc, event); + spin_unlock_irq(&crtc->dev->event_lock); + } +} + +int pl111_enable_vblank(struct drm_device *drm, unsigned int crtc) +{ + struct pl111_drm_dev_private *priv = drm->dev_private; + + writel(CLCD_IRQ_NEXTBASE_UPDATE, priv->regs + CLCD_PL111_IENB); + + return 0; +} + +void pl111_disable_vblank(struct drm_device *drm, unsigned int crtc) +{ + struct pl111_drm_dev_private *priv = drm->dev_private; + + writel(0, priv->regs + CLCD_PL111_IENB); +} + +static int pl111_display_prepare_fb(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *plane_state) +{ + return drm_fb_cma_prepare_fb(&pipe->plane, plane_state); +} + +static const struct drm_simple_display_pipe_funcs pl111_display_funcs = { + .check = pl111_display_check, + .enable = pl111_display_enable, + .disable = pl111_display_disable, + .update = pl111_display_update, + .prepare_fb = pl111_display_prepare_fb, +}; + +static int pl111_clk_div_choose_div(struct clk_hw *hw, unsigned long rate, + unsigned long *prate, bool set_parent) +{ + int best_div = 1, div; + struct clk_hw *parent = clk_hw_get_parent(hw); + unsigned long best_prate = 0; + unsigned long best_diff = ~0ul; + int max_div = (1 << (TIM2_PCD_LO_BITS + TIM2_PCD_HI_BITS)) - 1; + + for (div = 1; div < max_div; div++) { + unsigned long this_prate, div_rate, diff; + + if (set_parent) + this_prate = 
clk_hw_round_rate(parent, rate * div); + else + this_prate = *prate; + div_rate = DIV_ROUND_UP_ULL(this_prate, div); + diff = abs(rate - div_rate); + + if (diff < best_diff) { + best_div = div; + best_diff = diff; + best_prate = this_prate; + } + } + + *prate = best_prate; + return best_div; +} + +static long pl111_clk_div_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + int div = pl111_clk_div_choose_div(hw, rate, prate, true); + + return DIV_ROUND_UP_ULL(*prate, div); +} + +static unsigned long pl111_clk_div_recalc_rate(struct clk_hw *hw, + unsigned long prate) +{ + struct pl111_drm_dev_private *priv = + container_of(hw, struct pl111_drm_dev_private, clk_div); + u32 tim2 = readl(priv->regs + CLCD_TIM2); + int div; + + if (tim2 & TIM2_BCD) + return prate; + + div = tim2 & TIM2_PCD_LO_MASK; + div |= (tim2 & TIM2_PCD_HI_MASK) >> + (TIM2_PCD_HI_SHIFT - TIM2_PCD_LO_BITS); + div += 2; + + return DIV_ROUND_UP_ULL(prate, div); +} + +static int pl111_clk_div_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long prate) +{ + struct pl111_drm_dev_private *priv = + container_of(hw, struct pl111_drm_dev_private, clk_div); + int div = pl111_clk_div_choose_div(hw, rate, &prate, false); + u32 tim2; + + spin_lock(&priv->tim2_lock); + tim2 = readl(priv->regs + CLCD_TIM2); + tim2 &= ~(TIM2_BCD | TIM2_PCD_LO_MASK | TIM2_PCD_HI_MASK); + + if (div == 1) { + tim2 |= TIM2_BCD; + } else { + div -= 2; + tim2 |= div & TIM2_PCD_LO_MASK; + tim2 |= (div >> TIM2_PCD_LO_BITS) << TIM2_PCD_HI_SHIFT; + } + + writel(tim2, priv->regs + CLCD_TIM2); + spin_unlock(&priv->tim2_lock); + + return 0; +} + +static const struct clk_ops pl111_clk_div_ops = { + .recalc_rate = pl111_clk_div_recalc_rate, + .round_rate = pl111_clk_div_round_rate, + .set_rate = pl111_clk_div_set_rate, +}; + +static int +pl111_init_clock_divider(struct drm_device *drm) +{ + struct pl111_drm_dev_private *priv = drm->dev_private; + struct clk *parent = devm_clk_get(drm->dev, "clcdclk"); + struct clk_hw *div = &priv->clk_div; + const char *parent_name; + struct clk_init_data init = { + .name = "pl111_div", + .ops = &pl111_clk_div_ops, + .parent_names = &parent_name, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + }; + int ret; + + if (IS_ERR(parent)) { + dev_err(drm->dev, "CLCD: unable to get clcdclk.\n"); + return PTR_ERR(parent); + } + parent_name = __clk_get_name(parent); + + spin_lock_init(&priv->tim2_lock); + div->init = &init; + + ret = devm_clk_hw_register(drm->dev, div); + + priv->clk = div->clk; + return ret; +} + +int pl111_display_init(struct drm_device *drm) +{ + struct pl111_drm_dev_private *priv = drm->dev_private; + struct device *dev = drm->dev; + struct device_node *endpoint; + u32 tft_r0b0g0[3]; + int ret; + static const u32 formats[] = { + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_BGR565, + DRM_FORMAT_RGB565, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_ABGR4444, + DRM_FORMAT_XBGR4444, + DRM_FORMAT_ARGB4444, + DRM_FORMAT_XRGB4444, + }; + + endpoint = of_graph_get_next_endpoint(dev->of_node, NULL); + if (!endpoint) + return -ENODEV; + + if (of_property_read_u32_array(endpoint, + "arm,pl11x,tft-r0g0b0-pads", + tft_r0b0g0, + ARRAY_SIZE(tft_r0b0g0)) != 0) { + dev_err(dev, "arm,pl11x,tft-r0g0b0-pads should be 3 ints\n"); + of_node_put(endpoint); + return -ENOENT; + } + of_node_put(endpoint); + + if (tft_r0b0g0[0] != 0 || + tft_r0b0g0[1] != 8 || + tft_r0b0g0[2] != 16) { + 
dev_err(dev, "arm,pl11x,tft-r0g0b0-pads != [0,8,16] not yet supported\n"); + return -EINVAL; + } + + ret = pl111_init_clock_divider(drm); + if (ret) + return ret; + + ret = drm_simple_display_pipe_init(drm, &priv->pipe, + &pl111_display_funcs, + formats, ARRAY_SIZE(formats), + &priv->connector.connector); + if (ret) + return ret; + + return 0; +} diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h new file mode 100644 index 000000000000..5c685bfc8fdc --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm.h @@ -0,0 +1,67 @@ +/* + * + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. + * + * + * Parts of this file were based on sources as follows: + * + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie> + * Copyright (C) 2011 Texas Instruments + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms of + * such GNU licence. + * + */ + +#ifndef _PL111_DRM_H_ +#define _PL111_DRM_H_ + +#include <drm/drm_gem.h> +#include <drm/drm_simple_kms_helper.h> +#include <linux/clk-provider.h> + +#define CLCD_IRQ_NEXTBASE_UPDATE BIT(2) + +struct drm_minor; + +struct pl111_drm_connector { + struct drm_connector connector; + struct drm_panel *panel; +}; + +struct pl111_drm_dev_private { + struct drm_device *drm; + + struct pl111_drm_connector connector; + struct drm_simple_display_pipe pipe; + struct drm_fbdev_cma *fbdev; + + void *regs; + /* The pixel clock (a reference to our clock divider off of CLCDCLK). */ + struct clk *clk; + /* pl111's internal clock divider. */ + struct clk_hw clk_div; + /* Lock to sync access to CLCD_TIM2 between the common clock + * subsystem and pl111_display_enable(). + */ + spinlock_t tim2_lock; +}; + +#define to_pl111_connector(x) \ + container_of(x, struct pl111_drm_connector, connector) + +int pl111_display_init(struct drm_device *dev); +int pl111_enable_vblank(struct drm_device *drm, unsigned int crtc); +void pl111_disable_vblank(struct drm_device *drm, unsigned int crtc); +irqreturn_t pl111_irq(int irq, void *data); +int pl111_connector_init(struct drm_device *dev); +int pl111_encoder_init(struct drm_device *dev); +int pl111_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); +int pl111_debugfs_init(struct drm_minor *minor); + +#endif /* _PL111_DRM_H_ */ diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c new file mode 100644 index 000000000000..e96efad37d27 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drv.c @@ -0,0 +1,269 @@ +/* + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. + * + * Parts of this file were based on sources as follows: + * + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie> + * Copyright (C) 2011 Texas Instruments + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms of + * such GNU licence. + * + */ + +/** + * DOC: ARM PrimeCell PL111 CLCD Driver + * + * The PL111 is a simple LCD controller that can support TFT and STN + * displays. This driver exposes a standard KMS interface for them. + * + * This driver uses the same Device Tree binding as the fbdev CLCD + * driver. 
While the fbdev driver supports panels that may be + * connected to the CLCD internally to the CLCD driver, in DRM the + * panels get split out to drivers/gpu/drm/panels/. This means that, + * in converting from using fbdev to using DRM, you also need to write + * a panel driver (which may be as simple as an entry in + * panel-simple.c). + * + * The driver currently doesn't expose the cursor. The DRM API for + * cursors requires support for 64x64 ARGB8888 cursor images, while + * the hardware can only support 64x64 monochrome with masking + * cursors. While one could imagine trying to hack something together + * to look at the ARGB8888 and program reasonable in monochrome, we + * just don't expose the cursor at all instead, and leave cursor + * support to the X11 software cursor layer. + * + * TODO: + * + * - Fix race between setting plane base address and getting IRQ for + * vsync firing the pageflip completion. + * + * - Expose the correct set of formats we can support based on the + * "arm,pl11x,tft-r0g0b0-pads" DT property. + * + * - Use the "max-memory-bandwidth" DT property to filter the + * supported formats. + * + * - Read back hardware state at boot to skip reprogramming the + * hardware when doing a no-op modeset. + * + * - Use the CLKSEL bit to support switching between the two external + * clock parents. + */ + +#include <linux/amba/bus.h> +#include <linux/amba/clcd-regs.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> +#include <linux/slab.h> + +#include <drm/drmP.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_gem_cma_helper.h> +#include <drm/drm_fb_cma_helper.h> + +#include "pl111_drm.h" + +#define DRIVER_DESC "DRM module for PL111" + +static struct drm_mode_config_funcs mode_config_funcs = { + .fb_create = drm_fb_cma_create, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static int pl111_modeset_init(struct drm_device *dev) +{ + struct drm_mode_config *mode_config; + struct pl111_drm_dev_private *priv = dev->dev_private; + int ret = 0; + + drm_mode_config_init(dev); + mode_config = &dev->mode_config; + mode_config->funcs = &mode_config_funcs; + mode_config->min_width = 1; + mode_config->max_width = 1024; + mode_config->min_height = 1; + mode_config->max_height = 768; + + ret = pl111_connector_init(dev); + if (ret) { + dev_err(dev->dev, "Failed to create pl111_drm_connector\n"); + goto out_config; + } + + /* Don't actually attach if we didn't find a drm_panel + * attached to us. This will allow a kernel to include both + * the fbdev pl111 driver and this one, and choose between + * them based on which subsystem has support for the panel. 
+ */ + if (!priv->connector.panel) { + dev_info(dev->dev, + "Disabling due to lack of DRM panel device.\n"); + ret = -ENODEV; + goto out_config; + } + + ret = pl111_display_init(dev); + if (ret != 0) { + dev_err(dev->dev, "Failed to init display\n"); + goto out_config; + } + + ret = drm_vblank_init(dev, 1); + if (ret != 0) { + dev_err(dev->dev, "Failed to init vblank\n"); + goto out_config; + } + + drm_mode_config_reset(dev); + + priv->fbdev = drm_fbdev_cma_init(dev, 32, + dev->mode_config.num_connector); + + drm_kms_helper_poll_init(dev); + + goto finish; + +out_config: + drm_mode_config_cleanup(dev); +finish: + return ret; +} + +DEFINE_DRM_GEM_CMA_FOPS(drm_fops); + +static void pl111_lastclose(struct drm_device *dev) +{ + struct pl111_drm_dev_private *priv = dev->dev_private; + + drm_fbdev_cma_restore_mode(priv->fbdev); +} + +static struct drm_driver pl111_drm_driver = { + .driver_features = + DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_ATOMIC, + .lastclose = pl111_lastclose, + .ioctls = NULL, + .fops = &drm_fops, + .name = "pl111", + .desc = DRIVER_DESC, + .date = "20170317", + .major = 1, + .minor = 0, + .patchlevel = 0, + .dumb_create = drm_gem_cma_dumb_create, + .dumb_destroy = drm_gem_dumb_destroy, + .dumb_map_offset = drm_gem_cma_dumb_map_offset, + .gem_free_object = drm_gem_cma_free_object, + .gem_vm_ops = &drm_gem_cma_vm_ops, + + .enable_vblank = pl111_enable_vblank, + .disable_vblank = pl111_disable_vblank, + + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = drm_gem_prime_import, + .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, + .gem_prime_export = drm_gem_prime_export, + .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, + +#if defined(CONFIG_DEBUG_FS) + .debugfs_init = pl111_debugfs_init, +#endif +}; + +#ifdef CONFIG_ARM_AMBA +static int pl111_amba_probe(struct amba_device *amba_dev, + const struct amba_id *id) +{ + struct device *dev = &amba_dev->dev; + struct pl111_drm_dev_private *priv; + struct drm_device *drm; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + drm = drm_dev_alloc(&pl111_drm_driver, dev); + if (IS_ERR(drm)) + return PTR_ERR(drm); + amba_set_drvdata(amba_dev, drm); + priv->drm = drm; + drm->dev_private = priv; + + priv->regs = devm_ioremap_resource(dev, &amba_dev->res); + if (IS_ERR(priv->regs)) { + dev_err(dev, "%s failed mmio\n", __func__); + return PTR_ERR(priv->regs); + } + + /* turn off interrupts before requesting the irq */ + writel(0, priv->regs + CLCD_PL111_IENB); + + ret = devm_request_irq(dev, amba_dev->irq[0], pl111_irq, 0, + "pl111", priv); + if (ret != 0) { + dev_err(dev, "%s failed irq %d\n", __func__, ret); + return ret; + } + + ret = pl111_modeset_init(drm); + if (ret != 0) + goto dev_unref; + + ret = drm_dev_register(drm, 0); + if (ret < 0) + goto dev_unref; + + return 0; + +dev_unref: + drm_dev_unref(drm); + return ret; +} + +static int pl111_amba_remove(struct amba_device *amba_dev) +{ + struct drm_device *drm = amba_get_drvdata(amba_dev); + struct pl111_drm_dev_private *priv = drm->dev_private; + + drm_dev_unregister(drm); + if (priv->fbdev) + drm_fbdev_cma_fini(priv->fbdev); + drm_mode_config_cleanup(drm); + drm_dev_unref(drm); + + return 0; +} + +static struct amba_id pl111_id_table[] = { + { + .id = 0x00041111, + .mask = 0x000fffff, + }, + {0, 0}, +}; + +static struct amba_driver pl111_amba_driver = { + .drv = { + .name = "drm-clcd-pl111", + }, + .probe = pl111_amba_probe, + 
.remove = pl111_amba_remove, + .id_table = pl111_id_table, +}; + +module_amba_driver(pl111_amba_driver); +#endif /* CONFIG_ARM_AMBA */ + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_AUTHOR("ARM Ltd."); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/qxl/Makefile b/drivers/gpu/drm/qxl/Makefile index bacc4aff1201..33a7d0c434b7 100644 --- a/drivers/gpu/drm/qxl/Makefile +++ b/drivers/gpu/drm/qxl/Makefile @@ -2,8 +2,6 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -ccflags-y := -Iinclude/drm - qxl-y := qxl_drv.o qxl_kms.o qxl_display.o qxl_ttm.o qxl_fb.o qxl_object.o qxl_gem.o qxl_cmd.o qxl_image.o qxl_draw.o qxl_debugfs.o qxl_irq.o qxl_dumb.o qxl_ioctl.o qxl_release.o qxl_prime.o obj-$(CONFIG_DRM_QXL)+= qxl.o diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c index ffe821b61f7d..15c84068d3fb 100644 --- a/drivers/gpu/drm/qxl/qxl_debugfs.c +++ b/drivers/gpu/drm/qxl/qxl_debugfs.c @@ -30,7 +30,7 @@ #include <linux/debugfs.h> -#include "drmP.h" +#include <drm/drmP.h> #include "qxl_drv.h" #include "qxl_object.h" diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 4a340efd8ba6..03fe182203ce 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -23,16 +23,15 @@ * Alon Levy */ - #include <linux/crc32.h> - -#include "qxl_drv.h" -#include "qxl_object.h" -#include "drm_crtc_helper.h" +#include <drm/drm_crtc_helper.h> #include <drm/drm_plane_helper.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_atomic.h> +#include "qxl_drv.h" +#include "qxl_object.h" + static bool qxl_head_enabled(struct qxl_head *head) { return head->width && head->height; diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index abf7b8360361..c2fc201d9e1b 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -31,9 +31,9 @@ #include <linux/module.h> #include <linux/console.h> -#include "drmP.h" -#include "drm/drm.h" -#include "drm_crtc_helper.h" +#include <drm/drmP.h> +#include <drm/drm.h> +#include <drm/drm_crtc_helper.h> #include "qxl_drv.h" #include "qxl_object.h" diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 5ea290a33a68..3591d2330a09 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -36,20 +36,18 @@ #include <linux/firmware.h> #include <linux/platform_device.h> -#include "drmP.h" -#include "drm_crtc.h" -#include <ttm/ttm_bo_api.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> -#include <ttm/ttm_module.h> - +#include <drm/drm_crtc.h> #include <drm/drm_encoder.h> #include <drm/drm_gem.h> - +#include <drm/drmP.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> /* just for ttm_validate_buffer */ -#include <ttm/ttm_execbuf_util.h> - +#include <drm/ttm/ttm_execbuf_util.h> +#include <drm/ttm/ttm_module.h> +#include <drm/ttm/ttm_placement.h> #include <drm/qxl_drm.h> + #include "qxl_dev.h" #define DRIVER_AUTHOR "Dave Airlie" diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c index 14e2a49a4dcf..573e7e9a5f98 100644 --- a/drivers/gpu/drm/qxl/qxl_fb.c +++ b/drivers/gpu/drm/qxl/qxl_fb.c @@ -25,14 +25,15 @@ */ #include <linux/module.h> -#include "drmP.h" -#include "drm/drm.h" -#include "drm/drm_crtc.h" -#include "drm/drm_crtc_helper.h" +#include <drm/drmP.h> +#include <drm/drm.h> +#include <drm/drm_crtc.h> +#include <drm/drm_crtc_helper.h> +#include 
<drm/drm_fb_helper.h> + #include "qxl_drv.h" #include "qxl_object.h" -#include "drm_fb_helper.h" #define QXL_DIRTY_DELAY (HZ / 30) diff --git a/drivers/gpu/drm/qxl/qxl_gem.c b/drivers/gpu/drm/qxl/qxl_gem.c index 3f185c4da5b7..85f546719adb 100644 --- a/drivers/gpu/drm/qxl/qxl_gem.c +++ b/drivers/gpu/drm/qxl/qxl_gem.c @@ -23,8 +23,9 @@ * Alon Levy */ -#include "drmP.h" -#include "drm/drm.h" +#include <drm/drmP.h> +#include <drm/drm.h> + #include "qxl_drv.h" #include "qxl_object.h" diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 0fdedee4509d..87fc1dbd0a2f 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -23,11 +23,11 @@ * Alon Levy */ -#include <ttm/ttm_bo_api.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> -#include <ttm/ttm_page_alloc.h> -#include <ttm/ttm_module.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_page_alloc.h> +#include <drm/ttm/ttm_module.h> #include <drm/drmP.h> #include <drm/drm.h> #include <drm/qxl_drm.h> diff --git a/drivers/gpu/drm/r128/Makefile b/drivers/gpu/drm/r128/Makefile index 1cc72ae3a880..1a6700ebaf09 100644 --- a/drivers/gpu/drm/r128/Makefile +++ b/drivers/gpu/drm/r128/Makefile @@ -2,7 +2,6 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -ccflags-y := -Iinclude/drm r128-y := r128_drv.o r128_cce.o r128_state.o r128_irq.o r128-$(CONFIG_COMPAT) += r128_ioc32.o diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 08bd17d3925c..a5d3cd3ecb5f 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -2,7 +2,7 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. 
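# Note: the -Iinclude/drm ccflags are being dropped tree-wide in this series; sources now include DRM and TTM headers by their canonical <drm/...> and <drm/ttm/...> paths (see the drmP.h and ttm include changes below), so the extra header search path is redundant.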
-ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include +ccflags-y := -Idrivers/gpu/drm/amd/include hostprogs-y := mkregtable clean-files := rn50_reg_safe.h r100_reg_safe.h r200_reg_safe.h rv515_reg_safe.h r300_reg_safe.h r420_reg_safe.h rs600_reg_safe.h r600_reg_safe.h evergreen_reg_safe.h cayman_reg_safe.h diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c index 38e5123708e7..95652e643da1 100644 --- a/drivers/gpu/drm/radeon/btc_dpm.c +++ b/drivers/gpu/drm/radeon/btc_dpm.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "btcd.h" diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index ea36dc4dd5d2..c97fbb2ab48b 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -22,7 +22,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "radeon_ucode.h" diff --git a/drivers/gpu/drm/radeon/ci_smc.c b/drivers/gpu/drm/radeon/ci_smc.c index 24760ee3063e..3356a21d97ec 100644 --- a/drivers/gpu/drm/radeon/ci_smc.c +++ b/drivers/gpu/drm/radeon/ci_smc.c @@ -23,7 +23,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "cikd.h" #include "ppsmc.h" diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 008c145b7f29..258912132b62 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -24,7 +24,7 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/module.h> -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "radeon_audio.h" diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c index a4edd0702718..3eb7899a4035 100644 --- a/drivers/gpu/drm/radeon/cypress_dpm.c +++ b/drivers/gpu/drm/radeon/cypress_dpm.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "evergreend.h" diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index a7e978677937..ae1529b0ef6f 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -21,7 +21,7 @@ * */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "cikd.h" #include "r600_dpm.h" diff --git a/drivers/gpu/drm/radeon/kv_smc.c b/drivers/gpu/drm/radeon/kv_smc.c index 0000b59a6d05..af60bd32a287 100644 --- a/drivers/gpu/drm/radeon/kv_smc.c +++ b/drivers/gpu/drm/radeon/kv_smc.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "cikd.h" #include "kv_dpm.h" diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index 4a601f990562..9416e72f86aa 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -21,7 +21,7 @@ * */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "nid.h" diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index c7fc1dbfd192..31d1b4710844 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "r600d.h" diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 
c1c8e2208a21..342e3b1fb9c7 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -68,11 +68,11 @@ #include <linux/hashtable.h> #include <linux/dma-fence.h> -#include <ttm/ttm_bo_api.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> -#include <ttm/ttm_module.h> -#include <ttm/ttm_execbuf_util.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_module.h> +#include <drm/ttm/ttm_execbuf_util.h> #include <drm/drm_gem.h> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 3ac671f6c8e1..00b22af70f5c 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -87,7 +87,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->dma_reloc_idx = 0; /* FIXME: we assume that each relocs use 4 dwords */ p->nrelocs = chunk->length_dw / 4; - p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list)); + p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list), + GFP_KERNEL | __GFP_ZERO); if (p->relocs == NULL) { return -ENOMEM; } @@ -341,7 +342,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) continue; } - p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); + p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); size *= sizeof(uint32_t); if (p->chunks[i].kdata == NULL) { return -ENOMEM; @@ -440,10 +441,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo } } kfree(parser->track); - drm_free_large(parser->relocs); - drm_free_large(parser->vm_bos); + kvfree(parser->relocs); + kvfree(parser->vm_bos); for (i = 0; i < parser->nchunks; i++) - drm_free_large(parser->chunks[i].kdata); + kvfree(parser->chunks[i].kdata); kfree(parser->chunks); kfree(parser->chunks_array); radeon_ib_free(parser->rdev, &parser->ib); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 93d45aa5c3d4..6f906abd612b 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -41,7 +41,7 @@ #include <drm/drm_gem.h> #include <drm/drm_fb_helper.h> -#include "drm_crtc_helper.h" +#include <drm/drm_crtc_helper.h> #include "radeon_kfd.h" /* @@ -115,10 +115,6 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon); u32 radeon_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe); int radeon_enable_vblank_kms(struct drm_device *dev, unsigned int pipe); void radeon_disable_vblank_kms(struct drm_device *dev, unsigned int pipe); -int radeon_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, - int *max_error, - struct timeval *vblank_time, - unsigned flags); void radeon_driver_irq_preinstall_kms(struct drm_device *dev); int radeon_driver_irq_postinstall_kms(struct drm_device *dev); void radeon_driver_irq_uninstall_kms(struct drm_device *dev); @@ -530,6 +526,16 @@ static const struct file_operations radeon_driver_kms_fops = { #endif }; +static bool +radeon_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) +{ + return radeon_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos, + stime, etime, mode); +} + static struct drm_driver kms_driver = { .driver_features = DRIVER_USE_AGP | @@ -544,8 +550,8 @@ static struct drm_driver kms_driver = { .get_vblank_counter = radeon_get_vblank_counter_kms, 
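	/* Vblank timestamping is delegated to the DRM core below: radeon_get_crtc_scanout_position() above adapts radeon_get_crtc_scanoutpos() to the signature expected by drm_calc_vbltimestamp_from_scanoutpos(), which replaces the driver-private radeon_get_vblank_timestamp_kms() removed from radeon_kms.c further down. */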
.enable_vblank = radeon_enable_vblank_kms, .disable_vblank = radeon_disable_vblank_kms, - .get_vblank_timestamp = radeon_get_vblank_timestamp_kms, - .get_scanout_position = radeon_get_crtc_scanoutpos, + .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, + .get_scanout_position = radeon_get_crtc_scanout_position, .irq_preinstall = radeon_driver_irq_preinstall_kms, .irq_postinstall = radeon_driver_irq_postinstall_kms, .irq_uninstall = radeon_driver_irq_uninstall_kms, diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index dddb372de2b9..574bf7e6b118 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -587,7 +587,7 @@ error_unreserve: ttm_eu_backoff_reservation(&ticket, &list); error_free: - drm_free_large(vm_bos); + kvfree(vm_bos); if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 4761f27f2ca2..d0ad03674250 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -858,43 +858,6 @@ void radeon_disable_vblank_kms(struct drm_device *dev, int crtc) spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } -/** - * radeon_get_vblank_timestamp_kms - get vblank timestamp - * - * @dev: drm dev pointer - * @crtc: crtc to get the timestamp for - * @max_error: max error - * @vblank_time: time value - * @flags: flags passed to the driver - * - * Gets the timestamp on the requested crtc based on the - * scanout position. (all asics). - * Returns postive status flags on success, negative error on failure. - */ -int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, - int *max_error, - struct timeval *vblank_time, - unsigned flags) -{ - struct drm_crtc *drmcrtc; - struct radeon_device *rdev = dev->dev_private; - - if (crtc < 0 || crtc >= dev->num_crtcs) { - DRM_ERROR("Invalid crtc %d\n", crtc); - return -EINVAL; - } - - /* Get associated drm_crtc: */ - drmcrtc = &rdev->mode_info.crtcs[crtc]->base; - if (!drmcrtc) - return -EINVAL; - - /* Helper routine in DRM core does all the work: */ - return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc, max_error, - vblank_time, flags, - &drmcrtc->hwmode); -} - const struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, drm_invalid_op, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(RADEON_CP_START, drm_invalid_op, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index ad282648fc8b..00f5ec5c12c7 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -691,6 +691,9 @@ struct atom_voltage_table }; /* Driver internal use only flags of radeon_get_crtc_scanoutpos() */ +#define DRM_SCANOUTPOS_VALID (1 << 0) +#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1) +#define DRM_SCANOUTPOS_ACCURATE (1 << 2) #define USE_REAL_VBLANKSTART (1 << 30) #define GET_DISTANCE_TO_VBLANKSTART (1 << 31) diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 8c7872339c2a..84802b201bef 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -314,7 +314,7 @@ unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring } /* and then save the content of the ring */ - *data = drm_malloc_ab(size, sizeof(uint32_t)); + *data = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); if (!*data) { 
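		/* kvmalloc_array() (the replacement for drm_malloc_ab() here) falls back to vmalloc for large requests, so allocation failure is rare; bail out with the ring lock dropped and nothing saved. */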
mutex_unlock(&rdev->ring_lock); return 0; @@ -356,7 +356,7 @@ int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, } radeon_ring_unlock_commit(rdev, ring, false); - drm_free_large(data); + kvfree(data); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 8b7623b5a624..faa021396da3 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -29,11 +29,11 @@ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> * Dave Airlie */ -#include <ttm/ttm_bo_api.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> -#include <ttm/ttm_module.h> -#include <ttm/ttm_page_alloc.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_module.h> +#include <drm/ttm/ttm_page_alloc.h> #include <drm/drmP.h> #include <drm/radeon_drm.h> #include <linux/seq_file.h> diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index a1358748cea5..5f68245579a3 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -132,8 +132,8 @@ struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, struct radeon_bo_list *list; unsigned i, idx; - list = drm_malloc_ab(vm->max_pde_used + 2, - sizeof(struct radeon_bo_list)); + list = kvmalloc_array(vm->max_pde_used + 2, + sizeof(struct radeon_bo_list), GFP_KERNEL); if (!list) return NULL; diff --git a/drivers/gpu/drm/radeon/rs780_dpm.c b/drivers/gpu/drm/radeon/rs780_dpm.c index 94b48fc1e266..b5e4e09a8996 100644 --- a/drivers/gpu/drm/radeon/rs780_dpm.c +++ b/drivers/gpu/drm/radeon/rs780_dpm.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "rs780d.h" diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c index 25e29303b119..d91aa3944593 100644 --- a/drivers/gpu/drm/radeon/rv6xx_dpm.c +++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "rv6xxd.h" diff --git a/drivers/gpu/drm/radeon/rv730_dpm.c b/drivers/gpu/drm/radeon/rv730_dpm.c index d37ba2cb886e..38fdb4152e2a 100644 --- a/drivers/gpu/drm/radeon/rv730_dpm.c +++ b/drivers/gpu/drm/radeon/rv730_dpm.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "rv730d.h" #include "r600_dpm.h" diff --git a/drivers/gpu/drm/radeon/rv740_dpm.c b/drivers/gpu/drm/radeon/rv740_dpm.c index 4b850824fe06..afd597ec5085 100644 --- a/drivers/gpu/drm/radeon/rv740_dpm.c +++ b/drivers/gpu/drm/radeon/rv740_dpm.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "rv740d.h" #include "r600_dpm.h" diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c index a010decf59af..cb2a7ec4e217 100644 --- a/drivers/gpu/drm/radeon/rv770_dpm.c +++ b/drivers/gpu/drm/radeon/rv770_dpm.c @@ -22,7 +22,7 @@ * Authors: Alex Deucher */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "rv770d.h" diff --git a/drivers/gpu/drm/radeon/rv770_smc.c b/drivers/gpu/drm/radeon/rv770_smc.c index b2a224407365..2b7ddee3984c 100644 --- a/drivers/gpu/drm/radeon/rv770_smc.c +++ b/drivers/gpu/drm/radeon/rv770_smc.c @@ -23,7 +23,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" 
+#include <drm/drmP.h> #include "radeon.h" #include "rv770d.h" #include "rv770_dpm.h" diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index c7af9fdd20c7..ee3e74266a13 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -21,7 +21,7 @@ * */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "sid.h" diff --git a/drivers/gpu/drm/radeon/si_smc.c b/drivers/gpu/drm/radeon/si_smc.c index e5bb92f16775..51155abda8d8 100644 --- a/drivers/gpu/drm/radeon/si_smc.c +++ b/drivers/gpu/drm/radeon/si_smc.c @@ -23,7 +23,7 @@ */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "sid.h" #include "ppsmc.h" diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c index f0d5c1724f55..fd4804829e46 100644 --- a/drivers/gpu/drm/radeon/sumo_dpm.c +++ b/drivers/gpu/drm/radeon/sumo_dpm.c @@ -21,7 +21,7 @@ * */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "sumod.h" diff --git a/drivers/gpu/drm/radeon/sumo_smc.c b/drivers/gpu/drm/radeon/sumo_smc.c index fb081d2ae374..cc051be42362 100644 --- a/drivers/gpu/drm/radeon/sumo_smc.c +++ b/drivers/gpu/drm/radeon/sumo_smc.c @@ -21,7 +21,7 @@ * */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "sumod.h" #include "sumo_dpm.h" diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index 6730367ac228..2ef7c4e5e495 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -21,7 +21,7 @@ * */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" #include "trinityd.h" diff --git a/drivers/gpu/drm/radeon/trinity_smc.c b/drivers/gpu/drm/radeon/trinity_smc.c index 99dd0455334d..0310e36e3159 100644 --- a/drivers/gpu/drm/radeon/trinity_smc.c +++ b/drivers/gpu/drm/radeon/trinity_smc.c @@ -21,7 +21,7 @@ * */ -#include "drmP.h" +#include <drm/drmP.h> #include "radeon.h" #include "trinityd.h" #include "trinity_dpm.h" diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index d8fa7a9c9240..1bccd827d2e4 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -104,26 +104,18 @@ static void analogix_dp_psr_work(struct work_struct *work) { struct rockchip_dp_device *dp = container_of(work, typeof(*dp), psr_work); - struct drm_crtc *crtc = dp->encoder.crtc; - int psr_state = dp->psr_state; - int vact_end; int ret; unsigned long flags; - if (!crtc) - return; - - vact_end = crtc->mode.vtotal - crtc->mode.vsync_start + crtc->mode.vdisplay; - - ret = rockchip_drm_wait_line_flag(dp->encoder.crtc, vact_end, - PSR_WAIT_LINE_FLAG_TIMEOUT_MS); + ret = rockchip_drm_wait_vact_end(dp->encoder.crtc, + PSR_WAIT_LINE_FLAG_TIMEOUT_MS); if (ret) { dev_err(dp->dev, "line flag interrupt did not arrive\n"); return; } spin_lock_irqsave(&dp->psr_lock, flags); - if (psr_state == EDP_VSC_PSR_STATE_ACTIVE) + if (dp->psr_state == EDP_VSC_PSR_STATE_ACTIVE) analogix_dp_enable_psr(dp->dev); else analogix_dp_disable_psr(dp->dev); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h index a48fcce3f5f6..47905faf5586 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h @@ -62,8 +62,7 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev, struct 
device *dev); void rockchip_drm_dma_detach_device(struct drm_device *drm_dev, struct device *dev); -int rockchip_drm_wait_line_flag(struct drm_crtc *crtc, unsigned int line_num, - unsigned int mstimeout); +int rockchip_drm_wait_vact_end(struct drm_crtc *crtc, unsigned int mstimeout); extern struct platform_driver cdn_dp_driver; extern struct platform_driver dw_hdmi_rockchip_pltfm_driver; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 3f7a82d1e095..40a5e6ef6f2c 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -468,7 +468,7 @@ static bool vop_line_flag_irq_is_enabled(struct vop *vop) return !!line_flag_irq; } -static void vop_line_flag_irq_enable(struct vop *vop, int line_num) +static void vop_line_flag_irq_enable(struct vop *vop) { unsigned long flags; @@ -477,7 +477,6 @@ static void vop_line_flag_irq_enable(struct vop *vop, int line_num) spin_lock_irqsave(&vop->irq_lock, flags); - VOP_CTRL_SET(vop, line_flag_num[0], line_num); VOP_INTR_SET_TYPE(vop, clear, LINE_FLAG_INTR, 1); VOP_INTR_SET_TYPE(vop, enable, LINE_FLAG_INTR, 1); @@ -981,6 +980,8 @@ static void vop_crtc_enable(struct drm_crtc *crtc) VOP_CTRL_SET(vop, vact_st_end, val); VOP_CTRL_SET(vop, vpost_st_end, val); + VOP_CTRL_SET(vop, line_flag_num[0], vact_end); + clk_set_rate(vop->dclk, adjusted_mode->clock * 1000); VOP_CTRL_SET(vop, standby, 0); @@ -1507,19 +1508,16 @@ static void vop_win_init(struct vop *vop) } /** - * rockchip_drm_wait_line_flag - acqiure the give line flag event + * rockchip_drm_wait_vact_end * @crtc: CRTC to enable line flag - * @line_num: interested line number * @mstimeout: millisecond for timeout * - * Driver would hold here until the interested line flag interrupt have - * happened or timeout to wait. + * Wait for vact_end line flag irq or timeout. * * Returns: * Zero on success, negative errno on failure. */ -int rockchip_drm_wait_line_flag(struct drm_crtc *crtc, unsigned int line_num, - unsigned int mstimeout) +int rockchip_drm_wait_vact_end(struct drm_crtc *crtc, unsigned int mstimeout) { struct vop *vop = to_vop(crtc); unsigned long jiffies_left; @@ -1527,14 +1525,14 @@ int rockchip_drm_wait_line_flag(struct drm_crtc *crtc, unsigned int line_num, if (!crtc || !vop->is_enabled) return -ENODEV; - if (line_num > crtc->mode.vtotal || mstimeout <= 0) + if (mstimeout <= 0) return -EINVAL; if (vop_line_flag_irq_is_enabled(vop)) return -EBUSY; reinit_completion(&vop->line_flag_completion); - vop_line_flag_irq_enable(vop, line_num); + vop_line_flag_irq_enable(vop); jiffies_left = wait_for_completion_timeout(&vop->line_flag_completion, msecs_to_jiffies(mstimeout)); @@ -1547,7 +1545,7 @@ int rockchip_drm_wait_line_flag(struct drm_crtc *crtc, unsigned int line_num, return 0; } -EXPORT_SYMBOL(rockchip_drm_wait_line_flag); +EXPORT_SYMBOL(rockchip_drm_wait_vact_end); static int vop_bind(struct device *dev, struct device *master, void *data) { diff --git a/drivers/gpu/drm/savage/Makefile b/drivers/gpu/drm/savage/Makefile index d8f84ac7bb26..cfd436bb28e4 100644 --- a/drivers/gpu/drm/savage/Makefile +++ b/drivers/gpu/drm/savage/Makefile @@ -2,7 +2,6 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. 
-ccflags-y = -Iinclude/drm savage-y := savage_drv.o savage_bci.o savage_state.o obj-$(CONFIG_DRM_SAVAGE)+= savage.o diff --git a/drivers/gpu/drm/selftests/test-drm_mm.c b/drivers/gpu/drm/selftests/test-drm_mm.c index fa356f5dae27..dfdd858eda0a 100644 --- a/drivers/gpu/drm/selftests/test-drm_mm.c +++ b/drivers/gpu/drm/selftests/test-drm_mm.c @@ -514,6 +514,8 @@ static int igt_reserve(void *ignored) ret = __igt_reserve(count, size + 1); if (ret) return ret; + + cond_resched(); } return 0; @@ -712,6 +714,10 @@ static int igt_insert(void *ignored) return ret; ret = __igt_insert(count, size + 1, false); + if (ret) + return ret; + + cond_resched(); } return 0; @@ -741,6 +747,10 @@ static int igt_replace(void *ignored) return ret; ret = __igt_insert(count, size + 1, true); + if (ret) + return ret; + + cond_resched(); } return 0; @@ -1011,6 +1021,8 @@ static int igt_insert_range(void *ignored) ret = __igt_insert_range(count, size, max/4+1, 3*max/4-1); if (ret) return ret; + + cond_resched(); } return 0; @@ -1056,6 +1068,7 @@ static int igt_align(void *ignored) drm_mm_for_each_node_safe(node, next, &mm) drm_mm_remove_node(node); DRM_MM_BUG_ON(!drm_mm_clean(&mm)); + cond_resched(); } ret = 0; @@ -1097,6 +1110,8 @@ static int igt_align_pot(int max) align, bit); goto out; } + + cond_resched(); } ret = 0; @@ -1471,6 +1486,8 @@ static int igt_evict(void *ignored) goto out; } } + + cond_resched(); } ret = 0; @@ -1566,6 +1583,8 @@ static int igt_evict_range(void *ignored) goto out; } } + + cond_resched(); } ret = 0; @@ -1683,6 +1702,7 @@ static int igt_topdown(void *ignored) drm_mm_for_each_node_safe(node, next, &mm) drm_mm_remove_node(node); DRM_MM_BUG_ON(!drm_mm_clean(&mm)); + cond_resched(); } ret = 0; @@ -1783,6 +1803,7 @@ static int igt_bottomup(void *ignored) drm_mm_for_each_node_safe(node, next, &mm) drm_mm_remove_node(node); DRM_MM_BUG_ON(!drm_mm_clean(&mm)); + cond_resched(); } ret = 0; @@ -1970,6 +1991,8 @@ static int igt_color(void *ignored) drm_mm_remove_node(node); kfree(node); } + + cond_resched(); } ret = 0; @@ -2047,6 +2070,7 @@ static int evict_color(struct drm_mm *mm, } } + cond_resched(); return 0; } @@ -2132,6 +2156,8 @@ static int igt_color_evict(void *ignored) goto out; } } + + cond_resched(); } ret = 0; @@ -2231,6 +2257,8 @@ static int igt_color_evict_range(void *ignored) goto out; } } + + cond_resched(); } ret = 0; diff --git a/drivers/gpu/drm/sis/Makefile b/drivers/gpu/drm/sis/Makefile index 441c061c3ad0..7bf4c130c8fd 100644 --- a/drivers/gpu/drm/sis/Makefile +++ b/drivers/gpu/drm/sis/Makefile @@ -2,7 +2,6 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. 
-ccflags-y = -Iinclude/drm sis-y := sis_drv.o sis_mm.o obj-$(CONFIG_DRM_SIS) += sis.o diff --git a/drivers/gpu/drm/sti/sti_compositor.c b/drivers/gpu/drm/sti/sti_compositor.c index 11d4e885893a..6e4bf68262db 100644 --- a/drivers/gpu/drm/sti/sti_compositor.c +++ b/drivers/gpu/drm/sti/sti_compositor.c @@ -129,7 +129,7 @@ static int sti_compositor_bind(struct device *dev, } break; default: - DRM_ERROR("Unknown subdev compoment type\n"); + DRM_ERROR("Unknown subdev component type\n"); return 1; } diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c index cca75bddb9ad..5b3a41f74f21 100644 --- a/drivers/gpu/drm/sti/sti_cursor.c +++ b/drivers/gpu/drm/sti/sti_cursor.c @@ -33,7 +33,7 @@ #define STI_CURS_MAX_SIZE 128 /* - * pixmap dma buffer stucture + * pixmap dma buffer structure * * @paddr: physical address * @size: buffer size @@ -121,8 +121,7 @@ static int cursor_dbg_show(struct seq_file *s, void *data) cursor_dbg_cml(s, cursor, readl(cursor->regs + CUR_CML)); DBGFS_DUMP(CUR_AWS); DBGFS_DUMP(CUR_AWE); - seq_puts(s, "\n"); - + seq_putc(s, '\n'); return 0; } diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c index bb23318a44b7..24ebc6b2f34d 100644 --- a/drivers/gpu/drm/sti/sti_dvo.c +++ b/drivers/gpu/drm/sti/sti_dvo.c @@ -186,8 +186,7 @@ static int dvo_dbg_show(struct seq_file *s, void *data) DBGFS_DUMP(DVO_LUT_PROG_MID); DBGFS_DUMP(DVO_LUT_PROG_HIGH); dvo_dbg_awg_microcode(s, dvo->regs + DVO_DIGSYNC_INSTR_I); - seq_puts(s, "\n"); - + seq_putc(s, '\n'); return 0; } diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c index 88f16cdf6a4b..5ee0503945c8 100644 --- a/drivers/gpu/drm/sti/sti_gdp.c +++ b/drivers/gpu/drm/sti/sti_gdp.c @@ -149,7 +149,7 @@ static void gdp_dbg_ctl(struct seq_file *s, int val) seq_puts(s, "\tColor:"); for (i = 0; i < ARRAY_SIZE(gdp_format_to_str); i++) { if (gdp_format_to_str[i].format == (val & 0x1F)) { - seq_printf(s, gdp_format_to_str[i].name); + seq_puts(s, gdp_format_to_str[i].name); break; } } @@ -266,8 +266,7 @@ static void gdp_node_dump_node(struct seq_file *s, struct sti_gdp_node *node) seq_printf(s, "\n\tKEY2 0x%08X", node->gam_gdp_key2); seq_printf(s, "\n\tPPT 0x%08X", node->gam_gdp_ppt); gdp_dbg_ppt(s, node->gam_gdp_ppt); - seq_printf(s, "\n\tCML 0x%08X", node->gam_gdp_cml); - seq_puts(s, "\n"); + seq_printf(s, "\n\tCML 0x%08X\n", node->gam_gdp_cml); } static int gdp_node_dbg_show(struct seq_file *s, void *arg) diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index 0c0a75bc8bc3..d6ed909d9d75 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -320,8 +320,7 @@ static void hda_dbg_awg_microcode(struct seq_file *s, void __iomem *reg) { unsigned int i; - seq_puts(s, "\n\n"); - seq_puts(s, " HDA AWG microcode:"); + seq_puts(s, "\n\n HDA AWG microcode:"); for (i = 0; i < AWG_MAX_INST; i++) { if (i % 8 == 0) seq_printf(s, "\n %04X:", i); @@ -333,8 +332,7 @@ static void hda_dbg_video_dacs_ctrl(struct seq_file *s, void __iomem *reg) { u32 val = readl(reg); - seq_puts(s, "\n"); - seq_printf(s, "\n %-25s 0x%08X", "VIDEO_DACS_CONTROL", val); + seq_printf(s, "\n\n %-25s 0x%08X", "VIDEO_DACS_CONTROL", val); seq_puts(s, "\tHD DACs "); seq_puts(s, val & DAC_CFG_HD_HZUVW_OFF_MASK ? 
"disabled" : "enabled"); } @@ -356,8 +354,7 @@ static int hda_dbg_show(struct seq_file *s, void *data) hda_dbg_awg_microcode(s, hda->regs + HDA_SYNC_AWGI); if (hda->video_dacs_ctrl) hda_dbg_video_dacs_ctrl(s, hda->video_dacs_ctrl); - seq_puts(s, "\n"); - + seq_putc(s, '\n'); return 0; } diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c index 243905b6ae59..a59c95a8081b 100644 --- a/drivers/gpu/drm/sti/sti_hdmi.c +++ b/drivers/gpu/drm/sti/sti_hdmi.c @@ -592,7 +592,7 @@ static void hdmi_dbg_cfg(struct seq_file *s, int val) { int tmp; - seq_puts(s, "\t"); + seq_putc(s, '\t'); tmp = val & HDMI_CFG_HDMI_NOT_DVI; DBGFS_PRINT_STR("mode:", tmp ? "HDMI" : "DVI"); seq_puts(s, "\t\t\t\t\t"); @@ -616,7 +616,7 @@ static void hdmi_dbg_sta(struct seq_file *s, int val) { int tmp; - seq_puts(s, "\t"); + seq_putc(s, '\t'); tmp = (val & HDMI_STA_DLL_LCK); DBGFS_PRINT_STR("pll:", tmp ? "locked" : "not locked"); seq_puts(s, "\t\t\t\t\t"); @@ -632,7 +632,7 @@ static void hdmi_dbg_sw_di_cfg(struct seq_file *s, int val) "once every field", "once every frame"}; - seq_puts(s, "\t"); + seq_putc(s, '\t'); tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 1)); DBGFS_PRINT_STR("Data island 1:", en_di[tmp]); seq_puts(s, "\t\t\t\t\t"); @@ -664,16 +664,16 @@ static int hdmi_dbg_show(struct seq_file *s, void *data) DBGFS_DUMP("\n", HDMI_STA); hdmi_dbg_sta(s, hdmi_read(hdmi, HDMI_STA)); DBGFS_DUMP("", HDMI_ACTIVE_VID_XMIN); - seq_puts(s, "\t"); + seq_putc(s, '\t'); DBGFS_PRINT_INT("Xmin:", hdmi_read(hdmi, HDMI_ACTIVE_VID_XMIN)); DBGFS_DUMP("", HDMI_ACTIVE_VID_XMAX); - seq_puts(s, "\t"); + seq_putc(s, '\t'); DBGFS_PRINT_INT("Xmax:", hdmi_read(hdmi, HDMI_ACTIVE_VID_XMAX)); DBGFS_DUMP("", HDMI_ACTIVE_VID_YMIN); - seq_puts(s, "\t"); + seq_putc(s, '\t'); DBGFS_PRINT_INT("Ymin:", hdmi_read(hdmi, HDMI_ACTIVE_VID_YMIN)); DBGFS_DUMP("", HDMI_ACTIVE_VID_YMAX); - seq_puts(s, "\t"); + seq_putc(s, '\t'); DBGFS_PRINT_INT("Ymax:", hdmi_read(hdmi, HDMI_ACTIVE_VID_YMAX)); DBGFS_DUMP("", HDMI_SW_DI_CFG); hdmi_dbg_sw_di_cfg(s, hdmi_read(hdmi, HDMI_SW_DI_CFG)); @@ -692,8 +692,7 @@ static int hdmi_dbg_show(struct seq_file *s, void *data) DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD4, HDMI_IFRAME_SLOT_AVI); DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD5, HDMI_IFRAME_SLOT_AVI); DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD6, HDMI_IFRAME_SLOT_AVI); - seq_puts(s, "\n"); - seq_printf(s, "\n AUDIO Infoframe (Data Island slot N=%d):", + seq_printf(s, "\n\n AUDIO Infoframe (Data Island slot N=%d):", HDMI_IFRAME_SLOT_AUDIO); DBGFS_DUMP_DI(HDMI_SW_DI_N_HEAD_WORD, HDMI_IFRAME_SLOT_AUDIO); DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD0, HDMI_IFRAME_SLOT_AUDIO); @@ -703,8 +702,7 @@ static int hdmi_dbg_show(struct seq_file *s, void *data) DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD4, HDMI_IFRAME_SLOT_AUDIO); DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD5, HDMI_IFRAME_SLOT_AUDIO); DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD6, HDMI_IFRAME_SLOT_AUDIO); - seq_puts(s, "\n"); - seq_printf(s, "\n VENDOR SPECIFIC Infoframe (Data Island slot N=%d):", + seq_printf(s, "\n\n VENDOR SPECIFIC Infoframe (Data Island slot N=%d):", HDMI_IFRAME_SLOT_VENDOR); DBGFS_DUMP_DI(HDMI_SW_DI_N_HEAD_WORD, HDMI_IFRAME_SLOT_VENDOR); DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD0, HDMI_IFRAME_SLOT_VENDOR); @@ -714,8 +712,7 @@ static int hdmi_dbg_show(struct seq_file *s, void *data) DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD4, HDMI_IFRAME_SLOT_VENDOR); DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD5, HDMI_IFRAME_SLOT_VENDOR); DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD6, HDMI_IFRAME_SLOT_VENDOR); - seq_puts(s, "\n"); - + seq_putc(s, '\n'); return 0; } diff 
--git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c index 66f843148ef7..a1c161f77804 100644 --- a/drivers/gpu/drm/sti/sti_hqvdp.c +++ b/drivers/gpu/drm/sti/sti_hqvdp.c @@ -625,8 +625,7 @@ static int hqvdp_dbg_show(struct seq_file *s, void *data) hqvdp_dbg_dump_cmd(s, (struct sti_hqvdp_cmd *)virt); } - seq_puts(s, "\n"); - + seq_putc(s, '\n'); return 0; } @@ -1357,12 +1356,12 @@ static int sti_hqvdp_probe(struct platform_device *pdev) /* Get Memory resources */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) { + if (!res) { DRM_ERROR("Get memory resource failed\n"); return -ENXIO; } hqvdp->regs = devm_ioremap(dev, res->start, resource_size(res)); - if (hqvdp->regs == NULL) { + if (!hqvdp->regs) { DRM_ERROR("Register mapping failed\n"); return -ENXIO; } diff --git a/drivers/gpu/drm/sti/sti_mixer.c b/drivers/gpu/drm/sti/sti_mixer.c index 4ddc58f7fe2e..2bd1d46fe1cd 100644 --- a/drivers/gpu/drm/sti/sti_mixer.c +++ b/drivers/gpu/drm/sti/sti_mixer.c @@ -162,8 +162,7 @@ static int mixer_dbg_show(struct seq_file *s, void *arg) DBGFS_DUMP(GAM_MIXER_MBP); DBGFS_DUMP(GAM_MIXER_MX0); mixer_dbg_mxn(s, mixer->regs + GAM_MIXER_MX0); - seq_puts(s, "\n"); - + seq_putc(s, '\n'); return 0; } diff --git a/drivers/gpu/drm/sti/sti_tvout.c b/drivers/gpu/drm/sti/sti_tvout.c index 8b8ea717c121..8959fcc743a8 100644 --- a/drivers/gpu/drm/sti/sti_tvout.c +++ b/drivers/gpu/drm/sti/sti_tvout.c @@ -459,7 +459,7 @@ static void tvout_dbg_vip(struct seq_file *s, int val) "Aux (color matrix by-passed)", "", "", "", "", "", "Force value"}; - seq_puts(s, "\t"); + seq_putc(s, '\t'); mask = TVO_VIP_REORDER_MASK << TVO_VIP_REORDER_R_SHIFT; r = (val & mask) >> TVO_VIP_REORDER_R_SHIFT; mask = TVO_VIP_REORDER_MASK << TVO_VIP_REORDER_G_SHIFT; @@ -558,8 +558,7 @@ static int tvout_dbg_show(struct seq_file *s, void *data) DBGFS_DUMP(TVO_CSC_AUX_M6); DBGFS_DUMP(TVO_CSC_AUX_M7); DBGFS_DUMP(TVO_AUX_IN_VID_FORMAT); - seq_puts(s, "\n"); - + seq_putc(s, '\n'); return 0; } @@ -847,7 +846,7 @@ static int sti_tvout_probe(struct platform_device *pdev) tvout->dev = dev; - /* get Memory ressources */ + /* get memory resources */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "tvout-reg"); if (!res) { DRM_ERROR("Invalid glue resource\n"); diff --git a/drivers/gpu/drm/sti/sti_vid.c b/drivers/gpu/drm/sti/sti_vid.c index 2ad59892b57e..577a3341d3c1 100644 --- a/drivers/gpu/drm/sti/sti_vid.c +++ b/drivers/gpu/drm/sti/sti_vid.c @@ -61,7 +61,7 @@ static void vid_dbg_ctl(struct seq_file *s, int val) { val = val >> 30; - seq_puts(s, "\t"); + seq_putc(s, '\t'); if (!(val & 1)) seq_puts(s, "NOT "); @@ -114,8 +114,7 @@ static int vid_dbg_show(struct seq_file *s, void *arg) DBGFS_DUMP(VID_BC); DBGFS_DUMP(VID_TINT); DBGFS_DUMP(VID_CSAT); - seq_puts(s, "\n"); - + seq_putc(s, '\n'); return 0; } diff --git a/drivers/gpu/drm/stm/Kconfig b/drivers/gpu/drm/stm/Kconfig new file mode 100644 index 000000000000..2c4817fb0890 --- /dev/null +++ b/drivers/gpu/drm/stm/Kconfig @@ -0,0 +1,16 @@ +config DRM_STM + tristate "DRM Support for STMicroelectronics SoC Series" + depends on DRM && (ARCH_STM32 || ARCH_MULTIPLATFORM) + select DRM_KMS_HELPER + select DRM_GEM_CMA_HELPER + select DRM_KMS_CMA_HELPER + select DRM_PANEL + select VIDEOMODE_HELPERS + select FB_PROVIDE_GET_FB_UNMAPPED_AREA + default y + + help + Enable support for the on-chip display controller on + STMicroelectronics STM32 MCUs. + To compile this driver as a module, choose M here: the module + will be called stm-drm. 
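As a point of reference, enabling the new driver needs only the usual Kconfig plumbing; a minimal fragment for an STM32 multi-platform kernel (an illustrative sketch, not part of this series) would be:

    CONFIG_DRM=y
    CONFIG_DRM_STM=m    # built as stm-drm.ko, per the help text above

With the modular configuration, the driver is loaded at runtime with "modprobe stm-drm" once a matching "st,stm32-ltdc" device-tree node (see drv_dt_ids in drv.c below) is present.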
diff --git a/drivers/gpu/drm/stm/Makefile b/drivers/gpu/drm/stm/Makefile new file mode 100644 index 000000000000..a09ecf450218 --- /dev/null +++ b/drivers/gpu/drm/stm/Makefile @@ -0,0 +1,5 @@ +stm-drm-y := \ + drv.o \ + ltdc.o + +obj-$(CONFIG_DRM_STM) += stm-drm.o diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c new file mode 100644 index 000000000000..83ab48f1fd00 --- /dev/null +++ b/drivers/gpu/drm/stm/drv.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) STMicroelectronics SA 2017 + * + * Authors: Philippe Cornu <philippe.cornu@st.com> + * Yannick Fertre <yannick.fertre@st.com> + * Fabien Dessenne <fabien.dessenne@st.com> + * Mickael Reulier <mickael.reulier@st.com> + * + * License terms: GNU General Public License (GPL), version 2 + */ + +#include <linux/component.h> +#include <linux/of_platform.h> + +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_fb_cma_helper.h> +#include <drm/drm_gem_cma_helper.h> + +#include "ltdc.h" + +#define DRIVER_NAME "stm" +#define DRIVER_DESC "STMicroelectronics SoC DRM" +#define DRIVER_DATE "20170330" +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 0 +#define DRIVER_PATCH_LEVEL 0 + +#define STM_MAX_FB_WIDTH 2048 +#define STM_MAX_FB_HEIGHT 2048 /* same as width to handle orientation */ + +static void drv_output_poll_changed(struct drm_device *ddev) +{ + struct ltdc_device *ldev = ddev->dev_private; + + drm_fbdev_cma_hotplug_event(ldev->fbdev); +} + +static const struct drm_mode_config_funcs drv_mode_config_funcs = { + .fb_create = drm_fb_cma_create, + .output_poll_changed = drv_output_poll_changed, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static void drv_lastclose(struct drm_device *ddev) +{ + struct ltdc_device *ldev = ddev->dev_private; + + DRM_DEBUG("%s\n", __func__); + + drm_fbdev_cma_restore_mode(ldev->fbdev); +} + +DEFINE_DRM_GEM_CMA_FOPS(drv_driver_fops); + +static struct drm_driver drv_driver = { + .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | + DRIVER_ATOMIC, + .lastclose = drv_lastclose, + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCH_LEVEL, + .fops = &drv_driver_fops, + .dumb_create = drm_gem_cma_dumb_create, + .dumb_map_offset = drm_gem_cma_dumb_map_offset, + .dumb_destroy = drm_gem_dumb_destroy, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_free_object_unlocked = drm_gem_cma_free_object, + .gem_vm_ops = &drm_gem_cma_vm_ops, + .gem_prime_export = drm_gem_prime_export, + .gem_prime_import = drm_gem_prime_import, + .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, + .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, + .gem_prime_vmap = drm_gem_cma_prime_vmap, + .gem_prime_vunmap = drm_gem_cma_prime_vunmap, + .gem_prime_mmap = drm_gem_cma_prime_mmap, + .enable_vblank = ltdc_crtc_enable_vblank, + .disable_vblank = ltdc_crtc_disable_vblank, +}; + +static int drv_load(struct drm_device *ddev) +{ + struct platform_device *pdev = to_platform_device(ddev->dev); + struct drm_fbdev_cma *fbdev; + struct ltdc_device *ldev; + int ret; + + DRM_DEBUG("%s\n", __func__); + + ldev = devm_kzalloc(ddev->dev, sizeof(*ldev), GFP_KERNEL); + if (!ldev) + return -ENOMEM; + + ddev->dev_private = (void *)ldev; + + drm_mode_config_init(ddev); + + /* + * set max width and height as default value. 
+ * this value would be used to check framebuffer size limitation + * at drm_mode_addfb(). + */ + ddev->mode_config.min_width = 0; + ddev->mode_config.min_height = 0; + ddev->mode_config.max_width = STM_MAX_FB_WIDTH; + ddev->mode_config.max_height = STM_MAX_FB_HEIGHT; + ddev->mode_config.funcs = &drv_mode_config_funcs; + + ret = ltdc_load(ddev); + if (ret) + goto err; + + drm_mode_config_reset(ddev); + drm_kms_helper_poll_init(ddev); + + if (ddev->mode_config.num_connector) { + ldev = ddev->dev_private; + fbdev = drm_fbdev_cma_init(ddev, 16, + ddev->mode_config.num_connector); + if (IS_ERR(fbdev)) { + DRM_DEBUG("Warning: fails to create fbdev\n"); + fbdev = NULL; + } + ldev->fbdev = fbdev; + } + + platform_set_drvdata(pdev, ddev); + + return 0; +err: + drm_mode_config_cleanup(ddev); + return ret; +} + +static void drv_unload(struct drm_device *ddev) +{ + struct ltdc_device *ldev = ddev->dev_private; + + DRM_DEBUG("%s\n", __func__); + + if (ldev->fbdev) { + drm_fbdev_cma_fini(ldev->fbdev); + ldev->fbdev = NULL; + } + drm_kms_helper_poll_fini(ddev); + ltdc_unload(ddev); + drm_mode_config_cleanup(ddev); +} + +static int stm_drm_platform_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct drm_device *ddev; + int ret; + + DRM_DEBUG("%s\n", __func__); + + dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); + + ddev = drm_dev_alloc(&drv_driver, dev); + if (IS_ERR(ddev)) + return PTR_ERR(ddev); + + ret = drv_load(ddev); + if (ret) + goto err_unref; + + ret = drm_dev_register(ddev, 0); + if (ret) + goto err_unref; + + return 0; + +err_unref: + drm_dev_unref(ddev); + + return ret; +} + +static int stm_drm_platform_remove(struct platform_device *pdev) +{ + struct drm_device *ddev = platform_get_drvdata(pdev); + + DRM_DEBUG("%s\n", __func__); + + drm_dev_unregister(ddev); + drv_unload(ddev); + drm_dev_unref(ddev); + + return 0; +} + +static const struct of_device_id drv_dt_ids[] = { + { .compatible = "st,stm32-ltdc"}, + { /* end node */ }, +}; +MODULE_DEVICE_TABLE(of, drv_dt_ids); + +static struct platform_driver stm_drm_platform_driver = { + .probe = stm_drm_platform_probe, + .remove = stm_drm_platform_remove, + .driver = { + .name = DRIVER_NAME, + .of_match_table = drv_dt_ids, + }, +}; + +module_platform_driver(stm_drm_platform_driver); + +MODULE_AUTHOR("Philippe Cornu <philippe.cornu@st.com>"); +MODULE_AUTHOR("Yannick Fertre <yannick.fertre@st.com>"); +MODULE_AUTHOR("Fabien Dessenne <fabien.dessenne@st.com>"); +MODULE_AUTHOR("Mickael Reulier <mickael.reulier@st.com>"); +MODULE_DESCRIPTION("STMicroelectronics ST DRM LTDC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c new file mode 100644 index 000000000000..700cc0800e51 --- /dev/null +++ b/drivers/gpu/drm/stm/ltdc.c @@ -0,0 +1,1160 @@ +/* + * Copyright (C) STMicroelectronics SA 2017 + * + * Authors: Philippe Cornu <philippe.cornu@st.com> + * Yannick Fertre <yannick.fertre@st.com> + * Fabien Dessenne <fabien.dessenne@st.com> + * Mickael Reulier <mickael.reulier@st.com> + * + * License terms: GNU General Public License (GPL), version 2 + */ + +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/of_address.h> +#include <linux/of_graph.h> +#include <linux/reset.h> + +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_fb_cma_helper.h> +#include <drm/drm_gem_cma_helper.h> +#include <drm/drm_of.h> +#include <drm/drm_panel.h> +#include <drm/drm_plane_helper.h> + +#include <video/videomode.h> 
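+/* The register map below covers the global LTDC control/status registers followed by per-layer registers at LAY_OFS (0x80) strides; REG_OFS compensates for the extra "Layer Configuration 2" register that newer hardware revisions insert. */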
+ +#include "ltdc.h" + +#define NB_CRTC 1 +#define CRTC_MASK GENMASK(NB_CRTC - 1, 0) + +#define MAX_IRQ 4 + +#define HWVER_10200 0x010200 +#define HWVER_10300 0x010300 +#define HWVER_20101 0x020101 + +/* + * The address of some registers depends on the HW version: such registers have + * an extra offset specified with reg_ofs. + */ +#define REG_OFS_NONE 0 +#define REG_OFS_4 4 /* Insertion of "Layer Configuration 2" reg */ +#define REG_OFS (ldev->caps.reg_ofs) +#define LAY_OFS 0x80 /* Register Offset between 2 layers */ + +/* Global register offsets */ +#define LTDC_IDR 0x0000 /* IDentification */ +#define LTDC_LCR 0x0004 /* Layer Count */ +#define LTDC_SSCR 0x0008 /* Synchronization Size Configuration */ +#define LTDC_BPCR 0x000C /* Back Porch Configuration */ +#define LTDC_AWCR 0x0010 /* Active Width Configuration */ +#define LTDC_TWCR 0x0014 /* Total Width Configuration */ +#define LTDC_GCR 0x0018 /* Global Control */ +#define LTDC_GC1R 0x001C /* Global Configuration 1 */ +#define LTDC_GC2R 0x0020 /* Global Configuration 2 */ +#define LTDC_SRCR 0x0024 /* Shadow Reload Configuration */ +#define LTDC_GACR 0x0028 /* GAmma Correction */ +#define LTDC_BCCR 0x002C /* Background Color Configuration */ +#define LTDC_IER 0x0034 /* Interrupt Enable */ +#define LTDC_ISR 0x0038 /* Interrupt Status */ +#define LTDC_ICR 0x003C /* Interrupt Clear */ +#define LTDC_LIPCR 0x0040 /* Line Interrupt Position Configuration */ +#define LTDC_CPSR 0x0044 /* Current Position Status */ +#define LTDC_CDSR 0x0048 /* Current Display Status */ + +/* Layer register offsets */ +#define LTDC_L1LC1R (0x0080) /* L1 Layer Configuration 1 */ +#define LTDC_L1LC2R (0x0084) /* L1 Layer Configuration 2 */ +#define LTDC_L1CR (0x0084 + REG_OFS) /* L1 Control */ +#define LTDC_L1WHPCR (0x0088 + REG_OFS) /* L1 Window Hor Position Config */ +#define LTDC_L1WVPCR (0x008C + REG_OFS) /* L1 Window Vert Position Config */ +#define LTDC_L1CKCR (0x0090 + REG_OFS) /* L1 Color Keying Configuration */ +#define LTDC_L1PFCR (0x0094 + REG_OFS) /* L1 Pixel Format Configuration */ +#define LTDC_L1CACR (0x0098 + REG_OFS) /* L1 Constant Alpha Config */ +#define LTDC_L1DCCR (0x009C + REG_OFS) /* L1 Default Color Configuration */ +#define LTDC_L1BFCR (0x00A0 + REG_OFS) /* L1 Blend Factors Configuration */ +#define LTDC_L1FBBCR (0x00A4 + REG_OFS) /* L1 FrameBuffer Bus Control */ +#define LTDC_L1AFBCR (0x00A8 + REG_OFS) /* L1 AuxFB Control */ +#define LTDC_L1CFBAR (0x00AC + REG_OFS) /* L1 Color FrameBuffer Address */ +#define LTDC_L1CFBLR (0x00B0 + REG_OFS) /* L1 Color FrameBuffer Length */ +#define LTDC_L1CFBLNR (0x00B4 + REG_OFS) /* L1 Color FrameBuffer Line Nb */ +#define LTDC_L1AFBAR (0x00B8 + REG_OFS) /* L1 AuxFB Address */ +#define LTDC_L1AFBLR (0x00BC + REG_OFS) /* L1 AuxFB Length */ +#define LTDC_L1AFBLNR (0x00C0 + REG_OFS) /* L1 AuxFB Line Number */ +#define LTDC_L1CLUTWR (0x00C4 + REG_OFS) /* L1 CLUT Write */ +#define LTDC_L1YS1R (0x00E0 + REG_OFS) /* L1 YCbCr Scale 1 */ +#define LTDC_L1YS2R (0x00E4 + REG_OFS) /* L1 YCbCr Scale 2 */ + +/* Bit definitions */ +#define SSCR_VSH GENMASK(10, 0) /* Vertical Synchronization Height */ +#define SSCR_HSW GENMASK(27, 16) /* Horizontal Synchronization Width */ + +#define BPCR_AVBP GENMASK(10, 0) /* Accumulated Vertical Back Porch */ +#define BPCR_AHBP GENMASK(27, 16) /* Accumulated Horizontal Back Porch */ + +#define AWCR_AAH GENMASK(10, 0) /* Accumulated Active Height */ +#define AWCR_AAW GENMASK(27, 16) /* Accumulated Active Width */ + +#define TWCR_TOTALH GENMASK(10, 0) /* TOTAL Height */ +#define 
TWCR_TOTALW GENMASK(27, 16) /* TOTAL Width */ + +#define GCR_LTDCEN BIT(0) /* LTDC ENable */ +#define GCR_DEN BIT(16) /* Dither ENable */ +#define GCR_PCPOL BIT(28) /* Pixel Clock POLarity */ +#define GCR_DEPOL BIT(29) /* Data Enable POLarity */ +#define GCR_VSPOL BIT(30) /* Vertical Synchro POLarity */ +#define GCR_HSPOL BIT(31) /* Horizontal Synchro POLarity */ + +#define GC1R_WBCH GENMASK(3, 0) /* Width of Blue CHannel output */ +#define GC1R_WGCH GENMASK(7, 4) /* Width of Green Channel output */ +#define GC1R_WRCH GENMASK(11, 8) /* Width of Red Channel output */ +#define GC1R_PBEN BIT(12) /* Precise Blending ENable */ +#define GC1R_DT GENMASK(15, 14) /* Dithering Technique */ +#define GC1R_GCT GENMASK(19, 17) /* Gamma Correction Technique */ +#define GC1R_SHREN BIT(21) /* SHadow Registers ENabled */ +#define GC1R_BCP BIT(22) /* Background Colour Programmable */ +#define GC1R_BBEN BIT(23) /* Background Blending ENabled */ +#define GC1R_LNIP BIT(24) /* Line Number IRQ Position */ +#define GC1R_TP BIT(25) /* Timing Programmable */ +#define GC1R_IPP BIT(26) /* IRQ Polarity Programmable */ +#define GC1R_SPP BIT(27) /* Sync Polarity Programmable */ +#define GC1R_DWP BIT(28) /* Dither Width Programmable */ +#define GC1R_STREN BIT(29) /* STatus Registers ENabled */ +#define GC1R_BMEN BIT(31) /* Blind Mode ENabled */ + +#define GC2R_EDCA BIT(0) /* External Display Control Ability */ +#define GC2R_STSAEN BIT(1) /* Slave Timing Sync Ability ENabled */ +#define GC2R_DVAEN BIT(2) /* Dual-View Ability ENabled */ +#define GC2R_DPAEN BIT(3) /* Dual-Port Ability ENabled */ +#define GC2R_BW GENMASK(6, 4) /* Bus Width (log2 of nb of bytes) */ +#define GC2R_EDCEN BIT(7) /* External Display Control ENabled */ + +#define SRCR_IMR BIT(0) /* IMmediate Reload */ +#define SRCR_VBR BIT(1) /* Vertical Blanking Reload */ + +#define BCCR_BCBLACK 0x00 /* Background Color BLACK */ +#define BCCR_BCBLUE GENMASK(7, 0) /* Background Color BLUE */ +#define BCCR_BCGREEN GENMASK(15, 8) /* Background Color GREEN */ +#define BCCR_BCRED GENMASK(23, 16) /* Background Color RED */ +#define BCCR_BCWHITE GENMASK(23, 0) /* Background Color WHITE */ + +#define IER_LIE BIT(0) /* Line Interrupt Enable */ +#define IER_FUIE BIT(1) /* Fifo Underrun Interrupt Enable */ +#define IER_TERRIE BIT(2) /* Transfer ERRor Interrupt Enable */ +#define IER_RRIE BIT(3) /* Register Reload Interrupt enable */ + +#define ISR_LIF BIT(0) /* Line Interrupt Flag */ +#define ISR_FUIF BIT(1) /* Fifo Underrun Interrupt Flag */ +#define ISR_TERRIF BIT(2) /* Transfer ERRor Interrupt Flag */ +#define ISR_RRIF BIT(3) /* Register Reload Interrupt Flag */ + +#define LXCR_LEN BIT(0) /* Layer ENable */ +#define LXCR_COLKEN BIT(1) /* Color Keying Enable */ +#define LXCR_CLUTEN BIT(4) /* Color Look-Up Table ENable */ + +#define LXWHPCR_WHSTPOS GENMASK(11, 0) /* Window Horizontal StarT POSition */ +#define LXWHPCR_WHSPPOS GENMASK(27, 16) /* Window Horizontal StoP POSition */ + +#define LXWVPCR_WVSTPOS GENMASK(10, 0) /* Window Vertical StarT POSition */ +#define LXWVPCR_WVSPPOS GENMASK(26, 16) /* Window Vertical StoP POSition */ + +#define LXPFCR_PF GENMASK(2, 0) /* Pixel Format */ + +#define LXCACR_CONSTA GENMASK(7, 0) /* CONSTant Alpha */ + +#define LXBFCR_BF2 GENMASK(2, 0) /* Blending Factor 2 */ +#define LXBFCR_BF1 GENMASK(10, 8) /* Blending Factor 1 */ + +#define LXCFBLR_CFBLL GENMASK(12, 0) /* Color Frame Buffer Line Length */ +#define LXCFBLR_CFBP GENMASK(28, 16) /* Color Frame Buffer Pitch in bytes */ + +#define LXCFBLNR_CFBLN GENMASK(10, 0) /* Color Frame Buffer 
Line Number */ + +#define HSPOL_AL 0 /* Horizontal Sync POLarity Active Low */ +#define VSPOL_AL 0 /* Vertical Sync POLarity Active Low */ +#define DEPOL_AL 0 /* Data Enable POLarity Active Low */ +#define PCPOL_IPC 0 /* Input Pixel Clock */ +#define HSPOL_AH GCR_HSPOL /* Horizontal Sync POLarity Active High */ +#define VSPOL_AH GCR_VSPOL /* Vertical Sync POLarity Active High */ +#define DEPOL_AH GCR_DEPOL /* Data Enable POLarity Active High */ +#define PCPOL_IIPC GCR_PCPOL /* Inverted Input Pixel Clock */ +#define CONSTA_MAX 0xFF /* CONSTant Alpha MAX= 1.0 */ +#define BF1_PAXCA 0x600 /* Pixel Alpha x Constant Alpha */ +#define BF1_CA 0x400 /* Constant Alpha */ +#define BF2_1PAXCA 0x007 /* 1 - (Pixel Alpha x Constant Alpha) */ +#define BF2_1CA 0x005 /* 1 - Constant Alpha */ + +#define NB_PF 8 /* Max nb of HW pixel format */ + +enum ltdc_pix_fmt { + PF_NONE, + /* RGB formats */ + PF_ARGB8888, /* ARGB [32 bits] */ + PF_RGBA8888, /* RGBA [32 bits] */ + PF_RGB888, /* RGB [24 bits] */ + PF_RGB565, /* RGB [16 bits] */ + PF_ARGB1555, /* ARGB A:1 bit RGB:15 bits [16 bits] */ + PF_ARGB4444, /* ARGB A:4 bits R/G/B: 4 bits each [16 bits] */ + /* Indexed formats */ + PF_L8, /* Indexed 8 bits [8 bits] */ + PF_AL44, /* Alpha:4 bits + indexed 4 bits [8 bits] */ + PF_AL88 /* Alpha:8 bits + indexed 8 bits [16 bits] */ +}; + +/* The index gives the encoding of the pixel format for an HW version */ +static const enum ltdc_pix_fmt ltdc_pix_fmt_a0[NB_PF] = { + PF_ARGB8888, /* 0x00 */ + PF_RGB888, /* 0x01 */ + PF_RGB565, /* 0x02 */ + PF_ARGB1555, /* 0x03 */ + PF_ARGB4444, /* 0x04 */ + PF_L8, /* 0x05 */ + PF_AL44, /* 0x06 */ + PF_AL88 /* 0x07 */ +}; + +static const enum ltdc_pix_fmt ltdc_pix_fmt_a1[NB_PF] = { + PF_ARGB8888, /* 0x00 */ + PF_RGB888, /* 0x01 */ + PF_RGB565, /* 0x02 */ + PF_RGBA8888, /* 0x03 */ + PF_AL44, /* 0x04 */ + PF_L8, /* 0x05 */ + PF_ARGB1555, /* 0x06 */ + PF_ARGB4444 /* 0x07 */ +}; + +static inline u32 reg_read(void __iomem *base, u32 reg) +{ + return readl_relaxed(base + reg); +} + +static inline void reg_write(void __iomem *base, u32 reg, u32 val) +{ + writel_relaxed(val, base + reg); +} + +static inline void reg_set(void __iomem *base, u32 reg, u32 mask) +{ + reg_write(base, reg, reg_read(base, reg) | mask); +} + +static inline void reg_clear(void __iomem *base, u32 reg, u32 mask) +{ + reg_write(base, reg, reg_read(base, reg) & ~mask); +} + +static inline void reg_update_bits(void __iomem *base, u32 reg, u32 mask, + u32 val) +{ + reg_write(base, reg, (reg_read(base, reg) & ~mask) | val); +} + +static inline struct ltdc_device *crtc_to_ltdc(struct drm_crtc *crtc) +{ + return (struct ltdc_device *)crtc->dev->dev_private; +} + +static inline struct ltdc_device *plane_to_ltdc(struct drm_plane *plane) +{ + return (struct ltdc_device *)plane->dev->dev_private; +} + +static inline struct ltdc_device *encoder_to_ltdc(struct drm_encoder *enc) +{ + return (struct ltdc_device *)enc->dev->dev_private; +} + +static inline struct ltdc_device *connector_to_ltdc(struct drm_connector *con) +{ + return (struct ltdc_device *)con->dev->dev_private; +} + +static inline enum ltdc_pix_fmt to_ltdc_pixelformat(u32 drm_fmt) +{ + enum ltdc_pix_fmt pf; + + switch (drm_fmt) { + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_XRGB8888: + pf = PF_ARGB8888; + break; + case DRM_FORMAT_RGBA8888: + case DRM_FORMAT_RGBX8888: + pf = PF_RGBA8888; + break; + case DRM_FORMAT_RGB888: + pf = PF_RGB888; + break; + case DRM_FORMAT_RGB565: + pf = PF_RGB565; + break; + case DRM_FORMAT_ARGB1555: + case DRM_FORMAT_XRGB1555: + pf = 
PF_ARGB1555; + break; + case DRM_FORMAT_ARGB4444: + case DRM_FORMAT_XRGB4444: + pf = PF_ARGB4444; + break; + case DRM_FORMAT_C8: + pf = PF_L8; + break; + default: + pf = PF_NONE; + break; + /* Note: There are no DRM_FORMAT for AL44 and AL88 */ + } + + return pf; +} + +static inline u32 to_drm_pixelformat(enum ltdc_pix_fmt pf) +{ + switch (pf) { + case PF_ARGB8888: + return DRM_FORMAT_ARGB8888; + case PF_RGBA8888: + return DRM_FORMAT_RGBA8888; + case PF_RGB888: + return DRM_FORMAT_RGB888; + case PF_RGB565: + return DRM_FORMAT_RGB565; + case PF_ARGB1555: + return DRM_FORMAT_ARGB1555; + case PF_ARGB4444: + return DRM_FORMAT_ARGB4444; + case PF_L8: + return DRM_FORMAT_C8; + case PF_AL44: /* No DRM support */ + case PF_AL88: /* No DRM support */ + case PF_NONE: + default: + return 0; + } +} + +static irqreturn_t ltdc_irq_thread(int irq, void *arg) +{ + struct drm_device *ddev = arg; + struct ltdc_device *ldev = ddev->dev_private; + struct drm_crtc *crtc = drm_crtc_from_index(ddev, 0); + + /* Line IRQ : trigger the vblank event */ + if (ldev->irq_status & ISR_LIF) + drm_crtc_handle_vblank(crtc); + + /* Save FIFO Underrun & Transfer Error status */ + mutex_lock(&ldev->err_lock); + if (ldev->irq_status & ISR_FUIF) + ldev->error_status |= ISR_FUIF; + if (ldev->irq_status & ISR_TERRIF) + ldev->error_status |= ISR_TERRIF; + mutex_unlock(&ldev->err_lock); + + return IRQ_HANDLED; +} + +static irqreturn_t ltdc_irq(int irq, void *arg) +{ + struct drm_device *ddev = arg; + struct ltdc_device *ldev = ddev->dev_private; + + /* Read & Clear the interrupt status */ + ldev->irq_status = reg_read(ldev->regs, LTDC_ISR); + reg_write(ldev->regs, LTDC_ICR, ldev->irq_status); + + return IRQ_WAKE_THREAD; +} + +/* + * DRM_CRTC + */ + +static void ltdc_crtc_load_lut(struct drm_crtc *crtc) +{ + struct ltdc_device *ldev = crtc_to_ltdc(crtc); + unsigned int i, lay; + + for (lay = 0; lay < ldev->caps.nb_layers; lay++) + for (i = 0; i < 256; i++) + reg_write(ldev->regs, LTDC_L1CLUTWR + lay * LAY_OFS, + ldev->clut[i]); +} + +static void ltdc_crtc_enable(struct drm_crtc *crtc) +{ + struct ltdc_device *ldev = crtc_to_ltdc(crtc); + + DRM_DEBUG_DRIVER("\n"); + + /* Sets the background color value */ + reg_write(ldev->regs, LTDC_BCCR, BCCR_BCBLACK); + + /* Enable IRQ */ + reg_set(ldev->regs, LTDC_IER, IER_RRIE | IER_FUIE | IER_TERRIE); + + /* Immediately commit the planes */ + reg_set(ldev->regs, LTDC_SRCR, SRCR_IMR); + + /* Enable LTDC */ + reg_set(ldev->regs, LTDC_GCR, GCR_LTDCEN); + + drm_crtc_vblank_on(crtc); +} + +static void ltdc_crtc_disable(struct drm_crtc *crtc) +{ + struct ltdc_device *ldev = crtc_to_ltdc(crtc); + + DRM_DEBUG_DRIVER("\n"); + + drm_crtc_vblank_off(crtc); + + /* disable LTDC */ + reg_clear(ldev->regs, LTDC_GCR, GCR_LTDCEN); + + /* disable IRQ */ + reg_clear(ldev->regs, LTDC_IER, IER_RRIE | IER_FUIE | IER_TERRIE); + + /* immediately commit disable of layers before switching off LTDC */ + reg_set(ldev->regs, LTDC_SRCR, SRCR_IMR); +} + +static void ltdc_crtc_mode_set_nofb(struct drm_crtc *crtc) +{ + struct ltdc_device *ldev = crtc_to_ltdc(crtc); + struct drm_display_mode *mode = &crtc->state->adjusted_mode; + struct videomode vm; + int rate = mode->clock * 1000; + u32 hsync, vsync, accum_hbp, accum_vbp, accum_act_w, accum_act_h; + u32 total_width, total_height; + u32 val; + + drm_display_mode_to_videomode(mode, &vm); + + DRM_DEBUG_DRIVER("CRTC:%d mode:%s\n", crtc->base.id, mode->name); + DRM_DEBUG_DRIVER("Video mode: %dx%d", vm.hactive, vm.vactive); + DRM_DEBUG_DRIVER(" hfp %d hbp %d hsl %d vfp %d vbp %d vsl 
%d\n", + vm.hfront_porch, vm.hback_porch, vm.hsync_len, + vm.vfront_porch, vm.vback_porch, vm.vsync_len); + + /* Convert video timings to ltdc timings */ + hsync = vm.hsync_len - 1; + vsync = vm.vsync_len - 1; + accum_hbp = hsync + vm.hback_porch; + accum_vbp = vsync + vm.vback_porch; + accum_act_w = accum_hbp + vm.hactive; + accum_act_h = accum_vbp + vm.vactive; + total_width = accum_act_w + vm.hfront_porch; + total_height = accum_act_h + vm.vfront_porch; + + clk_disable(ldev->pixel_clk); + + if (clk_set_rate(ldev->pixel_clk, rate) < 0) { + DRM_ERROR("Cannot set rate (%dHz) for pixel clk\n", rate); + return; + } + + clk_enable(ldev->pixel_clk); + + /* Configures the HS, VS, DE and PC polarities. */ + val = HSPOL_AL | VSPOL_AL | DEPOL_AL | PCPOL_IPC; + + if (vm.flags & DISPLAY_FLAGS_HSYNC_HIGH) + val |= HSPOL_AH; + + if (vm.flags & DISPLAY_FLAGS_VSYNC_HIGH) + val |= VSPOL_AH; + + if (vm.flags & DISPLAY_FLAGS_DE_HIGH) + val |= DEPOL_AH; + + if (vm.flags & DISPLAY_FLAGS_PIXDATA_NEGEDGE) + val |= PCPOL_IIPC; + + reg_update_bits(ldev->regs, LTDC_GCR, + GCR_HSPOL | GCR_VSPOL | GCR_DEPOL | GCR_PCPOL, val); + + /* Set Synchronization size */ + val = (hsync << 16) | vsync; + reg_update_bits(ldev->regs, LTDC_SSCR, SSCR_VSH | SSCR_HSW, val); + + /* Set Accumulated Back porch */ + val = (accum_hbp << 16) | accum_vbp; + reg_update_bits(ldev->regs, LTDC_BPCR, BPCR_AVBP | BPCR_AHBP, val); + + /* Set Accumulated Active Width */ + val = (accum_act_w << 16) | accum_act_h; + reg_update_bits(ldev->regs, LTDC_AWCR, AWCR_AAW | AWCR_AAH, val); + + /* Set total width & height */ + val = (total_width << 16) | total_height; + reg_update_bits(ldev->regs, LTDC_TWCR, TWCR_TOTALH | TWCR_TOTALW, val); + + reg_write(ldev->regs, LTDC_LIPCR, (accum_act_h + 1)); +} + +static void ltdc_crtc_atomic_flush(struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state) +{ + struct ltdc_device *ldev = crtc_to_ltdc(crtc); + struct drm_pending_vblank_event *event = crtc->state->event; + + DRM_DEBUG_ATOMIC("\n"); + + /* Commit shadow registers = update planes at next vblank */ + reg_set(ldev->regs, LTDC_SRCR, SRCR_VBR); + + if (event) { + crtc->state->event = NULL; + + spin_lock_irq(&crtc->dev->event_lock); + if (drm_crtc_vblank_get(crtc) == 0) + drm_crtc_arm_vblank_event(crtc, event); + else + drm_crtc_send_vblank_event(crtc, event); + spin_unlock_irq(&crtc->dev->event_lock); + } +} + +static struct drm_crtc_helper_funcs ltdc_crtc_helper_funcs = { + .load_lut = ltdc_crtc_load_lut, + .enable = ltdc_crtc_enable, + .disable = ltdc_crtc_disable, + .mode_set_nofb = ltdc_crtc_mode_set_nofb, + .atomic_flush = ltdc_crtc_atomic_flush, +}; + +int ltdc_crtc_enable_vblank(struct drm_device *ddev, unsigned int pipe) +{ + struct ltdc_device *ldev = ddev->dev_private; + + DRM_DEBUG_DRIVER("\n"); + reg_set(ldev->regs, LTDC_IER, IER_LIE); + + return 0; +} + +void ltdc_crtc_disable_vblank(struct drm_device *ddev, unsigned int pipe) +{ + struct ltdc_device *ldev = ddev->dev_private; + + DRM_DEBUG_DRIVER("\n"); + reg_clear(ldev->regs, LTDC_IER, IER_LIE); +} + +static struct drm_crtc_funcs ltdc_crtc_funcs = { + .destroy = drm_crtc_cleanup, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .reset = drm_atomic_helper_crtc_reset, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, +}; + +/* + * DRM_PLANE + */ + +static int ltdc_plane_atomic_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct drm_framebuffer 
*fb = state->fb; + u32 src_x, src_y, src_w, src_h; + + DRM_DEBUG_DRIVER("\n"); + + if (!fb) + return 0; + + /* convert src_ from 16:16 format */ + src_x = state->src_x >> 16; + src_y = state->src_y >> 16; + src_w = state->src_w >> 16; + src_h = state->src_h >> 16; + + /* Reject scaling */ + if ((src_w != state->crtc_w) || (src_h != state->crtc_h)) { + DRM_ERROR("Scaling is not supported"); + return -EINVAL; + } + + return 0; +} + +static void ltdc_plane_atomic_update(struct drm_plane *plane, + struct drm_plane_state *oldstate) +{ + struct ltdc_device *ldev = plane_to_ltdc(plane); + struct drm_plane_state *state = plane->state; + struct drm_framebuffer *fb = state->fb; + u32 lofs = plane->index * LAY_OFS; + u32 x0 = state->crtc_x; + u32 x1 = state->crtc_x + state->crtc_w - 1; + u32 y0 = state->crtc_y; + u32 y1 = state->crtc_y + state->crtc_h - 1; + u32 src_x, src_y, src_w, src_h; + u32 val, pitch_in_bytes, line_length, paddr, ahbp, avbp, bpcr; + enum ltdc_pix_fmt pf; + + if (!state->crtc || !fb) { + DRM_DEBUG_DRIVER("fb or crtc NULL"); + return; + } + + /* convert src_ from 16:16 format */ + src_x = state->src_x >> 16; + src_y = state->src_y >> 16; + src_w = state->src_w >> 16; + src_h = state->src_h >> 16; + + DRM_DEBUG_DRIVER( + "plane:%d fb:%d (%dx%d)@(%d,%d) -> (%dx%d)@(%d,%d)\n", + plane->base.id, fb->base.id, + src_w, src_h, src_x, src_y, + state->crtc_w, state->crtc_h, state->crtc_x, state->crtc_y); + + bpcr = reg_read(ldev->regs, LTDC_BPCR); + ahbp = (bpcr & BPCR_AHBP) >> 16; + avbp = bpcr & BPCR_AVBP; + + /* Configures the horizontal start and stop position */ + val = ((x1 + 1 + ahbp) << 16) + (x0 + 1 + ahbp); + reg_update_bits(ldev->regs, LTDC_L1WHPCR + lofs, + LXWHPCR_WHSTPOS | LXWHPCR_WHSPPOS, val); + + /* Configures the vertical start and stop position */ + val = ((y1 + 1 + avbp) << 16) + (y0 + 1 + avbp); + reg_update_bits(ldev->regs, LTDC_L1WVPCR + lofs, + LXWVPCR_WVSTPOS | LXWVPCR_WVSPPOS, val); + + /* Specifies the pixel format */ + pf = to_ltdc_pixelformat(fb->format->format); + for (val = 0; val < NB_PF; val++) + if (ldev->caps.pix_fmt_hw[val] == pf) + break; + + if (val == NB_PF) { + DRM_ERROR("Pixel format %.4s not supported\n", + (char *)&fb->format->format); + val = 0; /* set by default ARGB 32 bits */ + } + reg_update_bits(ldev->regs, LTDC_L1PFCR + lofs, LXPFCR_PF, val); + + /* Configures the color frame buffer pitch in bytes & line length */ + pitch_in_bytes = fb->pitches[0]; + line_length = drm_format_plane_cpp(fb->format->format, 0) * + (x1 - x0 + 1) + (ldev->caps.bus_width >> 3) - 1; + val = ((pitch_in_bytes << 16) | line_length); + reg_update_bits(ldev->regs, LTDC_L1CFBLR + lofs, + LXCFBLR_CFBLL | LXCFBLR_CFBP, val); + + /* Specifies the constant alpha value */ + val = CONSTA_MAX; + reg_update_bits(ldev->regs, LTDC_L1CACR + lofs, + LXCACR_CONSTA, val); + + /* Specifies the blending factors */ + val = BF1_PAXCA | BF2_1PAXCA; + reg_update_bits(ldev->regs, LTDC_L1BFCR + lofs, + LXBFCR_BF2 | LXBFCR_BF1, val); + + /* Configures the frame buffer line number */ + val = y1 - y0 + 1; + reg_update_bits(ldev->regs, LTDC_L1CFBLNR + lofs, + LXCFBLNR_CFBLN, val); + + /* Sets the FB address */ + paddr = (u32)drm_fb_cma_get_gem_addr(fb, state, 0); + + DRM_DEBUG_DRIVER("fb: phys 0x%08x", paddr); + reg_write(ldev->regs, LTDC_L1CFBAR + lofs, paddr); + + /* Enable layer and CLUT if needed */ + val = fb->format->format == DRM_FORMAT_C8 ? 
LXCR_CLUTEN : 0; + val |= LXCR_LEN; + reg_update_bits(ldev->regs, LTDC_L1CR + lofs, + LXCR_LEN | LXCR_CLUTEN, val); + + mutex_lock(&ldev->err_lock); + if (ldev->error_status & ISR_FUIF) { + DRM_DEBUG_DRIVER("Fifo underrun\n"); + ldev->error_status &= ~ISR_FUIF; + } + if (ldev->error_status & ISR_TERRIF) { + DRM_DEBUG_DRIVER("Transfer error\n"); + ldev->error_status &= ~ISR_TERRIF; + } + mutex_unlock(&ldev->err_lock); +} + +static void ltdc_plane_atomic_disable(struct drm_plane *plane, + struct drm_plane_state *oldstate) +{ + struct ltdc_device *ldev = plane_to_ltdc(plane); + u32 lofs = plane->index * LAY_OFS; + + /* disable layer */ + reg_clear(ldev->regs, LTDC_L1CR + lofs, LXCR_LEN); + + DRM_DEBUG_DRIVER("CRTC:%d plane:%d\n", + oldstate->crtc->base.id, plane->base.id); +} + +static struct drm_plane_funcs ltdc_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_plane_cleanup, + .set_property = drm_atomic_helper_plane_set_property, + .reset = drm_atomic_helper_plane_reset, + .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, +}; + +static const struct drm_plane_helper_funcs ltdc_plane_helper_funcs = { + .atomic_check = ltdc_plane_atomic_check, + .atomic_update = ltdc_plane_atomic_update, + .atomic_disable = ltdc_plane_atomic_disable, +}; + +static struct drm_plane *ltdc_plane_create(struct drm_device *ddev, + enum drm_plane_type type) +{ + unsigned long possible_crtcs = CRTC_MASK; + struct ltdc_device *ldev = ddev->dev_private; + struct device *dev = ddev->dev; + struct drm_plane *plane; + unsigned int i, nb_fmt = 0; + u32 formats[NB_PF]; + u32 drm_fmt; + int ret; + + /* Get supported pixel formats */ + for (i = 0; i < NB_PF; i++) { + drm_fmt = to_drm_pixelformat(ldev->caps.pix_fmt_hw[i]); + if (!drm_fmt) + continue; + formats[nb_fmt++] = drm_fmt; + } + + plane = devm_kzalloc(dev, sizeof(*plane), GFP_KERNEL); + if (!plane) + return 0; + + ret = drm_universal_plane_init(ddev, plane, possible_crtcs, + <dc_plane_funcs, formats, nb_fmt, + type, NULL); + if (ret < 0) + return 0; + + drm_plane_helper_add(plane, <dc_plane_helper_funcs); + + DRM_DEBUG_DRIVER("plane:%d created\n", plane->base.id); + + return plane; +} + +static void ltdc_plane_destroy_all(struct drm_device *ddev) +{ + struct drm_plane *plane, *plane_temp; + + list_for_each_entry_safe(plane, plane_temp, + &ddev->mode_config.plane_list, head) + drm_plane_cleanup(plane); +} + +static int ltdc_crtc_init(struct drm_device *ddev, struct drm_crtc *crtc) +{ + struct ltdc_device *ldev = ddev->dev_private; + struct drm_plane *primary, *overlay; + unsigned int i; + int res; + + primary = ltdc_plane_create(ddev, DRM_PLANE_TYPE_PRIMARY); + if (!primary) { + DRM_ERROR("Can not create primary plane\n"); + return -EINVAL; + } + + res = drm_crtc_init_with_planes(ddev, crtc, primary, NULL, + <dc_crtc_funcs, NULL); + if (res) { + DRM_ERROR("Can not initialize CRTC\n"); + goto cleanup; + } + + drm_crtc_helper_add(crtc, <dc_crtc_helper_funcs); + + DRM_DEBUG_DRIVER("CRTC:%d created\n", crtc->base.id); + + /* Add planes. 
Note : the first layer is used by primary plane */ + for (i = 1; i < ldev->caps.nb_layers; i++) { + overlay = ltdc_plane_create(ddev, DRM_PLANE_TYPE_OVERLAY); + if (!overlay) { + res = -ENOMEM; + DRM_ERROR("Can not create overlay plane %d\n", i); + goto cleanup; + } + } + + return 0; + +cleanup: + ltdc_plane_destroy_all(ddev); + return res; +} + +/* + * DRM_ENCODER + */ + +static void ltdc_rgb_encoder_enable(struct drm_encoder *encoder) +{ + struct ltdc_device *ldev = encoder_to_ltdc(encoder); + + DRM_DEBUG_DRIVER("\n"); + + drm_panel_prepare(ldev->panel); + drm_panel_enable(ldev->panel); +} + +static void ltdc_rgb_encoder_disable(struct drm_encoder *encoder) +{ + struct ltdc_device *ldev = encoder_to_ltdc(encoder); + + DRM_DEBUG_DRIVER("\n"); + + drm_panel_disable(ldev->panel); + drm_panel_unprepare(ldev->panel); +} + +static const struct drm_encoder_helper_funcs ltdc_rgb_encoder_helper_funcs = { + .enable = ltdc_rgb_encoder_enable, + .disable = ltdc_rgb_encoder_disable, +}; + +static const struct drm_encoder_funcs ltdc_rgb_encoder_funcs = { + .destroy = drm_encoder_cleanup, +}; + +static struct drm_encoder *ltdc_rgb_encoder_create(struct drm_device *ddev) +{ + struct drm_encoder *encoder; + + encoder = devm_kzalloc(ddev->dev, sizeof(*encoder), GFP_KERNEL); + if (!encoder) + return NULL; + + encoder->possible_crtcs = CRTC_MASK; + encoder->possible_clones = 0; /* No cloning support */ + + drm_encoder_init(ddev, encoder, <dc_rgb_encoder_funcs, + DRM_MODE_ENCODER_DPI, NULL); + + drm_encoder_helper_add(encoder, <dc_rgb_encoder_helper_funcs); + + DRM_DEBUG_DRIVER("RGB encoder:%d created\n", encoder->base.id); + + return encoder; +} + +/* + * DRM_CONNECTOR + */ + +static int ltdc_rgb_connector_get_modes(struct drm_connector *connector) +{ + struct drm_device *ddev = connector->dev; + struct ltdc_device *ldev = ddev->dev_private; + int ret = 0; + + DRM_DEBUG_DRIVER("\n"); + + if (ldev->panel) + ret = drm_panel_get_modes(ldev->panel); + + return ret < 0 ? 0 : ret; +} + +static struct drm_connector_helper_funcs ltdc_rgb_connector_helper_funcs = { + .get_modes = ltdc_rgb_connector_get_modes, +}; + +static enum drm_connector_status +ltdc_rgb_connector_detect(struct drm_connector *connector, bool force) +{ + struct ltdc_device *ldev = connector_to_ltdc(connector); + + return ldev->panel ? 
connector_status_connected : + connector_status_disconnected; +} + +static void ltdc_rgb_connector_destroy(struct drm_connector *connector) +{ + DRM_DEBUG_DRIVER("\n"); + + drm_connector_unregister(connector); + drm_connector_cleanup(connector); +} + +static const struct drm_connector_funcs ltdc_rgb_connector_funcs = { + .dpms = drm_atomic_helper_connector_dpms, + .fill_modes = drm_helper_probe_single_connector_modes, + .detect = ltdc_rgb_connector_detect, + .destroy = ltdc_rgb_connector_destroy, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +struct drm_connector *ltdc_rgb_connector_create(struct drm_device *ddev) +{ + struct drm_connector *connector; + int err; + + connector = devm_kzalloc(ddev->dev, sizeof(*connector), GFP_KERNEL); + if (!connector) { + DRM_ERROR("Failed to allocate connector\n"); + return NULL; + } + + connector->polled = DRM_CONNECTOR_POLL_HPD; + + err = drm_connector_init(ddev, connector, <dc_rgb_connector_funcs, + DRM_MODE_CONNECTOR_DPI); + if (err) { + DRM_ERROR("Failed to initialize connector\n"); + return NULL; + } + + drm_connector_helper_add(connector, <dc_rgb_connector_helper_funcs); + + DRM_DEBUG_DRIVER("RGB connector:%d created\n", connector->base.id); + + return connector; +} + +static int ltdc_get_caps(struct drm_device *ddev) +{ + struct ltdc_device *ldev = ddev->dev_private; + u32 bus_width_log2, lcr, gc2r; + + /* at least 1 layer must be managed */ + lcr = reg_read(ldev->regs, LTDC_LCR); + + ldev->caps.nb_layers = max_t(int, lcr, 1); + + /* set data bus width */ + gc2r = reg_read(ldev->regs, LTDC_GC2R); + bus_width_log2 = (gc2r & GC2R_BW) >> 4; + ldev->caps.bus_width = 8 << bus_width_log2; + ldev->caps.hw_version = reg_read(ldev->regs, LTDC_IDR); + + switch (ldev->caps.hw_version) { + case HWVER_10200: + case HWVER_10300: + ldev->caps.reg_ofs = REG_OFS_NONE; + ldev->caps.pix_fmt_hw = ltdc_pix_fmt_a0; + break; + case HWVER_20101: + ldev->caps.reg_ofs = REG_OFS_4; + ldev->caps.pix_fmt_hw = ltdc_pix_fmt_a1; + break; + default: + return -ENODEV; + } + + return 0; +} + +static struct drm_panel *ltdc_get_panel(struct drm_device *ddev) +{ + struct device *dev = ddev->dev; + struct device_node *np = dev->of_node; + struct device_node *entity, *port = NULL; + struct drm_panel *panel = NULL; + + DRM_DEBUG_DRIVER("\n"); + + /* + * Parse ltdc node to get remote port and find RGB panel / HDMI slave + * If a dsi or a bridge (hdmi, lvds...) is connected to ltdc, + * a remote port & RGB panel will not be found. 
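 *
 * (Worked example for ltdc_crtc_mode_set_nofb() above, not part of the
 * patch, with illustrative 480x272 panel timings: hactive=480 hfp=2
 * hbp=2 hsync_len=41, vactive=272 vfp=2 vbp=2 vsync_len=10. The code
 * then computes hsync=40, vsync=9, accum_hbp=42, accum_vbp=11,
 * accum_act_w=522, accum_act_h=283, total_width=524, total_height=285,
 * and so programs SSCR=(40<<16)|9, BPCR=(42<<16)|11, AWCR=(522<<16)|283,
 * TWCR=(524<<16)|285 and LIPCR=284.)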
+ */ + for_each_endpoint_of_node(np, entity) { + if (!of_device_is_available(entity)) + continue; + + port = of_graph_get_remote_port_parent(entity); + if (port) { + panel = of_drm_find_panel(port); + of_node_put(port); + if (panel) { + DRM_DEBUG_DRIVER("remote panel %s\n", + port->full_name); + } else { + DRM_DEBUG_DRIVER("panel missing\n"); + of_node_put(entity); + } + } + } + + return panel; +} + +int ltdc_load(struct drm_device *ddev) +{ + struct platform_device *pdev = to_platform_device(ddev->dev); + struct ltdc_device *ldev = ddev->dev_private; + struct device *dev = ddev->dev; + struct device_node *np = dev->of_node; + struct drm_encoder *encoder; + struct drm_connector *connector = NULL; + struct drm_crtc *crtc; + struct reset_control *rstc; + struct resource res; + int irq, ret, i; + + DRM_DEBUG_DRIVER("\n"); + + ldev->panel = ltdc_get_panel(ddev); + if (!ldev->panel) + return -EPROBE_DEFER; + + rstc = of_reset_control_get(np, NULL); + + mutex_init(&ldev->err_lock); + + ldev->pixel_clk = devm_clk_get(dev, "lcd"); + if (IS_ERR(ldev->pixel_clk)) { + DRM_ERROR("Unable to get lcd clock\n"); + return -ENODEV; + } + + if (clk_prepare_enable(ldev->pixel_clk)) { + DRM_ERROR("Unable to prepare pixel clock\n"); + return -ENODEV; + } + + if (of_address_to_resource(np, 0, &res)) { + DRM_ERROR("Unable to get resource\n"); + return -ENODEV; + } + + ldev->regs = devm_ioremap_resource(dev, &res); + if (IS_ERR(ldev->regs)) { + DRM_ERROR("Unable to get ltdc registers\n"); + return PTR_ERR(ldev->regs); + } + + for (i = 0; i < MAX_IRQ; i++) { + irq = platform_get_irq(pdev, i); + if (irq < 0) + continue; + + ret = devm_request_threaded_irq(dev, irq, ltdc_irq, + ltdc_irq_thread, IRQF_ONESHOT, + dev_name(dev), ddev); + if (ret) { + DRM_ERROR("Failed to register LTDC interrupt\n"); + return ret; + } + } + + if (!IS_ERR(rstc)) + reset_control_deassert(rstc); + + /* Disable interrupts */ + reg_clear(ldev->regs, LTDC_IER, + IER_LIE | IER_RRIE | IER_FUIE | IER_TERRIE); + + ret = ltdc_get_caps(ddev); + if (ret) { + DRM_ERROR("hardware identifier (0x%08x) not supported!\n", + ldev->caps.hw_version); + return ret; + } + + DRM_INFO("ltdc hw version 0x%08x - ready\n", ldev->caps.hw_version); + + if (ldev->panel) { + encoder = ltdc_rgb_encoder_create(ddev); + if (!encoder) { + DRM_ERROR("Failed to create RGB encoder\n"); + ret = -EINVAL; + goto err; + } + + connector = ltdc_rgb_connector_create(ddev); + if (!connector) { + DRM_ERROR("Failed to create RGB connector\n"); + ret = -EINVAL; + goto err; + } + + ret = drm_mode_connector_attach_encoder(connector, encoder); + if (ret) { + DRM_ERROR("Failed to attach connector to encoder\n"); + goto err; + } + + drm_panel_attach(ldev->panel, connector); + } + + crtc = devm_kzalloc(dev, sizeof(*crtc), GFP_KERNEL); + if (!crtc) { + DRM_ERROR("Failed to allocate crtc\n"); + ret = -ENOMEM; + goto err; + } + + ret = ltdc_crtc_init(ddev, crtc); + if (ret) { + DRM_ERROR("Failed to init crtc\n"); + goto err; + } + + ret = drm_vblank_init(ddev, NB_CRTC); + if (ret) { + DRM_ERROR("Failed calling drm_vblank_init()\n"); + goto err; + } + + /* Allow usage of vblank without having to call drm_irq_install */ + ddev->irq_enabled = 1; + + return 0; +err: + if (ldev->panel) + drm_panel_detach(ldev->panel); + + clk_disable_unprepare(ldev->pixel_clk); + + return ret; +} + +void ltdc_unload(struct drm_device *ddev) +{ + struct ltdc_device *ldev = ddev->dev_private; + + DRM_DEBUG_DRIVER("\n"); + + drm_vblank_cleanup(ddev); + + if (ldev->panel) + drm_panel_detach(ldev->panel); + + 
clk_disable_unprepare(ldev->pixel_clk); +} + +MODULE_AUTHOR("Philippe Cornu <philippe.cornu@st.com>"); +MODULE_AUTHOR("Yannick Fertre <yannick.fertre@st.com>"); +MODULE_AUTHOR("Fabien Dessenne <fabien.dessenne@st.com>"); +MODULE_AUTHOR("Mickael Reulier <mickael.reulier@st.com>"); +MODULE_DESCRIPTION("STMicroelectronics ST DRM LTDC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/stm/ltdc.h b/drivers/gpu/drm/stm/ltdc.h new file mode 100644 index 000000000000..d7a9c736ac1e --- /dev/null +++ b/drivers/gpu/drm/stm/ltdc.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) STMicroelectronics SA 2017 + * + * Authors: Philippe Cornu <philippe.cornu@st.com> + * Yannick Fertre <yannick.fertre@st.com> + * Fabien Dessenne <fabien.dessenne@st.com> + * Mickael Reulier <mickael.reulier@st.com> + * + * License terms: GNU General Public License (GPL), version 2 + */ + +#ifndef _LTDC_H_ +#define _LTDC_H_ + +struct ltdc_caps { + u32 hw_version; /* hardware version */ + u32 nb_layers; /* number of supported layers */ + u32 reg_ofs; /* register offset for applicable regs */ + u32 bus_width; /* bus width (32 or 64 bits) */ + const u32 *pix_fmt_hw; /* supported pixel formats */ +}; + +struct ltdc_device { + struct drm_fbdev_cma *fbdev; + void __iomem *regs; + struct clk *pixel_clk; /* lcd pixel clock */ + struct drm_panel *panel; + struct mutex err_lock; /* protecting error_status */ + struct ltdc_caps caps; + u32 clut[256]; /* color look up table */ + u32 error_status; + u32 irq_status; +}; + +int ltdc_crtc_enable_vblank(struct drm_device *dev, unsigned int pipe); +void ltdc_crtc_disable_vblank(struct drm_device *dev, unsigned int pipe); +int ltdc_load(struct drm_device *ddev); +void ltdc_unload(struct drm_device *ddev); + +#endif diff --git a/drivers/gpu/drm/tdfx/Makefile b/drivers/gpu/drm/tdfx/Makefile index 0379f294b32a..74bd4ae32348 100644 --- a/drivers/gpu/drm/tdfx/Makefile +++ b/drivers/gpu/drm/tdfx/Makefile @@ -2,7 +2,6 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. 
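/* (Side note, illustration only and not part of the patch: the LTDC
 * plane code above programs the LXPFCR_PF field with an index into the
 * per-revision caps.pix_fmt_hw[] table, searching the table for the
 * matching enum ltdc_pix_fmt. Factored as a hypothetical helper, that
 * lookup would read:)
 */
static int ltdc_fmt_to_hw_index(const struct ltdc_device *ldev,
				enum ltdc_pix_fmt pf)
{
	int i;

	for (i = 0; i < NB_PF; i++)
		if (ldev->caps.pix_fmt_hw[i] == pf)
			return i;	/* value for the LXPFCR_PF field */

	return -EINVAL;			/* unsupported on this revision */
}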
-ccflags-y := -Iinclude/drm tdfx-y := tdfx_drv.o obj-$(CONFIG_DRM_TDFX) += tdfx.o diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 9a1e34e48f64..51c48a8e00ec 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -892,7 +892,7 @@ static int tegra_drm_context_cleanup(int id, void *p, void *data) return 0; } -static void tegra_drm_preclose(struct drm_device *drm, struct drm_file *file) +static void tegra_drm_postclose(struct drm_device *drm, struct drm_file *file) { struct tegra_drm_file *fpriv = file->driver_priv; @@ -960,7 +960,7 @@ static struct drm_driver tegra_drm_driver = { .load = tegra_drm_load, .unload = tegra_drm_unload, .open = tegra_drm_open, - .preclose = tegra_drm_preclose, + .postclose = tegra_drm_postclose, .lastclose = tegra_drm_lastclose, #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/tilcdc/Makefile b/drivers/gpu/drm/tilcdc/Makefile index 6f675175a9e5..55ebd516728f 100644 --- a/drivers/gpu/drm/tilcdc/Makefile +++ b/drivers/gpu/drm/tilcdc/Makefile @@ -1,4 +1,3 @@ -ccflags-y := -Iinclude/drm ifeq (, $(findstring -W,$(EXTRA_CFLAGS))) ccflags-y += -Werror endif diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index d7ae5be56d12..d67e18983a7d 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -22,6 +22,7 @@ #include <linux/suspend.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_fb_helper.h> #include "tilcdc_drv.h" #include "tilcdc_regs.h" @@ -29,8 +30,6 @@ #include "tilcdc_panel.h" #include "tilcdc_external.h" -#include "drm_fb_helper.h" - static LIST_HEAD(module_list); static const u32 tilcdc_rev1_formats[] = { DRM_FORMAT_RGB565 }; diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile index f92325800f8a..4d0c938ff4b2 100644 --- a/drivers/gpu/drm/ttm/Makefile +++ b/drivers/gpu/drm/ttm/Makefile @@ -1,7 +1,6 @@ # # Makefile for the drm device driver. 
This driver provides support for the -ccflags-y := -Iinclude/drm ttm-y := ttm_memory.o ttm_tt.o ttm_bo.o \ ttm_bo_util.o ttm_bo_vm.o ttm_module.o \ ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o \ diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 9f53df95f35c..b442d12f2f7d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -30,9 +30,9 @@ #define pr_fmt(fmt) "[TTM] " fmt -#include <ttm/ttm_module.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> +#include <drm/ttm/ttm_module.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> #include <drm/drm_vma_manager.h> #include <linux/mm.h> #include <linux/pfn_t.h> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 5260179d788a..8ebc8d3560c3 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -39,7 +39,6 @@ #include <linux/slab.h> #include <linux/export.h> #include <drm/drm_cache.h> -#include <drm/drm_mem_util.h> #include <drm/ttm/ttm_module.h> #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_placement.h> @@ -53,14 +52,16 @@ */ static void ttm_tt_alloc_page_directory(struct ttm_tt *ttm) { - ttm->pages = drm_calloc_large(ttm->num_pages, sizeof(void*)); + ttm->pages = kvmalloc_array(ttm->num_pages, sizeof(void*), + GFP_KERNEL | __GFP_ZERO); } static void ttm_dma_tt_alloc_page_directory(struct ttm_dma_tt *ttm) { - ttm->ttm.pages = drm_calloc_large(ttm->ttm.num_pages, + ttm->ttm.pages = kvmalloc_array(ttm->ttm.num_pages, sizeof(*ttm->ttm.pages) + - sizeof(*ttm->dma_address)); + sizeof(*ttm->dma_address), + GFP_KERNEL | __GFP_ZERO); ttm->dma_address = (void *) (ttm->ttm.pages + ttm->ttm.num_pages); } @@ -208,7 +209,7 @@ EXPORT_SYMBOL(ttm_tt_init); void ttm_tt_fini(struct ttm_tt *ttm) { - drm_free_large(ttm->pages); + kvfree(ttm->pages); ttm->pages = NULL; } EXPORT_SYMBOL(ttm_tt_fini); @@ -243,7 +244,7 @@ void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma) { struct ttm_tt *ttm = &ttm_dma->ttm; - drm_free_large(ttm->pages); + kvfree(ttm->pages); ttm->pages = NULL; ttm_dma->dma_address = NULL; } diff --git a/drivers/gpu/drm/udl/Makefile b/drivers/gpu/drm/udl/Makefile index 195bcac0b6c8..36f2e825102b 100644 --- a/drivers/gpu/drm/udl/Makefile +++ b/drivers/gpu/drm/udl/Makefile @@ -1,6 +1,3 @@ - -ccflags-y := -Iinclude/drm - udl-y := udl_drv.o udl_modeset.o udl_connector.o udl_encoder.o udl_main.o udl_fb.o udl_transfer.o udl_gem.o udl_dmabuf.o obj-$(CONFIG_DRM_UDL) := udl.o diff --git a/drivers/gpu/drm/udl/udl_dmabuf.c b/drivers/gpu/drm/udl/udl_dmabuf.c index ed0e636243b2..2e031a894813 100644 --- a/drivers/gpu/drm/udl/udl_dmabuf.c +++ b/drivers/gpu/drm/udl/udl_dmabuf.c @@ -228,7 +228,7 @@ static int udl_prime_create(struct drm_device *dev, return -ENOMEM; obj->sg = sg; - obj->pages = drm_malloc_ab(npages, sizeof(struct page *)); + obj->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (obj->pages == NULL) { DRM_ERROR("obj pages is NULL %d\n", npages); return -ENOMEM; diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index 775c50e4f02c..db9ceceba30e 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -146,7 +146,7 @@ int udl_gem_get_pages(struct udl_gem_object *obj) void udl_gem_put_pages(struct udl_gem_object *obj) { if (obj->base.import_attach) { - drm_free_large(obj->pages); + kvfree(obj->pages); obj->pages = NULL; return; } diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 
973b4203c0b2..b16aefe4a8d3 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -1,6 +1,6 @@ config DRM_VC4 tristate "Broadcom VC4 Graphics" - depends on ARCH_BCM2835 || COMPILE_TEST + depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST depends on DRM depends on SND && SND_SOC depends on COMMON_CLK diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile index 61f45d122bd0..25bd5d30415d 100644 --- a/drivers/gpu/drm/vc4/Makefile +++ b/drivers/gpu/drm/vc4/Makefile @@ -1,5 +1,3 @@ -ccflags-y := -Iinclude/drm - # Please keep these build lists sorted! # core driver code @@ -9,6 +7,7 @@ vc4-y := \ vc4_drv.o \ vc4_dpi.o \ vc4_dsi.o \ + vc4_fence.o \ vc4_kms.o \ vc4_gem.o \ vc4_hdmi.o \ diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index af29432a6471..80b2f9e55c5c 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -19,6 +19,8 @@ * rendering can return quickly. */ +#include <linux/dma-buf.h> + #include "vc4_drv.h" #include "uapi/drm/vc4_drm.h" @@ -88,6 +90,10 @@ static void vc4_bo_destroy(struct vc4_bo *bo) vc4->bo_stats.num_allocated--; vc4->bo_stats.size_allocated -= obj->size; + + if (bo->resv == &bo->_resv) + reservation_object_fini(bo->resv); + drm_gem_cma_free_object(obj); } @@ -244,8 +250,12 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, return ERR_PTR(-ENOMEM); } } + bo = to_vc4_bo(&cma_obj->base); - return to_vc4_bo(&cma_obj->base); + bo->resv = &bo->_resv; + reservation_object_init(bo->resv); + + return bo; } int vc4_dumb_create(struct drm_file *file_priv, @@ -369,6 +379,13 @@ static void vc4_bo_cache_time_timer(unsigned long data) schedule_work(&vc4->bo_cache.time_work); } +struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj) +{ + struct vc4_bo *bo = to_vc4_bo(obj); + + return bo->resv; +} + struct dma_buf * vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) { @@ -440,6 +457,24 @@ void *vc4_prime_vmap(struct drm_gem_object *obj) return drm_gem_cma_prime_vmap(obj); } +struct drm_gem_object * +vc4_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct drm_gem_object *obj; + struct vc4_bo *bo; + + obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt); + if (IS_ERR(obj)) + return obj; + + bo = to_vc4_bo(obj); + bo->resv = attach->dmabuf->resv; + + return obj; +} + int vc4_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index d86c8cce3182..0bfc4d88e4c2 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -32,13 +32,13 @@ * ones that set the clock. 
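 *
 * (Side note on the reservation-object plumbing in vc4_bo.c above, not
 * part of the patch: once jobs attach fences to bo->resv, any holder of
 * the GEM object can wait for the GPU without knowing vc4 seqnos, along
 * the lines of
 *
 *	long ret = reservation_object_wait_timeout_rcu(bo->resv,
 *						       true, true, HZ);
 *
 * i.e. wait_all, interruptible, 1s timeout. An imported dma-buf reuses
 * the exporter's reservation object, which is why vc4_bo_destroy()
 * calls reservation_object_fini() only when bo->resv == &bo->_resv.)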
*/ -#include "drm_atomic.h" -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "linux/clk.h" -#include "drm_fb_cma_helper.h" -#include "linux/component.h" -#include "linux/of_device.h" +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <linux/clk.h> +#include <drm/drm_fb_cma_helper.h> +#include <linux/component.h> +#include <linux/of_device.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -151,10 +151,10 @@ int vc4_crtc_debugfs_regs(struct seq_file *m, void *unused) } #endif -int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode) +bool vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_crtc *crtc = drm_crtc_from_index(dev, crtc_id); @@ -162,7 +162,7 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, u32 val; int fifo_lines; int vblank_lines; - int ret = 0; + bool ret = false; /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -198,7 +198,7 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay; if (fifo_lines > 0) - ret |= DRM_SCANOUTPOS_VALID; + ret = true; /* HVS more than fifo_lines into frame for compositing? */ if (*vpos > fifo_lines) { @@ -216,7 +216,6 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, */ *vpos -= fifo_lines + 1; - ret |= DRM_SCANOUTPOS_ACCURATE; return ret; } @@ -229,10 +228,9 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, * We can't get meaningful readings wrt. scanline position of the PV * and need to make things up in a approximative but consistent way. */ - ret |= DRM_SCANOUTPOS_IN_VBLANK; vblank_lines = mode->vtotal - mode->vdisplay; - if (flags & DRM_CALLED_FROM_VBLIRQ) { + if (in_vblank_irq) { /* * Assume the irq handler got called close to first * line of vblank, so PV has about a full vblank @@ -254,9 +252,10 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, * we are at the very beginning of vblank, as the hvs just * started refilling, and the stime and etime timestamps * truly correspond to start of vblank. + * + * Unfortunately there's no way to report this to upper levels + * and make it more useful. */ - if ((val & SCALER_DISPSTATX_FULL) != SCALER_DISPSTATX_FULL) - ret |= DRM_SCANOUTPOS_ACCURATE; } else { /* * No clue where we are inside vblank. 
Return a vpos of zero, @@ -270,19 +269,6 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, return ret; } -int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id, - int *max_error, struct timeval *vblank_time, - unsigned flags) -{ - struct drm_crtc *crtc = drm_crtc_from_index(dev, crtc_id); - struct drm_crtc_state *state = crtc->state; - - /* Helper routine in DRM core does all the work: */ - return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc_id, max_error, - vblank_time, flags, - &state->adjusted_mode); -} - static void vc4_crtc_destroy(struct drm_crtc *crtc) { drm_crtc_cleanup(crtc); diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c index c6d703903fd9..39d68080873c 100644 --- a/drivers/gpu/drm/vc4/vc4_dpi.c +++ b/drivers/gpu/drm/vc4/vc4_dpi.c @@ -22,14 +22,14 @@ * ALT2 function. */ -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "drm_edid.h" -#include "drm_panel.h" -#include "linux/clk.h" -#include "linux/component.h" -#include "linux/of_graph.h" -#include "linux/of_platform.h" +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_edid.h> +#include <drm/drm_panel.h> +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/of_graph.h> +#include <linux/of_platform.h> #include "vc4_drv.h" #include "vc4_regs.h" diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 61e674baf3a6..136bb4213dc0 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -31,7 +31,7 @@ #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> -#include "drm_fb_cma_helper.h" +#include <drm/drm_fb_cma_helper.h> #include <drm/drm_fb_helper.h> #include "uapi/drm/vc4_drm.h" @@ -154,7 +154,7 @@ static struct drm_driver vc4_drm_driver = { .irq_uninstall = vc4_irq_uninstall, .get_scanout_position = vc4_crtc_get_scanoutpos, - .get_vblank_timestamp = vc4_crtc_get_vblank_timestamp, + .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, #if defined(CONFIG_DEBUG_FS) .debugfs_init = vc4_debugfs_init, @@ -168,8 +168,9 @@ static struct drm_driver vc4_drm_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = drm_gem_prime_import, .gem_prime_export = vc4_prime_export, + .gem_prime_res_obj = vc4_prime_res_obj, .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, - .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, + .gem_prime_import_sg_table = vc4_prime_import_sg_table, .gem_prime_vmap = vc4_prime_vmap, .gem_prime_vunmap = drm_gem_cma_prime_vunmap, .gem_prime_mmap = vc4_prime_mmap, @@ -334,6 +335,7 @@ static int vc4_platform_drm_remove(struct platform_device *pdev) static const struct of_device_id vc4_of_match[] = { { .compatible = "brcm,bcm2835-vc4", }, + { .compatible = "brcm,cygnus-vc4", }, {}, }; MODULE_DEVICE_TABLE(of, vc4_of_match); diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index dffce6293d87..a97556f7ccba 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -6,10 +6,10 @@ * published by the Free Software Foundation. */ -#include "drmP.h" -#include "drm_gem_cma_helper.h" - +#include <linux/reservation.h> +#include <drm/drmP.h> #include <drm/drm_encoder.h> +#include <drm/drm_gem_cma_helper.h> struct vc4_dev { struct drm_device *dev; @@ -56,6 +56,8 @@ struct vc4_dev { /* Protects bo_cache and the BO stats. 
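 *
 * (Side note on the scanout-position rework above, not part of the
 * patch: the hook now takes a bool in_vblank_irq and simply returns
 * true when *vpos/*hpos are usable, replacing the old
 * DRM_SCANOUTPOS_VALID/_ACCURATE/_IN_VBLANK flag bits, and the driver
 * plugs the core drm_calc_vbltimestamp_from_scanoutpos() helper
 * straight into .get_vblank_timestamp instead of wrapping it.)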
*/ struct mutex bo_lock; + uint64_t dma_fence_context; + /* Sequence number for the last job queued in bin_job_list. * Starts at 0 (no jobs emitted). */ @@ -95,12 +97,23 @@ struct vc4_dev { */ struct list_head seqno_cb_list; - /* The binner overflow memory that's currently set up in - * BPOA/BPOS registers. When overflow occurs and a new one is - * allocated, the previous one will be moved to - * vc4->current_exec's free list. + /* The memory used for storing binner tile alloc, tile state, + * and overflow memory allocations. This is freed when V3D + * powers down. + */ + struct vc4_bo *bin_bo; + + /* Size of blocks allocated within bin_bo. */ + uint32_t bin_alloc_size; + + /* Bitmask of the bin_alloc_size chunks in bin_bo that are + * used. */ - struct vc4_bo *overflow_mem; + uint32_t bin_alloc_used; + + /* Bitmask of the current bin_alloc used for overflow memory. */ + uint32_t bin_alloc_overflow; + struct work_struct overflow_mem_work; int power_refcount; @@ -150,6 +163,10 @@ struct vc4_bo { * DRM_IOCTL_VC4_CREATE_SHADER_BO. */ struct vc4_validated_shader_info *validated_shader; + + /* normally (resv == &_resv) except for imported bo's */ + struct reservation_object *resv; + struct reservation_object _resv; }; static inline struct vc4_bo * @@ -158,6 +175,19 @@ to_vc4_bo(struct drm_gem_object *bo) return (struct vc4_bo *)bo; } +struct vc4_fence { + struct dma_fence base; + struct drm_device *dev; + /* vc4 seqno for signaled() test */ + uint64_t seqno; +}; + +static inline struct vc4_fence * +to_vc4_fence(struct dma_fence *fence) +{ + return (struct vc4_fence *)fence; +} + struct vc4_seqno_cb { struct work_struct work; uint64_t seqno; @@ -168,6 +198,7 @@ struct vc4_v3d { struct vc4_dev *vc4; struct platform_device *pdev; void __iomem *regs; + struct clk *clk; }; struct vc4_hvs { @@ -230,6 +261,8 @@ struct vc4_exec_info { /* Latest write_seqno of any BO that binning depends on. */ uint64_t bin_dep_seqno; + struct dma_fence *fence; + /* Last current addresses the hardware was processing when the * hangcheck timer checked on us. */ @@ -293,8 +326,12 @@ struct vc4_exec_info { bool found_increment_semaphore_packet; bool found_flush; uint8_t bin_tiles_x, bin_tiles_y; - struct drm_gem_cma_object *tile_bo; + /* Physical address of the start of the tile alloc array + * (where each tile's binned CL will start) + */ uint32_t tile_alloc_offset; + /* Bitmask of which binner slots are freed when this job completes. 
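 *
 * (A sketch of the slot scheme described above, illustrative only; the
 * real allocator is vc4_v3d_get_bin_slot() in vc4_v3d.c, outside this
 * hunk:
 *
 *	spin_lock_irqsave(&vc4->job_lock, irqflags);
 *	slot = ffs(~vc4->bin_alloc_used) - 1;	(first clear bit, -1 if full)
 *	if (slot >= 0)
 *		vc4->bin_alloc_used |= BIT(slot);
 *	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 *
 * vc4_complete_exec() then clears exec->bin_slots under the same
 * job_lock when the job retires.)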
*/ + uint32_t bin_slots; /** * Computed addresses pointing into exec_bo where we start the @@ -436,7 +473,11 @@ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_mmap(struct file *filp, struct vm_area_struct *vma); +struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj); int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); void *vc4_prime_vmap(struct drm_gem_object *obj); void vc4_bo_cache_init(struct drm_device *dev); void vc4_bo_cache_destroy(struct drm_device *dev); @@ -446,13 +487,10 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *arg); extern struct platform_driver vc4_crtc_driver; bool vc4_event_pending(struct drm_crtc *crtc); int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg); -int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode); -int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id, - int *max_error, struct timeval *vblank_time, - unsigned flags); +bool vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode); /* vc4_debugfs.c */ int vc4_debugfs_init(struct drm_minor *minor); @@ -468,6 +506,9 @@ int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused); extern struct platform_driver vc4_dsi_driver; int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused); +/* vc4_fence.c */ +extern const struct dma_fence_ops vc4_fence_ops; + /* vc4_gem.c */ void vc4_gem_init(struct drm_device *dev); void vc4_gem_destroy(struct drm_device *dev); @@ -522,6 +563,7 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, extern struct platform_driver vc4_v3d_driver; int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused); int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused); +int vc4_v3d_get_bin_slot(struct vc4_dev *vc4); /* vc4_validate.c */ int diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index 160f981d1cf4..0ef41df3915f 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -29,20 +29,20 @@ * hopefully present. */ -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "drm_edid.h" -#include "drm_mipi_dsi.h" -#include "drm_panel.h" -#include "linux/clk.h" -#include "linux/clk-provider.h" -#include "linux/completion.h" -#include "linux/component.h" -#include "linux/dmaengine.h" -#include "linux/i2c.h" -#include "linux/of_address.h" -#include "linux/of_platform.h" -#include "linux/pm_runtime.h" +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_edid.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_panel.h> +#include <linux/clk.h> +#include <linux/clk-provider.h> +#include <linux/completion.h> +#include <linux/component.h> +#include <linux/dmaengine.h> +#include <linux/i2c.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include <linux/pm_runtime.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -519,7 +519,8 @@ struct vc4_dsi { /* DSI channel for the panel we're connected to. 
*/ u32 channel; u32 lanes; - enum mipi_dsi_pixel_format format; + u32 format; + u32 divider; u32 mode_flags; /* Input clock from CPRMAN to the digital PHY, for the DSI @@ -906,13 +907,67 @@ static void vc4_dsi_encoder_disable(struct drm_encoder *encoder) pm_runtime_put(dev); } +/* Extends the mode's blank intervals to handle BCM2835's integer-only + * DSI PLL divider. + * + * On 2835, PLLD is set to 2Ghz, and may not be changed by the display + * driver since most peripherals are hanging off of the PLLD_PER + * divider. PLLD_DSI1, which drives our DSI bit clock (and therefore + * the pixel clock), only has an integer divider off of DSI. + * + * To get our panel mode to refresh at the expected 60Hz, we need to + * extend the horizontal blank time. This means we drive a + * higher-than-expected clock rate to the panel, but that's what the + * firmware does too. + */ +static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder); + struct vc4_dsi *dsi = vc4_encoder->dsi; + struct clk *phy_parent = clk_get_parent(dsi->pll_phy_clock); + unsigned long parent_rate = clk_get_rate(phy_parent); + unsigned long pixel_clock_hz = mode->clock * 1000; + unsigned long pll_clock = pixel_clock_hz * dsi->divider; + int divider; + + /* Find what divider gets us a faster clock than the requested + * pixel clock. + */ + for (divider = 1; divider < 8; divider++) { + if (parent_rate / divider < pll_clock) { + divider--; + break; + } + } + + /* Now that we've picked a PLL divider, calculate back to its + * pixel clock. + */ + pll_clock = parent_rate / divider; + pixel_clock_hz = pll_clock / dsi->divider; + + /* Round up the clk_set_rate() request slightly, since + * PLLD_DSI1 is an integer divider and its rate selection will + * never round up. + */ + adjusted_mode->clock = pixel_clock_hz / 1000 + 1; + + /* Given the new pixel clock, adjust HFP to keep vrefresh the same. 
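 *
 * (Worked example with illustrative numbers, not part of the patch:
 * RGB888 over two lanes gives dsi->divider = 24 / 2 = 12. For
 * mode->clock = 70000 kHz the requested pll_clock is 840 MHz; with
 * parent_rate = 2 GHz the loop stops at divider = 2, since
 * 2 GHz / 3 ~= 667 MHz would be too slow. pll_clock becomes 1 GHz,
 * pixel_clock_hz ~= 83.33 MHz, and htotal is stretched by the same
 * ~1.19x so the panel still refreshes at the mode's nominal vrefresh.)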
*/ + adjusted_mode->htotal = pixel_clock_hz / (mode->vrefresh * mode->vtotal); + adjusted_mode->hsync_end += adjusted_mode->htotal - mode->htotal; + adjusted_mode->hsync_start += adjusted_mode->htotal - mode->htotal; + + return true; +} + static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) { - struct drm_display_mode *mode = &encoder->crtc->mode; + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder); struct vc4_dsi *dsi = vc4_encoder->dsi; struct device *dev = &dsi->pdev->dev; - u32 format = 0, divider = 0; bool debug_dump_regs = false; unsigned long hs_clock; u32 ui_ns; @@ -940,26 +995,7 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) vc4_dsi_dump_regs(dsi); } - switch (dsi->format) { - case MIPI_DSI_FMT_RGB888: - format = DSI_PFORMAT_RGB888; - divider = 24 / dsi->lanes; - break; - case MIPI_DSI_FMT_RGB666: - format = DSI_PFORMAT_RGB666; - divider = 24 / dsi->lanes; - break; - case MIPI_DSI_FMT_RGB666_PACKED: - format = DSI_PFORMAT_RGB666_PACKED; - divider = 18 / dsi->lanes; - break; - case MIPI_DSI_FMT_RGB565: - format = DSI_PFORMAT_RGB565; - divider = 16 / dsi->lanes; - break; - } - - phy_clock = pixel_clock_hz * divider; + phy_clock = pixel_clock_hz * dsi->divider; ret = clk_set_rate(dsi->pll_phy_clock, phy_clock); if (ret) { dev_err(&dsi->pdev->dev, @@ -1134,8 +1170,9 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { DSI_PORT_WRITE(DISP0_CTRL, - VC4_SET_FIELD(divider, DSI_DISP0_PIX_CLK_DIV) | - VC4_SET_FIELD(format, DSI_DISP0_PFORMAT) | + VC4_SET_FIELD(dsi->divider, + DSI_DISP0_PIX_CLK_DIV) | + VC4_SET_FIELD(dsi->format, DSI_DISP0_PFORMAT) | VC4_SET_FIELD(DSI_DISP0_LP_STOP_PERFRAME, DSI_DISP0_LP_STOP_CTRL) | DSI_DISP0_ST_END | @@ -1347,9 +1384,31 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host, dsi->lanes = device->lanes; dsi->channel = device->channel; - dsi->format = device->format; dsi->mode_flags = device->mode_flags; + switch (device->format) { + case MIPI_DSI_FMT_RGB888: + dsi->format = DSI_PFORMAT_RGB888; + dsi->divider = 24 / dsi->lanes; + break; + case MIPI_DSI_FMT_RGB666: + dsi->format = DSI_PFORMAT_RGB666; + dsi->divider = 24 / dsi->lanes; + break; + case MIPI_DSI_FMT_RGB666_PACKED: + dsi->format = DSI_PFORMAT_RGB666_PACKED; + dsi->divider = 18 / dsi->lanes; + break; + case MIPI_DSI_FMT_RGB565: + dsi->format = DSI_PFORMAT_RGB565; + dsi->divider = 16 / dsi->lanes; + break; + default: + dev_err(&dsi->pdev->dev, "Unknown DSI format: %d.\n", + dsi->format); + return 0; + } + if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO)) { dev_err(&dsi->pdev->dev, "Only VIDEO mode panels supported currently.\n"); @@ -1397,6 +1456,7 @@ static const struct mipi_dsi_host_ops vc4_dsi_host_ops = { static const struct drm_encoder_helper_funcs vc4_dsi_encoder_helper_funcs = { .disable = vc4_dsi_encoder_disable, .enable = vc4_dsi_encoder_enable, + .mode_fixup = vc4_dsi_encoder_mode_fixup, }; static const struct of_device_id vc4_dsi_dt_match[] = { diff --git a/drivers/gpu/drm/vc4/vc4_fence.c b/drivers/gpu/drm/vc4/vc4_fence.c new file mode 100644 index 000000000000..dbf5a5a5d5f5 --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_fence.c @@ -0,0 +1,56 @@ +/* + * Copyright © 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * 
the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "vc4_drv.h" + +static const char *vc4_fence_get_driver_name(struct dma_fence *fence) +{ + return "vc4"; +} + +static const char *vc4_fence_get_timeline_name(struct dma_fence *fence) +{ + return "vc4-v3d"; +} + +static bool vc4_fence_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static bool vc4_fence_signaled(struct dma_fence *fence) +{ + struct vc4_fence *f = to_vc4_fence(fence); + struct vc4_dev *vc4 = to_vc4_dev(f->dev); + + return vc4->finished_seqno >= f->seqno; +} + +const struct dma_fence_ops vc4_fence_ops = { + .get_driver_name = vc4_fence_get_driver_name, + .get_timeline_name = vc4_fence_get_timeline_name, + .enable_signaling = vc4_fence_enable_signaling, + .signaled = vc4_fence_signaled, + .wait = dma_fence_default_wait, + .release = dma_fence_free, +}; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index e9c381c42139..9dc7646d49ed 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -463,6 +463,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) for (i = 0; i < exec->bo_count; i++) { bo = to_vc4_bo(&exec->bo[i]->base); bo->seqno = seqno; + + reservation_object_add_shared_fence(bo->resv, exec->fence); } list_for_each_entry(bo, &exec->unref_list, unref_head) { @@ -472,7 +474,103 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) for (i = 0; i < exec->rcl_write_bo_count; i++) { bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); bo->write_seqno = seqno; + + reservation_object_add_excl_fence(bo->resv, exec->fence); + } +} + +static void +vc4_unlock_bo_reservations(struct drm_device *dev, + struct vc4_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int i; + + for (i = 0; i < exec->bo_count; i++) { + struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + ww_acquire_fini(acquire_ctx); +} + +/* Takes the reservation lock on all the BOs being referenced, so that + * at queue submit time we can update the reservations. + * + * We don't lock the RCL the tile alloc/state BOs, or overflow memory + * (all of which are on exec->unref_list). They're entirely private + * to vc4, so we don't attach dma-buf fences to them. 
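 *
 * (Side note, not part of the patch: the retry loop below is the
 * standard ww_mutex wound/wait protocol. On -EDEADLK the task drops
 * every lock it already holds, sleeps on the contended lock via
 * ww_mutex_lock_slow_interruptible(), and then retries the whole set,
 * so two submits referencing overlapping BO sets back off
 * deterministically instead of deadlocking.)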
+ */ +static int +vc4_lock_bo_reservations(struct drm_device *dev, + struct vc4_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int contended_lock = -1; + int i, ret; + struct vc4_bo *bo; + + ww_acquire_init(acquire_ctx, &reservation_ww_class); + +retry: + if (contended_lock != -1) { + bo = to_vc4_bo(&exec->bo[contended_lock]->base); + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, + acquire_ctx); + if (ret) { + ww_acquire_done(acquire_ctx); + return ret; + } + } + + for (i = 0; i < exec->bo_count; i++) { + if (i == contended_lock) + continue; + + bo = to_vc4_bo(&exec->bo[i]->base); + + ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx); + if (ret) { + int j; + + for (j = 0; j < i; j++) { + bo = to_vc4_bo(&exec->bo[j]->base); + ww_mutex_unlock(&bo->resv->lock); + } + + if (contended_lock != -1 && contended_lock >= i) { + bo = to_vc4_bo(&exec->bo[contended_lock]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + if (ret == -EDEADLK) { + contended_lock = i; + goto retry; + } + + ww_acquire_done(acquire_ctx); + return ret; + } } + + ww_acquire_done(acquire_ctx); + + /* Reserve space for our shared (read-only) fence references, + * before we commit the CL to the hardware. + */ + for (i = 0; i < exec->bo_count; i++) { + bo = to_vc4_bo(&exec->bo[i]->base); + + ret = reservation_object_reserve_shared(bo->resv); + if (ret) { + vc4_unlock_bo_reservations(dev, exec, acquire_ctx); + return ret; + } + } + + return 0; } /* Queues a struct vc4_exec_info for execution. If no job is @@ -484,19 +582,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) * then bump the end address. That's a change for a later date, * though. */ -static void -vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) +static int +vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) { struct vc4_dev *vc4 = to_vc4_dev(dev); uint64_t seqno; unsigned long irqflags; + struct vc4_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return -ENOMEM; + fence->dev = dev; spin_lock_irqsave(&vc4->job_lock, irqflags); seqno = ++vc4->emit_seqno; exec->seqno = seqno; + + dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock, + vc4->dma_fence_context, exec->seqno); + fence->seqno = exec->seqno; + exec->fence = &fence->base; + vc4_update_bo_seqnos(exec, seqno); + vc4_unlock_bo_reservations(dev, exec, acquire_ctx); + list_add_tail(&exec->head, &vc4->bin_job_list); /* If no job was executing, kick ours off. 
Otherwise, it'll @@ -509,6 +622,8 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) } spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + return 0; } /** @@ -545,14 +660,15 @@ vc4_cl_lookup_bos(struct drm_device *dev, return -EINVAL; } - exec->bo = drm_calloc_large(exec->bo_count, - sizeof(struct drm_gem_cma_object *)); + exec->bo = kvmalloc_array(exec->bo_count, + sizeof(struct drm_gem_cma_object *), + GFP_KERNEL | __GFP_ZERO); if (!exec->bo) { DRM_ERROR("Failed to allocate validated BO pointers\n"); return -ENOMEM; } - handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t)); + handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL); if (!handles) { ret = -ENOMEM; DRM_ERROR("Failed to allocate incoming GEM handles\n"); @@ -584,7 +700,7 @@ vc4_cl_lookup_bos(struct drm_device *dev, spin_unlock(&file_priv->table_lock); fail: - drm_free_large(handles); + kvfree(handles); return ret; } @@ -622,7 +738,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) * read the contents back for validation, and I think the * bo->vaddr is uncached access. */ - temp = drm_malloc_ab(temp_size, 1); + temp = kvmalloc_array(temp_size, 1, GFP_KERNEL); if (!temp) { DRM_ERROR("Failed to allocate storage for copying " "in bin/render CLs.\n"); @@ -697,7 +813,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true); fail: - drm_free_large(temp); + kvfree(temp); return ret; } @@ -705,12 +821,19 @@ static void vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) { struct vc4_dev *vc4 = to_vc4_dev(dev); + unsigned long irqflags; unsigned i; + /* If we got force-completed because of GPU reset rather than + * through our IRQ handler, signal the fence now. + */ + if (exec->fence) + dma_fence_signal(exec->fence); + if (exec->bo) { for (i = 0; i < exec->bo_count; i++) drm_gem_object_unreference_unlocked(&exec->bo[i]->base); - drm_free_large(exec->bo); + kvfree(exec->bo); } while (!list_empty(&exec->unref_list)) { @@ -720,6 +843,11 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) drm_gem_object_unreference_unlocked(&bo->base.base); } + /* Free up the allocation of any bin slots we used. 
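 *
 * (Side note on the dma_fence_signal() fallback a few lines up, not
 * part of the patch: in the normal case the job-done interrupt path is
 * expected to advance vc4->finished_seqno and signal the fence; only
 * jobs torn down by a GPU reset reach vc4_complete_exec() with an
 * unsignalled fence, hence the explicit signal there.)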
*/ + spin_lock_irqsave(&vc4->job_lock, irqflags); + vc4->bin_alloc_used &= ~exec->bin_slots; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + mutex_lock(&vc4->power_lock); if (--vc4->power_refcount == 0) { pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); @@ -874,6 +1002,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_submit_cl *args = data; struct vc4_exec_info *exec; + struct ww_acquire_ctx acquire_ctx; int ret = 0; if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { @@ -888,13 +1017,16 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } mutex_lock(&vc4->power_lock); - if (vc4->power_refcount++ == 0) + if (vc4->power_refcount++ == 0) { ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); - mutex_unlock(&vc4->power_lock); - if (ret < 0) { - kfree(exec); - return ret; + if (ret < 0) { + mutex_unlock(&vc4->power_lock); + vc4->power_refcount--; + kfree(exec); + return ret; + } } + mutex_unlock(&vc4->power_lock); exec->args = args; INIT_LIST_HEAD(&exec->unref_list); @@ -916,12 +1048,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); + if (ret) + goto fail; + /* Clear this out of the struct we'll be putting in the queue, * since it's part of our stack. */ exec->args = NULL; - vc4_queue_submit(dev, exec); + ret = vc4_queue_submit(dev, exec, &acquire_ctx); + if (ret) + goto fail; /* Return the seqno for our job. */ args->seqno = vc4->emit_seqno; @@ -939,6 +1077,8 @@ vc4_gem_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + vc4->dma_fence_context = dma_fence_context_alloc(1); + INIT_LIST_HEAD(&vc4->bin_job_list); INIT_LIST_HEAD(&vc4->render_job_list); INIT_LIST_HEAD(&vc4->job_done_list); @@ -968,9 +1108,9 @@ vc4_gem_destroy(struct drm_device *dev) /* V3D should already have disabled its interrupt and cleared * the overflow allocation registers. Now free the object. */ - if (vc4->overflow_mem) { - drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base); - vc4->overflow_mem = NULL; + if (vc4->bin_bo) { + drm_gem_object_put_unlocked(&vc4->bin_bo->base.base); + vc4->bin_bo = NULL; } if (vc4->hang_state) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index e9cbe269710b..ed63d4e85762 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -42,20 +42,21 @@ * encoder block has CEC support. 
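 *
 * (Side note on the fence plumbing in vc4_gem.c above, not part of the
 * patch: vc4_gem_init() reserves one fence timeline with
 * dma_fence_context_alloc(1), and vc4_queue_submit() then does
 *
 *	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
 *		       vc4->dma_fence_context, exec->seqno);
 *
 * under job_lock, so every job's fence lands on that single timeline
 * ordered by its seqno, matching vc4_fence_signaled()'s
 * finished_seqno >= f->seqno test.)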
*/ -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "drm_edid.h" -#include "linux/clk.h" -#include "linux/component.h" -#include "linux/i2c.h" -#include "linux/of_address.h" -#include "linux/of_gpio.h" -#include "linux/of_platform.h" -#include "linux/rational.h" -#include "sound/dmaengine_pcm.h" -#include "sound/pcm_drm_eld.h" -#include "sound/pcm_params.h" -#include "sound/soc.h" +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_edid.h> +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/i2c.h> +#include <linux/of_address.h> +#include <linux/of_gpio.h> +#include <linux/of_platform.h> +#include <linux/pm_runtime.h> +#include <linux/rational.h> +#include <sound/dmaengine_pcm.h> +#include <sound/pcm_drm_eld.h> +#include <sound/pcm_params.h> +#include <sound/soc.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -449,13 +450,38 @@ static void vc4_hdmi_set_infoframes(struct drm_encoder *encoder) vc4_hdmi_set_spd_infoframe(encoder); } -static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *unadjusted_mode, - struct drm_display_mode *mode) +static void vc4_hdmi_encoder_disable(struct drm_encoder *encoder) +{ + struct drm_device *dev = encoder->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hdmi *hdmi = vc4->hdmi; + int ret; + + HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, 0); + + HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); + HD_WRITE(VC4_HD_VID_CTL, + HD_READ(VC4_HD_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); + + HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST); + udelay(1); + HD_WRITE(VC4_HD_M_CTL, 0); + + clk_disable_unprepare(hdmi->hsm_clock); + clk_disable_unprepare(hdmi->pixel_clock); + + ret = pm_runtime_put(&hdmi->pdev->dev); + if (ret < 0) + DRM_ERROR("Failed to release power domain: %d\n", ret); +} + +static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) { + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); struct drm_device *dev = encoder->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hdmi *hdmi = vc4->hdmi; bool debug_dump_regs = false; bool hsync_pos = mode->flags & DRM_MODE_FLAG_PHSYNC; bool vsync_pos = mode->flags & DRM_MODE_FLAG_PVSYNC; @@ -475,6 +501,64 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, interlaced, VC4_HDMI_VERTB_VBP)); u32 csc_ctl; + int ret; + + ret = pm_runtime_get_sync(&hdmi->pdev->dev); + if (ret < 0) { + DRM_ERROR("Failed to retain power domain: %d\n", ret); + return; + } + + /* This is the rate that is set by the firmware. The number + * needs to be a bit higher than the pixel clock rate + * (generally 148.5Mhz). + */ + ret = clk_set_rate(hdmi->hsm_clock, 163682864); + if (ret) { + DRM_ERROR("Failed to set HSM clock rate: %d\n", ret); + return; + } + + ret = clk_set_rate(hdmi->pixel_clock, + mode->clock * 1000 * + ((mode->flags & DRM_MODE_FLAG_DBLCLK) ? 
2 : 1)); + if (ret) { + DRM_ERROR("Failed to set pixel clock rate: %d\n", ret); + return; + } + + ret = clk_prepare_enable(hdmi->pixel_clock); + if (ret) { + DRM_ERROR("Failed to turn on pixel clock: %d\n", ret); + return; + } + + ret = clk_prepare_enable(hdmi->hsm_clock); + if (ret) { + DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n", + ret); + clk_disable_unprepare(hdmi->pixel_clock); + return; + } + + HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST); + udelay(1); + HD_WRITE(VC4_HD_M_CTL, 0); + + HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE); + + HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, + VC4_HDMI_SW_RESET_HDMI | + VC4_HDMI_SW_RESET_FORMAT_DETECT); + + HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, 0); + + /* PHY should be in reset, like + * vc4_hdmi_encoder_disable() does. + */ + HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); + + HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0); if (debug_dump_regs) { DRM_INFO("HDMI regs before:\n"); @@ -483,9 +567,6 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, HD_WRITE(VC4_HD_VID_CTL, 0); - clk_set_rate(vc4->hdmi->pixel_clock, mode->clock * 1000 * - ((mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1)); - HDMI_WRITE(VC4_HDMI_SCHEDULER_CONTROL, HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) | VC4_HDMI_SCHEDULER_CONTROL_MANUAL_FORMAT | @@ -559,28 +640,6 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, DRM_INFO("HDMI regs after:\n"); vc4_hdmi_dump_regs(dev); } -} - -static void vc4_hdmi_encoder_disable(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct vc4_dev *vc4 = to_vc4_dev(dev); - - HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, 0); - - HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); - HD_WRITE(VC4_HD_VID_CTL, - HD_READ(VC4_HD_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); -} - -static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) -{ - struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); - struct drm_device *dev = encoder->dev; - struct vc4_dev *vc4 = to_vc4_dev(dev); - int ret; - - HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0); HD_WRITE(VC4_HD_VID_CTL, HD_READ(VC4_HD_VID_CTL) | @@ -646,7 +705,6 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) } static const struct drm_encoder_helper_funcs vc4_hdmi_encoder_helper_funcs = { - .mode_set = vc4_hdmi_encoder_mode_set, .disable = vc4_hdmi_encoder_disable, .enable = vc4_hdmi_encoder_enable, }; @@ -1147,33 +1205,6 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) return -EPROBE_DEFER; } - /* Enable the clocks at startup. We can't quite recover from - * turning off the pixel clock during disable/enables yet, so - * it's always running. - */ - ret = clk_prepare_enable(hdmi->pixel_clock); - if (ret) { - DRM_ERROR("Failed to turn on pixel clock: %d\n", ret); - goto err_put_i2c; - } - - /* This is the rate that is set by the firmware. The number - * needs to be a bit higher than the pixel clock rate - * (generally 148.5Mhz). - */ - ret = clk_set_rate(hdmi->hsm_clock, 163682864); - if (ret) { - DRM_ERROR("Failed to set HSM clock rate: %d\n", ret); - goto err_unprepare_pix; - } - - ret = clk_prepare_enable(hdmi->hsm_clock); - if (ret) { - DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n", - ret); - goto err_unprepare_pix; - } - /* Only use the GPIO HPD pin if present in the DT, otherwise * we'll use the HDMI core's register. 
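
The pixel clock setup in the enable hook above converts the DRM mode clock, which is stored in kHz, into the Hz value clk_set_rate() expects, and doubles it for pixel-repeated (DBLCLK) modes. A small standalone sketch of that arithmetic:

#include <stdbool.h>
#include <stdio.h>

static unsigned long pixel_clock_hz(int mode_clock_khz, bool dblclk)
{
    return (unsigned long)mode_clock_khz * 1000 * (dblclk ? 2 : 1);
}

int main(void)
{
    /* 1080p60: mode->clock == 148500 kHz */
    printf("1080p60: %lu Hz\n", pixel_clock_hz(148500, false));
    /* 480i is double-clocked: 13500 kHz becomes 27 MHz on the wire */
    printf("480i:    %lu Hz\n", pixel_clock_hz(13500, true));
    return 0;
}

The 148.5 MHz result for 1080p60 is also why the HSM clock above is pinned slightly higher, at 163682864 Hz.
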
*/ @@ -1185,7 +1216,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) &hpd_gpio_flags); if (hdmi->hpd_gpio < 0) { ret = hdmi->hpd_gpio; - goto err_unprepare_hsm; + goto err_put_i2c; } hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW; @@ -1193,25 +1224,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4->hdmi = hdmi; - /* HDMI core must be enabled. */ - if (!(HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE)) { - HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST); - udelay(1); - HD_WRITE(VC4_HD_M_CTL, 0); - - HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE); - - HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, - VC4_HDMI_SW_RESET_HDMI | - VC4_HDMI_SW_RESET_FORMAT_DETECT); - - HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, 0); - - /* PHY should be in reset, like - * vc4_hdmi_encoder_disable() does. - */ - HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); - } + pm_runtime_enable(dev); drm_encoder_init(drm, hdmi->encoder, &vc4_hdmi_encoder_funcs, DRM_MODE_ENCODER_TMDS, NULL); @@ -1231,10 +1244,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) err_destroy_encoder: vc4_hdmi_encoder_destroy(hdmi->encoder); -err_unprepare_hsm: - clk_disable_unprepare(hdmi->hsm_clock); -err_unprepare_pix: - clk_disable_unprepare(hdmi->pixel_clock); + pm_runtime_disable(dev); err_put_i2c: put_device(&hdmi->ddc->dev); @@ -1253,8 +1263,8 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master, vc4_hdmi_connector_destroy(hdmi->connector); vc4_hdmi_encoder_destroy(hdmi->encoder); - clk_disable_unprepare(hdmi->pixel_clock); - clk_disable_unprepare(hdmi->hsm_clock); + pm_runtime_disable(dev); + put_device(&hdmi->ddc->dev); vc4->hdmi = NULL; diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index fd421ba3c5d7..2b62fc5b8d85 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -22,7 +22,7 @@ * each CRTC. */ -#include "linux/component.h" +#include <linux/component.h> #include "vc4_drv.h" #include "vc4_regs.h" diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index cdc6e6760705..7d7af3a93d94 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -59,50 +59,45 @@ vc4_overflow_mem_work(struct work_struct *work) { struct vc4_dev *vc4 = container_of(work, struct vc4_dev, overflow_mem_work); - struct drm_device *dev = vc4->dev; - struct vc4_bo *bo; + struct vc4_bo *bo = vc4->bin_bo; + int bin_bo_slot; + struct vc4_exec_info *exec; + unsigned long irqflags; - bo = vc4_bo_create(dev, 256 * 1024, true); - if (IS_ERR(bo)) { + bin_bo_slot = vc4_v3d_get_bin_slot(vc4); + if (bin_bo_slot < 0) { DRM_ERROR("Couldn't allocate binner overflow mem\n"); return; } - /* If there's a job executing currently, then our previous - * overflow allocation is getting used in that job and we need - * to queue it to be released when the job is done. But if no - * job is executing at all, then we can free the old overflow - * object direcctly. - * - * No lock necessary for this pointer since we're the only - * ones that update the pointer, and our workqueue won't - * reenter. 
- */ - if (vc4->overflow_mem) { - struct vc4_exec_info *current_exec; - unsigned long irqflags; - - spin_lock_irqsave(&vc4->job_lock, irqflags); - current_exec = vc4_first_bin_job(vc4); - if (!current_exec) - current_exec = vc4_last_render_job(vc4); - if (current_exec) { - vc4->overflow_mem->seqno = current_exec->seqno; - list_add_tail(&vc4->overflow_mem->unref_head, - ¤t_exec->unref_list); - vc4->overflow_mem = NULL; + spin_lock_irqsave(&vc4->job_lock, irqflags); + + if (vc4->bin_alloc_overflow) { + /* If we had overflow memory allocated previously, + * then that chunk will free when the current bin job + * is done. If we don't have a bin job running, then + * the chunk will be done whenever the list of render + * jobs has drained. + */ + exec = vc4_first_bin_job(vc4); + if (!exec) + exec = vc4_last_render_job(vc4); + if (exec) { + exec->bin_slots |= vc4->bin_alloc_overflow; + } else { + /* There's nothing queued in the hardware, so + * the old slot is free immediately. + */ + vc4->bin_alloc_used &= ~vc4->bin_alloc_overflow; } - spin_unlock_irqrestore(&vc4->job_lock, irqflags); } + vc4->bin_alloc_overflow = BIT(bin_bo_slot); - if (vc4->overflow_mem) - drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base); - vc4->overflow_mem = bo; - - V3D_WRITE(V3D_BPOA, bo->base.paddr); + V3D_WRITE(V3D_BPOA, bo->base.paddr + bin_bo_slot * vc4->bin_alloc_size); V3D_WRITE(V3D_BPOS, bo->base.base.size); V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM); V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM); + spin_unlock_irqrestore(&vc4->job_lock, irqflags); } static void @@ -142,6 +137,10 @@ vc4_irq_finish_render_job(struct drm_device *dev) vc4->finished_seqno++; list_move_tail(&exec->head, &vc4->job_done_list); + if (exec->fence) { + dma_fence_signal_locked(exec->fence); + exec->fence = NULL; + } vc4_submit_next_render_job(dev); wake_up_all(&vc4->job_wait_queue); diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index ad7925a9e0ea..928d191ef90f 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -14,12 +14,12 @@ * crtc, HDMI encoder). */ -#include "drm_crtc.h" -#include "drm_atomic.h" -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "drm_plane_helper.h" -#include "drm_fb_cma_helper.h" +#include <drm/drm_crtc.h> +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_plane_helper.h> +#include <drm/drm_fb_cma_helper.h> #include "vc4_drv.h" static void vc4_output_poll_changed(struct drm_device *dev) @@ -230,10 +230,12 @@ int vc4_kms_load(struct drm_device *dev) drm_mode_config_reset(dev); - vc4->fbdev = drm_fbdev_cma_init(dev, 32, - dev->mode_config.num_connector); - if (IS_ERR(vc4->fbdev)) - vc4->fbdev = NULL; + if (dev->mode_config.num_connector) { + vc4->fbdev = drm_fbdev_cma_init(dev, 32, + dev->mode_config.num_connector); + if (IS_ERR(vc4->fbdev)) + vc4->fbdev = NULL; + } drm_kms_helper_poll_init(dev); diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index d34cd5393a9b..da18dec21696 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -18,12 +18,13 @@ * into the region of the HVS that it has allocated for us. 
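
The reworked overflow handler above no longer allocates a fresh BO on every binner overflow; it hands 512 KB slots of the shared bin BO around with two bitmasks. A standalone model of that bookkeeping, reusing the driver's field names purely for illustration:

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

int main(void)
{
    uint32_t bin_alloc_used = 0;      /* one bit per 512 KB slot */
    uint32_t bin_alloc_overflow = 0;  /* slot backing the HW overflow ptr */

    /* First overflow interrupt: grab slot 0. */
    bin_alloc_used |= BIT(0);
    bin_alloc_overflow = BIT(0);

    /* Second overflow with nothing queued: slot 1 takes over and the
     * old overflow slot is released immediately; with a job running,
     * the old bit would instead be parked in exec->bin_slots and
     * cleared in vc4_complete_exec().
     */
    bin_alloc_used |= BIT(1);
    bin_alloc_used &= ~bin_alloc_overflow;
    bin_alloc_overflow = BIT(1);

    printf("used=0x%x overflow=0x%x\n",
           (unsigned)bin_alloc_used, (unsigned)bin_alloc_overflow);
    return 0;
}
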
*/ +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_fb_cma_helper.h> +#include <drm/drm_plane_helper.h> + #include "vc4_drv.h" #include "vc4_regs.h" -#include "drm_atomic.h" -#include "drm_atomic_helper.h" -#include "drm_fb_cma_helper.h" -#include "drm_plane_helper.h" enum vc4_scaling_mode { VC4_SCALING_NONE, diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index 4339471f517f..5dc19429d4ae 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -182,8 +182,7 @@ static void emit_tile(struct vc4_exec_info *exec, if (has_bin) { rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST); - rcl_u32(setup, (exec->tile_bo->paddr + - exec->tile_alloc_offset + + rcl_u32(setup, (exec->tile_alloc_offset + (y * exec->bin_tiles_x + x) * 32)); } diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 7cc346ad9b0b..5ae5518e605b 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -16,8 +16,9 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "linux/component.h" -#include "linux/pm_runtime.h" +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/pm_runtime.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -156,6 +157,144 @@ static void vc4_v3d_init_hw(struct drm_device *dev) V3D_WRITE(V3D_VPMBASE, 0); } +int vc4_v3d_get_bin_slot(struct vc4_dev *vc4) +{ + struct drm_device *dev = vc4->dev; + unsigned long irqflags; + int slot; + uint64_t seqno = 0; + struct vc4_exec_info *exec; + +try_again: + spin_lock_irqsave(&vc4->job_lock, irqflags); + slot = ffs(~vc4->bin_alloc_used); + if (slot != 0) { + /* Switch from ffs() bit index to a 0-based index. */ + slot--; + vc4->bin_alloc_used |= BIT(slot); + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return slot; + } + + /* Couldn't find an open slot. Wait for render to complete + * and try again. + */ + exec = vc4_last_render_job(vc4); + if (exec) + seqno = exec->seqno; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + if (seqno) { + int ret = vc4_wait_for_seqno(dev, seqno, ~0ull, true); + + if (ret == 0) + goto try_again; + + return ret; + } + + return -ENOMEM; +} + +/** + * vc4_allocate_bin_bo() - allocates the memory that will be used for + * tile binning. + * + * The binner has a limitation that the addresses in the tile state + * buffer that point into the tile alloc buffer or binner overflow + * memory only have 28 bits (256MB), and the top 4 on the bus for + * tile alloc references end up coming from the tile state buffer's + * address. + * + * To work around this, we allocate a single large buffer while V3D is + * in use, make sure that it has the top 4 bits constant across its + * entire extent, and then put the tile state, tile alloc, and binner + * overflow memory inside that buffer. + * + * This creates a limitation where we may not be able to execute a job + * if it doesn't fit within the buffer that we allocated up front. + * However, it turns out that 16MB is "enough for anybody", and + * real-world applications run into allocation failures from the + * overall CMA pool before they make scenes complicated enough to run + * out of bin space. + */ +int +vc4_allocate_bin_bo(struct drm_device *drm) +{ + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_v3d *v3d = vc4->v3d; + uint32_t size = 16 * 1024 * 1024; + int ret = 0; + struct list_head list; + + /* We may need to try allocating more than once to get a BO + * that doesn't cross 256MB. 
Track the ones we've allocated + * that failed so far, so that we can free them when we've got + * one that succeeded (if we freed them right away, our next + * allocation would probably be the same chunk of memory). + */ + INIT_LIST_HEAD(&list); + + while (true) { + struct vc4_bo *bo = vc4_bo_create(drm, size, true); + + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + + dev_err(&v3d->pdev->dev, + "Failed to allocate memory for tile binning: " + "%d. You may need to enable CMA or give it " + "more memory.", + ret); + break; + } + + /* Check if this BO won't trigger the addressing bug. */ + if ((bo->base.paddr & 0xf0000000) == + ((bo->base.paddr + bo->base.base.size - 1) & 0xf0000000)) { + vc4->bin_bo = bo; + + /* Set up for allocating 512KB chunks of + * binner memory. The biggest allocation we + * need to do is for the initial tile alloc + + * tile state buffer. We can render to a + * maximum of ((2048*2048) / (32*32) = 4096 + * tiles in a frame (until we do floating + * point rendering, at which point it would be + * 8192). Tile state is 48b/tile (rounded to + * a page), and tile alloc is 32b/tile + * (rounded to a page), plus a page of extra, + * for a total of 320kb for our worst-case. + * We choose 512kb so that it divides evenly + * into our 16MB, and the rest of the 512kb + * will be used as storage for the overflow + * from the initial 32b CL per bin. + */ + vc4->bin_alloc_size = 512 * 1024; + vc4->bin_alloc_used = 0; + vc4->bin_alloc_overflow = 0; + WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 != + bo->base.base.size / vc4->bin_alloc_size); + + break; + } + + /* Put it on the list to free later, and try again. */ + list_add(&bo->unref_head, &list); + } + + /* Free all the BOs we allocated but didn't choose. */ + while (!list_empty(&list)) { + struct vc4_bo *bo = list_last_entry(&list, + struct vc4_bo, unref_head); + + list_del(&bo->unref_head); + drm_gem_object_put_unlocked(&bo->base.base); + } + + return ret; +} + #ifdef CONFIG_PM static int vc4_v3d_runtime_suspend(struct device *dev) { @@ -164,6 +303,11 @@ static int vc4_v3d_runtime_suspend(struct device *dev) vc4_irq_uninstall(vc4->dev); + drm_gem_object_put_unlocked(&vc4->bin_bo->base.base); + vc4->bin_bo = NULL; + + clk_disable_unprepare(v3d->clk); + return 0; } @@ -171,6 +315,15 @@ static int vc4_v3d_runtime_resume(struct device *dev) { struct vc4_v3d *v3d = dev_get_drvdata(dev); struct vc4_dev *vc4 = v3d->vc4; + int ret; + + ret = vc4_allocate_bin_bo(vc4->dev); + if (ret) + return ret; + + ret = clk_prepare_enable(v3d->clk); + if (ret != 0) + return ret; vc4_v3d_init_hw(vc4->dev); vc4_irq_postinstall(vc4->dev); @@ -202,12 +355,38 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) vc4->v3d = v3d; v3d->vc4 = vc4; + v3d->clk = devm_clk_get(dev, NULL); + if (IS_ERR(v3d->clk)) { + int ret = PTR_ERR(v3d->clk); + + if (ret == -ENOENT) { + /* bcm2835 didn't have a clock reference in the DT. */ + ret = 0; + v3d->clk = NULL; + } else { + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get V3D clock: %d\n", + ret); + return ret; + } + } + if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n", V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0); return -EINVAL; } + ret = clk_prepare_enable(v3d->clk); + if (ret != 0) + return ret; + + ret = vc4_allocate_bin_bo(drm); + if (ret) { + clk_disable_unprepare(v3d->clk); + return ret; + } + /* Reset the binner overflow address/size at setup, to be sure * we don't reuse an old one. 
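
Two pieces of the new vc4_v3d.c code above are worth pulling out: the ffs()-based search for a free slot in the allocation bitmap in vc4_v3d_get_bin_slot(), and the check in vc4_allocate_bin_bo() that a candidate BO keeps the top 4 physical-address bits constant across its extent. A self-contained model of both, with hypothetical addresses:

#include <stdio.h>
#include <strings.h>    /* ffs() */

static int get_bin_slot(unsigned *used)
{
    int slot = ffs(~*used);      /* 1-based index of the first zero bit */

    if (slot == 0)
        return -1;               /* all 32 slots are busy */
    slot--;                      /* back to a 0-based slot number */
    *used |= 1u << slot;
    return slot;
}

static int crosses_256mb(unsigned paddr, unsigned size)
{
    return (paddr & 0xf0000000) != ((paddr + size - 1) & 0xf0000000);
}

int main(void)
{
    unsigned used = 0x0000000f;  /* slots 0-3 already taken */

    printf("next slot: %d\n", get_bin_slot(&used));               /* 4 */
    printf("crosses: %d\n", crosses_256mb(0x0ff00000, 16 << 20)); /* 1 */
    printf("crosses: %d\n", crosses_256mb(0x10000000, 16 << 20)); /* 0 */
    return 0;
}

ffs(~used) finds the lowest clear bit cheaply on most targets, which is why the driver converts its 1-based result back to a 0-based slot index rather than scanning the mask by hand.
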
*/ @@ -271,6 +450,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev) static const struct of_device_id vc4_v3d_dt_match[] = { { .compatible = "brcm,bcm2835-v3d" }, + { .compatible = "brcm,cygnus-v3d" }, { .compatible = "brcm,vc4-v3d" }, {} }; diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index da6f1e138e8d..814b512c6b9a 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -172,7 +172,8 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, * our math. */ if (width > 4096 || height > 4096) { - DRM_ERROR("Surface dimesions (%d,%d) too large", width, height); + DRM_ERROR("Surface dimensions (%d,%d) too large", + width, height); return false; } @@ -348,10 +349,11 @@ static int validate_tile_binning_config(VALIDATE_ARGS) { struct drm_device *dev = exec->exec_bo->base.dev; - struct vc4_bo *tile_bo; + struct vc4_dev *vc4 = to_vc4_dev(dev); uint8_t flags; - uint32_t tile_state_size, tile_alloc_size; - uint32_t tile_count; + uint32_t tile_state_size; + uint32_t tile_count, bin_addr; + int bin_slot; if (exec->found_tile_binning_mode_config_packet) { DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); @@ -377,13 +379,28 @@ validate_tile_binning_config(VALIDATE_ARGS) return -EINVAL; } + bin_slot = vc4_v3d_get_bin_slot(vc4); + if (bin_slot < 0) { + if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) { + DRM_ERROR("Failed to allocate binner memory: %d\n", + bin_slot); + } + return bin_slot; + } + + /* The slot we allocated will only be used by this job, and is + * free when the job completes rendering. + */ + exec->bin_slots |= BIT(bin_slot); + bin_addr = vc4->bin_bo->base.paddr + bin_slot * vc4->bin_alloc_size; + /* The tile state data array is 48 bytes per tile, and we put it at * the start of a BO containing both it and the tile alloc. */ tile_state_size = 48 * tile_count; /* Since the tile alloc array will follow us, align. */ - exec->tile_alloc_offset = roundup(tile_state_size, 4096); + exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096); *(uint8_t *)(validated + 14) = ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK | @@ -394,35 +411,13 @@ validate_tile_binning_config(VALIDATE_ARGS) VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128, VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE)); - /* Initial block size. */ - tile_alloc_size = 32 * tile_count; - - /* - * The initial allocation gets rounded to the next 256 bytes before - * the hardware starts fulfilling further allocations. - */ - tile_alloc_size = roundup(tile_alloc_size, 256); - - /* Add space for the extra allocations. This is what gets used first, - * before overflow memory. It must have at least 4096 bytes, but we - * want to avoid overflow memory usage if possible. - */ - tile_alloc_size += 1024 * 1024; - - tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size, - true); - exec->tile_bo = &tile_bo->base; - if (IS_ERR(exec->tile_bo)) - return PTR_ERR(exec->tile_bo); - list_add_tail(&tile_bo->unref_head, &exec->unref_list); - /* tile alloc address. */ - *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr + - exec->tile_alloc_offset); + *(uint32_t *)(validated + 0) = exec->tile_alloc_offset; /* tile alloc size. */ - *(uint32_t *)(validated + 4) = tile_alloc_size; + *(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size - + exec->tile_alloc_offset); /* tile state address. 
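
validate_tile_binning_config() above carves each 512 KB bin slot into a page-aligned tile state array (48 bytes per tile) followed by the tile allocation area that fills the remainder of the slot. The layout arithmetic, modeled standalone with a hypothetical slot address:

#include <stdio.h>

#define ROUNDUP(x, a) (((x) + (a) - 1) & ~((unsigned)(a) - 1))

int main(void)
{
    unsigned bin_addr = 0x20000000;      /* hypothetical slot address */
    unsigned bin_alloc_size = 512 * 1024;
    unsigned tile_count = 60 * 34;       /* 1920x1080 in 32x32 tiles */
    unsigned tile_state_size = 48 * tile_count;
    unsigned tile_alloc_offset = bin_addr + ROUNDUP(tile_state_size, 4096);
    unsigned tile_alloc_size = bin_addr + bin_alloc_size - tile_alloc_offset;

    printf("tile state %u bytes, alloc at slot offset 0x%x, %u bytes left\n",
           tile_state_size, tile_alloc_offset - bin_addr, tile_alloc_size);
    return 0;
}

These three values are exactly what the hunk pokes into validated + 0, + 4 and + 8 of the binning-mode-config packet.
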
*/ - *(uint32_t *)(validated + 8) = exec->tile_bo->paddr; + *(uint32_t *)(validated + 8) = bin_addr; return 0; } diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile index bfcdea1330e6..cb5d413b9c93 100644 --- a/drivers/gpu/drm/vgem/Makefile +++ b/drivers/gpu/drm/vgem/Makefile @@ -1,4 +1,3 @@ -ccflags-y := -Iinclude/drm vgem-y := vgem_drv.o vgem_fence.o obj-$(CONFIG_DRM_VGEM) += vgem.o diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 9fee38a942c4..18f401b442c2 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -42,10 +42,20 @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 +static struct vgem_device { + struct drm_device drm; + struct platform_device *platform; +} *vgem_device; + static void vgem_gem_free_object(struct drm_gem_object *obj) { struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj); + kvfree(vgem_obj->pages); + + if (obj->import_attach) + drm_prime_gem_destroy(obj, vgem_obj->table); + drm_gem_object_release(obj); kfree(vgem_obj); } @@ -56,26 +66,49 @@ static int vgem_gem_fault(struct vm_fault *vmf) struct drm_vgem_gem_object *obj = vma->vm_private_data; /* We don't use vmf->pgoff since that has the fake offset */ unsigned long vaddr = vmf->address; - struct page *page; - - page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping, - (vaddr - vma->vm_start) >> PAGE_SHIFT); - if (!IS_ERR(page)) { - vmf->page = page; - return 0; - } else switch (PTR_ERR(page)) { - case -ENOSPC: - case -ENOMEM: - return VM_FAULT_OOM; - case -EBUSY: - return VM_FAULT_RETRY; - case -EFAULT: - case -EINVAL: - return VM_FAULT_SIGBUS; - default: - WARN_ON_ONCE(PTR_ERR(page)); - return VM_FAULT_SIGBUS; + int ret; + loff_t num_pages; + pgoff_t page_offset; + page_offset = (vaddr - vma->vm_start) >> PAGE_SHIFT; + + num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE); + + if (page_offset > num_pages) + return VM_FAULT_SIGBUS; + + if (obj->pages) { + get_page(obj->pages[page_offset]); + vmf->page = obj->pages[page_offset]; + ret = 0; + } else { + struct page *page; + + page = shmem_read_mapping_page( + file_inode(obj->base.filp)->i_mapping, + page_offset); + if (!IS_ERR(page)) { + vmf->page = page; + ret = 0; + } else switch (PTR_ERR(page)) { + case -ENOSPC: + case -ENOMEM: + ret = VM_FAULT_OOM; + break; + case -EBUSY: + ret = VM_FAULT_RETRY; + break; + case -EFAULT: + case -EINVAL: + ret = VM_FAULT_SIGBUS; + break; + default: + WARN_ON(PTR_ERR(page)); + ret = VM_FAULT_SIGBUS; + break; + } + } + return ret; } static const struct vm_operations_struct vgem_gem_vm_ops = { @@ -112,12 +145,8 @@ static void vgem_postclose(struct drm_device *dev, struct drm_file *file) kfree(vfile); } -/* ioctls */ - -static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, - struct drm_file *file, - unsigned int *handle, - unsigned long size) +static struct drm_vgem_gem_object *__vgem_gem_create(struct drm_device *dev, + unsigned long size) { struct drm_vgem_gem_object *obj; int ret; @@ -127,8 +156,31 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, return ERR_PTR(-ENOMEM); ret = drm_gem_object_init(dev, &obj->base, roundup(size, PAGE_SIZE)); - if (ret) - goto err_free; + if (ret) { + kfree(obj); + return ERR_PTR(ret); + } + + return obj; +} + +static void __vgem_gem_destroy(struct drm_vgem_gem_object *obj) +{ + drm_gem_object_release(&obj->base); + kfree(obj); +} + +static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, + struct drm_file *file, + unsigned int *handle, + 
unsigned long size) +{ + struct drm_vgem_gem_object *obj; + int ret; + + obj = __vgem_gem_create(dev, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); ret = drm_gem_handle_create(file, &obj->base, handle); drm_gem_object_unreference_unlocked(&obj->base); @@ -137,9 +189,8 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, return &obj->base; -err_free: - kfree(obj); err: + __vgem_gem_destroy(obj); return ERR_PTR(ret); } @@ -256,6 +307,37 @@ static struct sg_table *vgem_prime_get_sg_table(struct drm_gem_object *obj) return st; } +static struct drm_gem_object* vgem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct vgem_device *vgem = container_of(dev, typeof(*vgem), drm); + + return drm_gem_prime_import_dev(dev, dma_buf, &vgem->platform->dev); +} + +static struct drm_gem_object *vgem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, struct sg_table *sg) +{ + struct drm_vgem_gem_object *obj; + int npages; + + obj = __vgem_gem_create(dev, attach->dmabuf->size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + npages = PAGE_ALIGN(attach->dmabuf->size) / PAGE_SIZE; + + obj->table = sg; + obj->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + if (!obj->pages) { + __vgem_gem_destroy(obj); + return ERR_PTR(-ENOMEM); + } + drm_prime_sg_to_page_addr_arrays(obj->table, obj->pages, NULL, + npages); + return &obj->base; +} + static void *vgem_prime_vmap(struct drm_gem_object *obj) { long n_pages = obj->size >> PAGE_SHIFT; @@ -300,8 +382,19 @@ static int vgem_prime_mmap(struct drm_gem_object *obj, return 0; } +static void vgem_release(struct drm_device *dev) +{ + struct vgem_device *vgem = container_of(dev, typeof(*vgem), drm); + + platform_device_unregister(vgem->platform); + drm_dev_fini(&vgem->drm); + + kfree(vgem); +} + static struct drm_driver vgem_driver = { .driver_features = DRIVER_GEM | DRIVER_PRIME, + .release = vgem_release, .open = vgem_open, .postclose = vgem_postclose, .gem_free_object_unlocked = vgem_gem_free_object, @@ -314,8 +407,11 @@ static struct drm_driver vgem_driver = { .dumb_map_offset = vgem_gem_dumb_map, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_pin = vgem_prime_pin, + .gem_prime_import = vgem_prime_import, .gem_prime_export = drm_gem_prime_export, + .gem_prime_import_sg_table = vgem_prime_import_sg_table, .gem_prime_get_sg_table = vgem_prime_get_sg_table, .gem_prime_vmap = vgem_prime_vmap, .gem_prime_vunmap = vgem_prime_vunmap, @@ -328,34 +424,48 @@ static struct drm_driver vgem_driver = { .minor = DRIVER_MINOR, }; -static struct drm_device *vgem_device; - static int __init vgem_init(void) { int ret; - vgem_device = drm_dev_alloc(&vgem_driver, NULL); - if (IS_ERR(vgem_device)) { - ret = PTR_ERR(vgem_device); - goto out; + vgem_device = kzalloc(sizeof(*vgem_device), GFP_KERNEL); + if (!vgem_device) + return -ENOMEM; + + ret = drm_dev_init(&vgem_device->drm, &vgem_driver, NULL); + if (ret) + goto out_free; + + vgem_device->platform = + platform_device_register_simple("vgem", -1, NULL, 0); + if (IS_ERR(vgem_device->platform)) { + ret = PTR_ERR(vgem_device->platform); + goto out_fini; } - ret = drm_dev_register(vgem_device, 0); + dma_coerce_mask_and_coherent(&vgem_device->platform->dev, + DMA_BIT_MASK(64)); + + /* Final step: expose the device/driver to userspace */ + ret = drm_dev_register(&vgem_device->drm, 0); if (ret) - goto out_unref; + goto out_unregister; return 0; -out_unref: - drm_dev_unref(vgem_device); 
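
The new vgem prime import path above sizes its pages array as PAGE_ALIGN(size) / PAGE_SIZE before handing the sg_table to drm_prime_sg_to_page_addr_arrays(). A trivial standalone model of that page-count math, with PAGE_SIZE hardcoded to 4 KB for the sketch:

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE  4096ul
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

static size_t dmabuf_npages(size_t size)
{
    return PAGE_ALIGN(size) / PAGE_SIZE;
}

int main(void)
{
    printf("%zu %zu %zu\n", dmabuf_npages(1), dmabuf_npages(4096),
           dmabuf_npages(4097));    /* 1 1 2 */
    return 0;
}
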
-out: +out_unregister: + platform_device_unregister(vgem_device->platform); +out_fini: + drm_dev_fini(&vgem_device->drm); +out_free: + kfree(vgem_device); return ret; } static void __exit vgem_exit(void) { - drm_dev_unregister(vgem_device); - drm_dev_unref(vgem_device); + drm_dev_unregister(&vgem_device->drm); + drm_dev_unref(&vgem_device->drm); } module_init(vgem_init); diff --git a/drivers/gpu/drm/vgem/vgem_drv.h b/drivers/gpu/drm/vgem/vgem_drv.h index cb59c7ab98b9..1aae01419112 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.h +++ b/drivers/gpu/drm/vgem/vgem_drv.h @@ -43,6 +43,8 @@ struct vgem_file { #define to_vgem_bo(x) container_of(x, struct drm_vgem_gem_object, base) struct drm_vgem_gem_object { struct drm_gem_object base; + struct page **pages; + struct sg_table *table; }; int vgem_fence_open(struct vgem_file *file); diff --git a/drivers/gpu/drm/via/Makefile b/drivers/gpu/drm/via/Makefile index d59e258e2c13..751fa8b8a014 100644 --- a/drivers/gpu/drm/via/Makefile +++ b/drivers/gpu/drm/via/Makefile @@ -2,7 +2,6 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -ccflags-y := -Iinclude/drm via-y := via_irq.o via_drv.o via_map.o via_mm.o via_dma.o via_verifier.o via_video.o via_dmablit.o obj-$(CONFIG_DRM_VIA) +=via.o diff --git a/drivers/gpu/drm/virtio/Makefile b/drivers/gpu/drm/virtio/Makefile index 3fb8eac1084f..7684f613bdc3 100644 --- a/drivers/gpu/drm/virtio/Makefile +++ b/drivers/gpu/drm/virtio/Makefile @@ -2,8 +2,6 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -ccflags-y := -Iinclude/drm - virtio-gpu-y := virtgpu_drv.o virtgpu_kms.o virtgpu_drm_bus.o virtgpu_gem.o \ virtgpu_fb.o virtgpu_display.o virtgpu_vq.o virtgpu_ttm.o \ virtgpu_fence.o virtgpu_object.o virtgpu_debugfs.o virtgpu_plane.o \ diff --git a/drivers/gpu/drm/virtio/virtgpu_debugfs.c b/drivers/gpu/drm/virtio/virtgpu_debugfs.c index f51240aa720d..73dc99046c43 100644 --- a/drivers/gpu/drm/virtio/virtgpu_debugfs.c +++ b/drivers/gpu/drm/virtio/virtgpu_debugfs.c @@ -24,8 +24,8 @@ */ #include <linux/debugfs.h> +#include <drm/drmP.h> -#include "drmP.h" #include "virtgpu_drv.h" static int diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 2d29b0141545..63d35c7e416c 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -29,8 +29,8 @@ #include <linux/module.h> #include <linux/console.h> #include <linux/pci.h> -#include "drmP.h" -#include "drm/drm.h" +#include <drm/drmP.h> +#include <drm/drm.h> #include "virtgpu_drv.h" static struct drm_driver driver; diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 1328185bfd59..3a66abb8fd50 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -36,10 +36,10 @@ #include <drm/drm_atomic.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_encoder.h> -#include <ttm/ttm_bo_api.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> -#include <ttm/ttm_module.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_module.h> #define DRIVER_NAME "virtio_gpu" #define DRIVER_DESC "virtio GPU" diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 61f3a963af95..b94bd5440e57 100644 --- 
a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -26,9 +26,10 @@ */ #include <drm/drmP.h> -#include "virtgpu_drv.h" #include <drm/virtgpu_drm.h> -#include "ttm/ttm_execbuf_util.h" +#include <drm/ttm/ttm_execbuf_util.h> + +#include "virtgpu_drv.h" static void convert_to_hw_box(struct virtio_gpu_box *dst, const struct drm_virtgpu_3d_box *src) @@ -119,13 +120,14 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, INIT_LIST_HEAD(&validate_list); if (exbuf->num_bo_handles) { - bo_handles = drm_malloc_ab(exbuf->num_bo_handles, - sizeof(uint32_t)); - buflist = drm_calloc_large(exbuf->num_bo_handles, - sizeof(struct ttm_validate_buffer)); + bo_handles = kvmalloc_array(exbuf->num_bo_handles, + sizeof(uint32_t), GFP_KERNEL); + buflist = kvmalloc_array(exbuf->num_bo_handles, + sizeof(struct ttm_validate_buffer), + GFP_KERNEL | __GFP_ZERO); if (!bo_handles || !buflist) { - drm_free_large(bo_handles); - drm_free_large(buflist); + kvfree(bo_handles); + kvfree(buflist); return -ENOMEM; } @@ -133,16 +135,16 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, if (copy_from_user(bo_handles, user_bo_handles, exbuf->num_bo_handles * sizeof(uint32_t))) { ret = -EFAULT; - drm_free_large(bo_handles); - drm_free_large(buflist); + kvfree(bo_handles); + kvfree(buflist); return ret; } for (i = 0; i < exbuf->num_bo_handles; i++) { gobj = drm_gem_object_lookup(drm_file, bo_handles[i]); if (!gobj) { - drm_free_large(bo_handles); - drm_free_large(buflist); + kvfree(bo_handles); + kvfree(buflist); return -ENOENT; } @@ -151,7 +153,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, list_add(&buflist[i].head, &validate_list); } - drm_free_large(bo_handles); + kvfree(bo_handles); } ret = virtio_gpu_object_list_validate(&ticket, &validate_list); @@ -171,7 +173,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, /* fence the command bo */ virtio_gpu_unref_list(&validate_list); - drm_free_large(buflist); + kvfree(buflist); dma_fence_put(&fence->f); return 0; @@ -179,7 +181,7 @@ out_unresv: ttm_eu_backoff_reservation(&ticket, &validate_list); out_free: virtio_gpu_unref_list(&validate_list); - drm_free_large(buflist); + kvfree(buflist); return ret; } diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c index 4e8e27d50922..c1f2af4ca4ca 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ttm.c +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -25,11 +25,11 @@ * OTHER DEALINGS IN THE SOFTWARE. 
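
The execbuffer ioctl above now allocates its handle and validation arrays with kvmalloc_array(), whose defining property is an overflow-checked multiply before the allocation and a vmalloc fallback for large sizes. A user-space approximation of the idiom, with calloc() standing in for the kernel allocator:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void *alloc_array(size_t n, size_t size)
{
    if (size && n > SIZE_MAX / size)
        return NULL;             /* n * size would overflow */
    return calloc(n, size);      /* zeroing matches __GFP_ZERO */
}

int main(void)
{
    uint32_t *bo_handles = alloc_array(64, sizeof(*bo_handles));

    if (!bo_handles)
        return 1;
    /* ... copy_from_user() the handles, look up each GEM object ... */
    free(bo_handles);            /* kvfree() in the kernel version */
    puts("ok");
    return 0;
}

Because the count comes straight from user space, the overflow check is what keeps a hostile num_bo_handles from wrapping the allocation size.
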
*/ -#include <ttm/ttm_bo_api.h> -#include <ttm/ttm_bo_driver.h> -#include <ttm/ttm_placement.h> -#include <ttm/ttm_page_alloc.h> -#include <ttm/ttm_module.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_page_alloc.h> +#include <drm/ttm/ttm_module.h> #include <drm/drmP.h> #include <drm/drm.h> #include <drm/virtgpu_drm.h> diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index aac17a640cce..a365330bbb82 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -1,6 +1,3 @@ - -ccflags-y := -Iinclude/drm - vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \ vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c index 77cb7c627e09..99a7f4ab7d97 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c @@ -25,8 +25,9 @@ * **************************************************************************/ +#include <drm/ttm/ttm_bo_api.h> + #include "vmwgfx_drv.h" -#include "ttm/ttm_bo_api.h" /* * Size of inline command buffers. Try to make sure that a page size is a diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c index 443d1ed00de7..bcc6d4136c87 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c @@ -25,10 +25,11 @@ * **************************************************************************/ +#include <drm/ttm/ttm_placement.h> + #include "vmwgfx_drv.h" #include "vmwgfx_resource_priv.h" #include "vmwgfx_binding.h" -#include "ttm/ttm_placement.h" struct vmw_user_context { struct ttm_base_object base; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c index 265c81e6cf39..6c026d75c180 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c @@ -30,9 +30,10 @@ * whenever the backing MOB is evicted. 
*/ +#include <drm/ttm/ttm_placement.h> + #include "vmwgfx_drv.h" #include "vmwgfx_resource_priv.h" -#include <ttm/ttm_placement.h> #include "vmwgfx_so.h" /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index ef9f3a2a4030..a8876b070168 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -566,7 +566,7 @@ int vmw_du_primary_plane_atomic_check(struct drm_plane *plane, ret = drm_plane_helper_check_update(plane, state->crtc, new_fb, &src, &dest, &clip, - DRM_ROTATE_0, + DRM_MODE_ROTATE_0, DRM_PLANE_HELPER_NO_SCALING, DRM_PLANE_HELPER_NO_SCALING, false, true, &visible); @@ -845,7 +845,7 @@ void vmw_du_plane_reset(struct drm_plane *plane) plane->state = &vps->base; plane->state->plane = plane; - plane->state->rotation = DRM_ROTATE_0; + plane->state->rotation = DRM_MODE_ROTATE_0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index e57a0bad7a62..6063c9636d4a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -30,7 +30,7 @@ #include <linux/kernel.h> #include <linux/frame.h> #include <asm/hypervisor.h> -#include "drmP.h" +#include <drm/drmP.h> #include "vmwgfx_msg.h" diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index 92f8b1d04f0f..68f135c5b0d8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -25,10 +25,11 @@ * **************************************************************************/ +#include <drm/ttm/ttm_placement.h> + #include "vmwgfx_drv.h" #include "vmwgfx_resource_priv.h" #include "vmwgfx_binding.h" -#include "ttm/ttm_placement.h" struct vmw_shader { struct vmw_resource res; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 7681341fe32b..5900cff5bbc3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -25,11 +25,12 @@ * **************************************************************************/ +#include <drm/ttm/ttm_placement.h> + #include "vmwgfx_drv.h" #include "vmwgfx_resource_priv.h" #include "vmwgfx_so.h" #include "vmwgfx_binding.h" -#include <ttm/ttm_placement.h> #include "device_include/svga3d_surfacedefs.h" diff --git a/drivers/gpu/drm/zte/Makefile b/drivers/gpu/drm/zte/Makefile index 01352b56c418..9df7766a7f9d 100644 --- a/drivers/gpu/drm/zte/Makefile +++ b/drivers/gpu/drm/zte/Makefile @@ -3,6 +3,7 @@ zxdrm-y := \ zx_hdmi.o \ zx_plane.o \ zx_tvenc.o \ + zx_vga.o \ zx_vou.o obj-$(CONFIG_DRM_ZTE) += zxdrm.o diff --git a/drivers/gpu/drm/zte/zx_common_regs.h b/drivers/gpu/drm/zte/zx_common_regs.h new file mode 100644 index 000000000000..2afd80664c51 --- /dev/null +++ b/drivers/gpu/drm/zte/zx_common_regs.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2017 Sanechips Technology Co., Ltd. + * Copyright 2017 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __ZX_COMMON_REGS_H__ +#define __ZX_COMMON_REGS_H__ + +/* CSC registers */ +#define CSC_CTRL0 0x30 +#define CSC_COV_MODE_SHIFT 16 +#define CSC_COV_MODE_MASK (0xffff << CSC_COV_MODE_SHIFT) +#define CSC_BT601_IMAGE_RGB2YCBCR 0 +#define CSC_BT601_IMAGE_YCBCR2RGB 1 +#define CSC_BT601_VIDEO_RGB2YCBCR 2 +#define CSC_BT601_VIDEO_YCBCR2RGB 3 +#define CSC_BT709_IMAGE_RGB2YCBCR 4 +#define CSC_BT709_IMAGE_YCBCR2RGB 5 +#define CSC_BT709_VIDEO_RGB2YCBCR 6 +#define CSC_BT709_VIDEO_YCBCR2RGB 7 +#define CSC_BT2020_IMAGE_RGB2YCBCR 8 +#define CSC_BT2020_IMAGE_YCBCR2RGB 9 +#define CSC_BT2020_VIDEO_RGB2YCBCR 10 +#define CSC_BT2020_VIDEO_YCBCR2RGB 11 +#define CSC_WORK_ENABLE BIT(0) + +#endif /* __ZX_COMMON_REGS_H__ */ diff --git a/drivers/gpu/drm/zte/zx_drm_drv.c b/drivers/gpu/drm/zte/zx_drm_drv.c index 614e854f6be5..490aafc99610 100644 --- a/drivers/gpu/drm/zte/zx_drm_drv.c +++ b/drivers/gpu/drm/zte/zx_drm_drv.c @@ -233,6 +233,7 @@ static struct platform_driver *drivers[] = { &zx_crtc_driver, &zx_hdmi_driver, &zx_tvenc_driver, + &zx_vga_driver, &zx_drm_platform_driver, }; diff --git a/drivers/gpu/drm/zte/zx_drm_drv.h b/drivers/gpu/drm/zte/zx_drm_drv.h index 5ca035b079c7..2a8cdc5f8be4 100644 --- a/drivers/gpu/drm/zte/zx_drm_drv.h +++ b/drivers/gpu/drm/zte/zx_drm_drv.h @@ -14,6 +14,7 @@ extern struct platform_driver zx_crtc_driver; extern struct platform_driver zx_hdmi_driver; extern struct platform_driver zx_tvenc_driver; +extern struct platform_driver zx_vga_driver; static inline u32 zx_readl(void __iomem *reg) { diff --git a/drivers/gpu/drm/zte/zx_plane.c b/drivers/gpu/drm/zte/zx_plane.c index d646ac931663..4a6252720c10 100644 --- a/drivers/gpu/drm/zte/zx_plane.c +++ b/drivers/gpu/drm/zte/zx_plane.c @@ -16,6 +16,7 @@ #include <drm/drm_plane_helper.h> #include <drm/drmP.h> +#include "zx_common_regs.h" #include "zx_drm_drv.h" #include "zx_plane.h" #include "zx_plane_regs.h" diff --git a/drivers/gpu/drm/zte/zx_plane_regs.h b/drivers/gpu/drm/zte/zx_plane_regs.h index 65f271aeabed..9c655f59f9f7 100644 --- a/drivers/gpu/drm/zte/zx_plane_regs.h +++ b/drivers/gpu/drm/zte/zx_plane_regs.h @@ -77,24 +77,6 @@ #define LUMA_STRIDE(x) (((x) << LUMA_STRIDE_SHIFT) & LUMA_STRIDE_MASK) #define CHROMA_STRIDE(x) (((x) << CHROMA_STRIDE_SHIFT) & CHROMA_STRIDE_MASK) -/* CSC registers */ -#define CSC_CTRL0 0x30 -#define CSC_COV_MODE_SHIFT 16 -#define CSC_COV_MODE_MASK (0xffff << CSC_COV_MODE_SHIFT) -#define CSC_BT601_IMAGE_RGB2YCBCR 0 -#define CSC_BT601_IMAGE_YCBCR2RGB 1 -#define CSC_BT601_VIDEO_RGB2YCBCR 2 -#define CSC_BT601_VIDEO_YCBCR2RGB 3 -#define CSC_BT709_IMAGE_RGB2YCBCR 4 -#define CSC_BT709_IMAGE_YCBCR2RGB 5 -#define CSC_BT709_VIDEO_RGB2YCBCR 6 -#define CSC_BT709_VIDEO_YCBCR2RGB 7 -#define CSC_BT2020_IMAGE_RGB2YCBCR 8 -#define CSC_BT2020_IMAGE_YCBCR2RGB 9 -#define CSC_BT2020_VIDEO_RGB2YCBCR 10 -#define CSC_BT2020_VIDEO_YCBCR2RGB 11 -#define CSC_WORK_ENABLE BIT(0) - /* RSZ registers */ #define RSZ_SRC_CFG 0x00 #define RSZ_DEST_CFG 0x04 diff --git a/drivers/gpu/drm/zte/zx_vga.c b/drivers/gpu/drm/zte/zx_vga.c new file mode 100644 index 000000000000..1e0811f775cb --- /dev/null +++ b/drivers/gpu/drm/zte/zx_vga.c @@ -0,0 +1,531 @@ +/* + * Copyright (C) 2017 Sanechips Technology Co., Ltd. + * Copyright 2017 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/mfd/syscon.h> +#include <linux/regmap.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drmP.h> + +#include "zx_drm_drv.h" +#include "zx_vga_regs.h" +#include "zx_vou.h" + +struct zx_vga_pwrctrl { + struct regmap *regmap; + u32 reg; + u32 mask; +}; + +struct zx_vga_i2c { + struct i2c_adapter adap; + struct mutex lock; +}; + +struct zx_vga { + struct drm_connector connector; + struct drm_encoder encoder; + struct zx_vga_i2c *ddc; + struct device *dev; + void __iomem *mmio; + struct clk *i2c_wclk; + struct zx_vga_pwrctrl pwrctrl; + struct completion complete; + bool connected; +}; + +#define to_zx_vga(x) container_of(x, struct zx_vga, x) + +static void zx_vga_encoder_enable(struct drm_encoder *encoder) +{ + struct zx_vga *vga = to_zx_vga(encoder); + struct zx_vga_pwrctrl *pwrctrl = &vga->pwrctrl; + + /* Set bit to power up VGA DACs */ + regmap_update_bits(pwrctrl->regmap, pwrctrl->reg, pwrctrl->mask, + pwrctrl->mask); + + vou_inf_enable(VOU_VGA, encoder->crtc); +} + +static void zx_vga_encoder_disable(struct drm_encoder *encoder) +{ + struct zx_vga *vga = to_zx_vga(encoder); + struct zx_vga_pwrctrl *pwrctrl = &vga->pwrctrl; + + vou_inf_disable(VOU_VGA, encoder->crtc); + + /* Clear bit to power down VGA DACs */ + regmap_update_bits(pwrctrl->regmap, pwrctrl->reg, pwrctrl->mask, 0); +} + +static const struct drm_encoder_helper_funcs zx_vga_encoder_helper_funcs = { + .enable = zx_vga_encoder_enable, + .disable = zx_vga_encoder_disable, +}; + +static const struct drm_encoder_funcs zx_vga_encoder_funcs = { + .destroy = drm_encoder_cleanup, +}; + +static int zx_vga_connector_get_modes(struct drm_connector *connector) +{ + struct zx_vga *vga = to_zx_vga(connector); + struct edid *edid; + int ret; + + /* + * Clear both detection bits to switch I2C bus from device + * detecting to EDID reading. + */ + zx_writel(vga->mmio + VGA_AUTO_DETECT_SEL, 0); + + edid = drm_get_edid(connector, &vga->ddc->adap); + if (!edid) { + /* + * If EDID reading fails, we set the device state into + * disconnected. Locking is not required here, since the + * VGA_AUTO_DETECT_SEL register write in irq handler cannot + * be triggered when both detection bits are cleared as above. + */ + zx_writel(vga->mmio + VGA_AUTO_DETECT_SEL, + VGA_DETECT_SEL_NO_DEVICE); + vga->connected = false; + return 0; + } + + /* + * As edid reading succeeds, device must be connected, so we set + * up detection bit for unplug interrupt here. + */ + zx_writel(vga->mmio + VGA_AUTO_DETECT_SEL, VGA_DETECT_SEL_HAS_DEVICE); + + drm_mode_connector_update_edid_property(connector, edid); + ret = drm_add_edid_modes(connector, edid); + kfree(edid); + + return ret; +} + +static enum drm_mode_status +zx_vga_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + return MODE_OK; +} + +static struct drm_connector_helper_funcs zx_vga_connector_helper_funcs = { + .get_modes = zx_vga_connector_get_modes, + .mode_valid = zx_vga_connector_mode_valid, +}; + +static enum drm_connector_status +zx_vga_connector_detect(struct drm_connector *connector, bool force) +{ + struct zx_vga *vga = to_zx_vga(connector); + + return vga->connected ? 
connector_status_connected : + connector_status_disconnected; +} + +static const struct drm_connector_funcs zx_vga_connector_funcs = { + .dpms = drm_atomic_helper_connector_dpms, + .fill_modes = drm_helper_probe_single_connector_modes, + .detect = zx_vga_connector_detect, + .destroy = drm_connector_cleanup, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static int zx_vga_register(struct drm_device *drm, struct zx_vga *vga) +{ + struct drm_encoder *encoder = &vga->encoder; + struct drm_connector *connector = &vga->connector; + struct device *dev = vga->dev; + int ret; + + encoder->possible_crtcs = VOU_CRTC_MASK; + + ret = drm_encoder_init(drm, encoder, &zx_vga_encoder_funcs, + DRM_MODE_ENCODER_DAC, NULL); + if (ret) { + DRM_DEV_ERROR(dev, "failed to init encoder: %d\n", ret); + return ret; + }; + + drm_encoder_helper_add(encoder, &zx_vga_encoder_helper_funcs); + + vga->connector.polled = DRM_CONNECTOR_POLL_HPD; + + ret = drm_connector_init(drm, connector, &zx_vga_connector_funcs, + DRM_MODE_CONNECTOR_VGA); + if (ret) { + DRM_DEV_ERROR(dev, "failed to init connector: %d\n", ret); + goto clean_encoder; + }; + + drm_connector_helper_add(connector, &zx_vga_connector_helper_funcs); + + ret = drm_mode_connector_attach_encoder(connector, encoder); + if (ret) { + DRM_DEV_ERROR(dev, "failed to attach encoder: %d\n", ret); + goto clean_connector; + }; + + return 0; + +clean_connector: + drm_connector_cleanup(connector); +clean_encoder: + drm_encoder_cleanup(encoder); + return ret; +} + +static int zx_vga_pwrctrl_init(struct zx_vga *vga) +{ + struct zx_vga_pwrctrl *pwrctrl = &vga->pwrctrl; + struct device *dev = vga->dev; + struct of_phandle_args out_args; + struct regmap *regmap; + int ret; + + ret = of_parse_phandle_with_fixed_args(dev->of_node, + "zte,vga-power-control", 2, 0, &out_args); + if (ret) + return ret; + + regmap = syscon_node_to_regmap(out_args.np); + if (IS_ERR(regmap)) { + ret = PTR_ERR(regmap); + goto out; + } + + pwrctrl->regmap = regmap; + pwrctrl->reg = out_args.args[0]; + pwrctrl->mask = out_args.args[1]; + +out: + of_node_put(out_args.np); + return ret; +} + +static int zx_vga_i2c_read(struct zx_vga *vga, struct i2c_msg *msg) +{ + int len = msg->len; + u8 *buf = msg->buf; + u32 offset = 0; + int i; + + reinit_completion(&vga->complete); + + /* Select combo write */ + zx_writel_mask(vga->mmio + VGA_CMD_CFG, VGA_CMD_COMBO, VGA_CMD_COMBO); + zx_writel_mask(vga->mmio + VGA_CMD_CFG, VGA_CMD_RW, 0); + + while (len > 0) { + u32 cnt; + + /* Clear RX FIFO */ + zx_writel_mask(vga->mmio + VGA_RXF_CTRL, VGA_RX_FIFO_CLEAR, + VGA_RX_FIFO_CLEAR); + + /* Data offset to read from */ + zx_writel(vga->mmio + VGA_SUB_ADDR, offset); + + /* Kick off the transfer */ + zx_writel_mask(vga->mmio + VGA_CMD_CFG, VGA_CMD_TRANS, + VGA_CMD_TRANS); + + if (!wait_for_completion_timeout(&vga->complete, + msecs_to_jiffies(1000))) { + DRM_DEV_ERROR(vga->dev, "transfer timeout\n"); + return -ETIMEDOUT; + } + + cnt = zx_readl(vga->mmio + VGA_RXF_STATUS); + cnt = (cnt & VGA_RXF_COUNT_MASK) >> VGA_RXF_COUNT_SHIFT; + /* FIFO status may report more data than we need to read */ + cnt = min_t(u32, len, cnt); + + for (i = 0; i < cnt; i++) + *buf++ = zx_readl(vga->mmio + VGA_DATA); + + len -= cnt; + offset += cnt; + } + + return 0; +} + +static int zx_vga_i2c_write(struct zx_vga *vga, struct i2c_msg *msg) +{ + /* + * The DDC I2C adapter is only for reading EDID data, so we 
assume + * that the write to this adapter must be the EDID data offset. + */ + if ((msg->len != 1) || ((msg->addr != DDC_ADDR))) + return -EINVAL; + + /* Hardware will take care of the slave address shifting */ + zx_writel(vga->mmio + VGA_DEVICE_ADDR, msg->addr); + + return 0; +} + +static int zx_vga_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, + int num) +{ + struct zx_vga *vga = i2c_get_adapdata(adap); + struct zx_vga_i2c *ddc = vga->ddc; + int ret = 0; + int i; + + mutex_lock(&ddc->lock); + + for (i = 0; i < num; i++) { + if (msgs[i].flags & I2C_M_RD) + ret = zx_vga_i2c_read(vga, &msgs[i]); + else + ret = zx_vga_i2c_write(vga, &msgs[i]); + + if (ret < 0) + break; + } + + if (!ret) + ret = num; + + mutex_unlock(&ddc->lock); + + return ret; +} + +static u32 zx_vga_i2c_func(struct i2c_adapter *adapter) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + +static const struct i2c_algorithm zx_vga_algorithm = { + .master_xfer = zx_vga_i2c_xfer, + .functionality = zx_vga_i2c_func, +}; + +static int zx_vga_ddc_register(struct zx_vga *vga) +{ + struct device *dev = vga->dev; + struct i2c_adapter *adap; + struct zx_vga_i2c *ddc; + int ret; + + ddc = devm_kzalloc(dev, sizeof(*ddc), GFP_KERNEL); + if (!ddc) + return -ENOMEM; + + vga->ddc = ddc; + mutex_init(&ddc->lock); + + adap = &ddc->adap; + adap->owner = THIS_MODULE; + adap->class = I2C_CLASS_DDC; + adap->dev.parent = dev; + adap->algo = &zx_vga_algorithm; + snprintf(adap->name, sizeof(adap->name), "zx vga i2c"); + + ret = i2c_add_adapter(adap); + if (ret) { + DRM_DEV_ERROR(dev, "failed to add I2C adapter: %d\n", ret); + return ret; + } + + i2c_set_adapdata(adap, vga); + + return 0; +} + +static irqreturn_t zx_vga_irq_thread(int irq, void *dev_id) +{ + struct zx_vga *vga = dev_id; + + drm_helper_hpd_irq_event(vga->connector.dev); + + return IRQ_HANDLED; +} + +static irqreturn_t zx_vga_irq_handler(int irq, void *dev_id) +{ + struct zx_vga *vga = dev_id; + u32 status; + + status = zx_readl(vga->mmio + VGA_I2C_STATUS); + + /* Clear interrupt status */ + zx_writel_mask(vga->mmio + VGA_I2C_STATUS, VGA_CLEAR_IRQ, + VGA_CLEAR_IRQ); + + if (status & VGA_DEVICE_CONNECTED) { + /* + * Since VGA_DETECT_SEL bits need to be reset for switching DDC + * bus from device detection to EDID read, rather than setting + * up HAS_DEVICE bit here, we need to do that in .get_modes + * hook for unplug detecting after EDID read succeeds. + */ + vga->connected = true; + return IRQ_WAKE_THREAD; + } + + if (status & VGA_DEVICE_DISCONNECTED) { + zx_writel(vga->mmio + VGA_AUTO_DETECT_SEL, + VGA_DETECT_SEL_NO_DEVICE); + vga->connected = false; + return IRQ_WAKE_THREAD; + } + + if (status & VGA_TRANS_DONE) { + complete(&vga->complete); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +static void zx_vga_hw_init(struct zx_vga *vga) +{ + unsigned long ref = clk_get_rate(vga->i2c_wclk); + int div; + + /* + * Set up I2C fast speed divider per formula below to get 400kHz. + * scl = ref / ((div + 1) * 4) + */ + div = DIV_ROUND_UP(ref / 1000, 400 * 4) - 1; + zx_writel(vga->mmio + VGA_CLK_DIV_FS, div); + + /* Set up device detection */ + zx_writel(vga->mmio + VGA_AUTO_DETECT_PARA, 0x80); + zx_writel(vga->mmio + VGA_AUTO_DETECT_SEL, VGA_DETECT_SEL_NO_DEVICE); + + /* + * We need to poke monitor via DDC bus to get connection irq + * start working. 
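
zx_vga_hw_init() above derives the DDC clock divider from the hardware relation scl = ref / ((div + 1) * 4), rounding up so the resulting bus speed never exceeds the 400 kHz fast-mode target. A standalone check of that formula, assuming a hypothetical 250 MHz i2c_wclk:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    unsigned long ref = 250000000;   /* assumed 250 MHz i2c_wclk */
    unsigned long div = DIV_ROUND_UP(ref / 1000, 400 * 4) - 1;
    unsigned long scl = ref / ((div + 1) * 4);

    printf("div=%lu -> scl=%lu Hz\n", div, scl);   /* stays <= 400000 */
    return 0;
}

Rounding the divisor up (rather than down) errs toward a slower bus, which is the safe direction for an I2C fast-mode limit.
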
+ */ + zx_writel(vga->mmio + VGA_DEVICE_ADDR, DDC_ADDR); + zx_writel_mask(vga->mmio + VGA_CMD_CFG, VGA_CMD_TRANS, VGA_CMD_TRANS); +} + +static int zx_vga_bind(struct device *dev, struct device *master, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct drm_device *drm = data; + struct resource *res; + struct zx_vga *vga; + int irq; + int ret; + + vga = devm_kzalloc(dev, sizeof(*vga), GFP_KERNEL); + if (!vga) + return -ENOMEM; + + vga->dev = dev; + dev_set_drvdata(dev, vga); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + vga->mmio = devm_ioremap_resource(dev, res); + if (IS_ERR(vga->mmio)) + return PTR_ERR(vga->mmio); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + vga->i2c_wclk = devm_clk_get(dev, "i2c_wclk"); + if (IS_ERR(vga->i2c_wclk)) { + ret = PTR_ERR(vga->i2c_wclk); + DRM_DEV_ERROR(dev, "failed to get i2c_wclk: %d\n", ret); + return ret; + } + + ret = zx_vga_pwrctrl_init(vga); + if (ret) { + DRM_DEV_ERROR(dev, "failed to init power control: %d\n", ret); + return ret; + } + + ret = zx_vga_ddc_register(vga); + if (ret) { + DRM_DEV_ERROR(dev, "failed to register ddc: %d\n", ret); + return ret; + } + + ret = zx_vga_register(drm, vga); + if (ret) { + DRM_DEV_ERROR(dev, "failed to register vga: %d\n", ret); + return ret; + } + + init_completion(&vga->complete); + + ret = devm_request_threaded_irq(dev, irq, zx_vga_irq_handler, + zx_vga_irq_thread, IRQF_SHARED, + dev_name(dev), vga); + if (ret) { + DRM_DEV_ERROR(dev, "failed to request threaded irq: %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(vga->i2c_wclk); + if (ret) + return ret; + + zx_vga_hw_init(vga); + + return 0; +} + +static void zx_vga_unbind(struct device *dev, struct device *master, + void *data) +{ + struct zx_vga *vga = dev_get_drvdata(dev); + + clk_disable_unprepare(vga->i2c_wclk); +} + +static const struct component_ops zx_vga_component_ops = { + .bind = zx_vga_bind, + .unbind = zx_vga_unbind, +}; + +static int zx_vga_probe(struct platform_device *pdev) +{ + return component_add(&pdev->dev, &zx_vga_component_ops); +} + +static int zx_vga_remove(struct platform_device *pdev) +{ + component_del(&pdev->dev, &zx_vga_component_ops); + return 0; +} + +static const struct of_device_id zx_vga_of_match[] = { + { .compatible = "zte,zx296718-vga", }, + { /* end */ }, +}; +MODULE_DEVICE_TABLE(of, zx_vga_of_match); + +struct platform_driver zx_vga_driver = { + .probe = zx_vga_probe, + .remove = zx_vga_remove, + .driver = { + .name = "zx-vga", + .of_match_table = zx_vga_of_match, + }, +}; diff --git a/drivers/gpu/drm/zte/zx_vga_regs.h b/drivers/gpu/drm/zte/zx_vga_regs.h new file mode 100644 index 000000000000..feaa345fe6a6 --- /dev/null +++ b/drivers/gpu/drm/zte/zx_vga_regs.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2017 Sanechips Technology Co., Ltd. + * Copyright 2017 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __ZX_VGA_REGS_H__ +#define __ZX_VGA_REGS_H__ + +#define VGA_CMD_CFG 0x04 +#define VGA_CMD_TRANS BIT(6) +#define VGA_CMD_COMBO BIT(5) +#define VGA_CMD_RW BIT(4) +#define VGA_SUB_ADDR 0x0c +#define VGA_DEVICE_ADDR 0x10 +#define VGA_CLK_DIV_FS 0x14 +#define VGA_RXF_CTRL 0x20 +#define VGA_RX_FIFO_CLEAR BIT(7) +#define VGA_DATA 0x24 +#define VGA_I2C_STATUS 0x28 +#define VGA_DEVICE_DISCONNECTED BIT(7) +#define VGA_DEVICE_CONNECTED BIT(6) +#define VGA_CLEAR_IRQ BIT(4) +#define VGA_TRANS_DONE BIT(0) +#define VGA_RXF_STATUS 0x30 +#define VGA_RXF_COUNT_SHIFT 2 +#define VGA_RXF_COUNT_MASK GENMASK(7, 2) +#define VGA_AUTO_DETECT_PARA 0x34 +#define VGA_AUTO_DETECT_SEL 0x38 +#define VGA_DETECT_SEL_HAS_DEVICE BIT(1) +#define VGA_DETECT_SEL_NO_DEVICE BIT(0) + +#endif /* __ZX_VGA_REGS_H__ */ diff --git a/drivers/gpu/drm/zte/zx_vou.c b/drivers/gpu/drm/zte/zx_vou.c index b500c8dd0d9d..5fbd10b60ee5 100644 --- a/drivers/gpu/drm/zte/zx_vou.c +++ b/drivers/gpu/drm/zte/zx_vou.c @@ -23,6 +23,7 @@ #include <drm/drm_plane_helper.h> #include <drm/drmP.h> +#include "zx_common_regs.h" #include "zx_drm_drv.h" #include "zx_plane.h" #include "zx_vou.h" @@ -122,6 +123,8 @@ struct zx_crtc { struct drm_plane *primary; struct zx_vou_hw *vou; void __iomem *chnreg; + void __iomem *chncsc; + void __iomem *dither; const struct zx_crtc_regs *regs; const struct zx_crtc_bits *bits; enum vou_chn_type chn_type; @@ -204,6 +207,11 @@ static struct vou_inf vou_infs[] = { .clocks_en_bits = BIT(15), .clocks_sel_bits = BIT(11) | BIT(0), }, + [VOU_VGA] = { + .data_sel = VOU_RGB_888, + .clocks_en_bits = BIT(1), + .clocks_sel_bits = BIT(10), + }, }; static inline struct zx_vou_hw *crtc_to_vou(struct drm_crtc *crtc) @@ -227,9 +235,26 @@ void vou_inf_enable(enum vou_inf_id id, struct drm_crtc *crtc) struct zx_crtc *zcrtc = to_zx_crtc(crtc); struct zx_vou_hw *vou = zcrtc->vou; struct vou_inf *inf = &vou_infs[id]; + void __iomem *dither = zcrtc->dither; + void __iomem *csc = zcrtc->chncsc; bool is_main = zcrtc->chn_type == VOU_CHN_MAIN; u32 data_sel_shift = id << 1; + if (inf->data_sel != VOU_YUV444) { + /* Enable channel CSC for RGB output */ + zx_writel_mask(csc + CSC_CTRL0, CSC_COV_MODE_MASK, + CSC_BT709_IMAGE_YCBCR2RGB << CSC_COV_MODE_SHIFT); + zx_writel_mask(csc + CSC_CTRL0, CSC_WORK_ENABLE, + CSC_WORK_ENABLE); + + /* Bypass Dither block for RGB output */ + zx_writel_mask(dither + OSD_DITHER_CTRL0, DITHER_BYSPASS, + DITHER_BYSPASS); + } else { + zx_writel_mask(csc + CSC_CTRL0, CSC_WORK_ENABLE, 0); + zx_writel_mask(dither + OSD_DITHER_CTRL0, DITHER_BYSPASS, 0); + } + /* Select data format */ zx_writel_mask(vou->vouctl + VOU_INF_DATA_SEL, 0x3 << data_sel_shift, inf->data_sel << data_sel_shift); @@ -525,20 +550,24 @@ static int zx_crtc_init(struct drm_device *drm, struct zx_vou_hw *vou, if (chn_type == VOU_CHN_MAIN) { zplane->layer = vou->osd + MAIN_GL_OFFSET; - zplane->csc = vou->osd + MAIN_CSC_OFFSET; + zplane->csc = vou->osd + MAIN_GL_CSC_OFFSET; zplane->hbsc = vou->osd + MAIN_HBSC_OFFSET; zplane->rsz = vou->otfppu + MAIN_RSZ_OFFSET; zplane->bits = &zx_gl_bits[0]; zcrtc->chnreg = vou->osd + OSD_MAIN_CHN; + zcrtc->chncsc = vou->osd + MAIN_CHN_CSC_OFFSET; + zcrtc->dither = vou->osd + MAIN_DITHER_OFFSET; zcrtc->regs = &main_crtc_regs; zcrtc->bits = &main_crtc_bits; } else { zplane->layer = vou->osd + AUX_GL_OFFSET; - zplane->csc = vou->osd + AUX_CSC_OFFSET; + zplane->csc = vou->osd + AUX_GL_CSC_OFFSET; zplane->hbsc = vou->osd + AUX_HBSC_OFFSET; zplane->rsz = vou->otfppu + AUX_RSZ_OFFSET; zplane->bits = &zx_gl_bits[1]; 
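+ /*
+ * As on the main channel above, the chncsc and dither bases
+ * recorded below point at the per-channel blocks that
+ * vou_inf_enable() programs to enable CSC and bypass
+ * dithering when driving an RGB output such as VGA.
+ */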
zcrtc->chnreg = vou->osd + OSD_AUX_CHN; + zcrtc->chncsc = vou->osd + AUX_CHN_CSC_OFFSET; + zcrtc->dither = vou->osd + AUX_DITHER_OFFSET; zcrtc->regs = &aux_crtc_regs; zcrtc->bits = &aux_crtc_bits; } @@ -705,9 +734,6 @@ static void vou_hw_init(struct zx_vou_hw *vou) /* Release reset for all VOU modules */ zx_writel(vou->vouctl + VOU_SOFT_RST, ~0); - /* Enable clock auto-gating for all VOU modules */ - zx_writel(vou->vouctl + VOU_CLK_REQEN, ~0); - /* Enable all VOU module clocks */ zx_writel(vou->vouctl + VOU_CLK_EN, ~0); diff --git a/drivers/gpu/drm/zte/zx_vou_regs.h b/drivers/gpu/drm/zte/zx_vou_regs.h index c066ef123434..5a218351b497 100644 --- a/drivers/gpu/drm/zte/zx_vou_regs.h +++ b/drivers/gpu/drm/zte/zx_vou_regs.h @@ -13,13 +13,17 @@ /* Sub-module offset */ #define MAIN_GL_OFFSET 0x130 -#define MAIN_CSC_OFFSET 0x580 +#define MAIN_GL_CSC_OFFSET 0x580 +#define MAIN_CHN_CSC_OFFSET 0x6c0 #define MAIN_HBSC_OFFSET 0x820 +#define MAIN_DITHER_OFFSET 0x960 #define MAIN_RSZ_OFFSET 0x600 /* OTFPPU sub-module */ #define AUX_GL_OFFSET 0x200 -#define AUX_CSC_OFFSET 0x5d0 +#define AUX_GL_CSC_OFFSET 0x5d0 +#define AUX_CHN_CSC_OFFSET 0x710 #define AUX_HBSC_OFFSET 0x860 +#define AUX_DITHER_OFFSET 0x970 #define AUX_RSZ_OFFSET 0x800 #define OSD_VL0_OFFSET 0x040 @@ -78,6 +82,10 @@ #define CHN_INTERLACE_BUF_CTRL 0x24 #define CHN_INTERLACE_EN BIT(2) +/* Dither registers */ +#define OSD_DITHER_CTRL0 0x00 +#define DITHER_BYSPASS BIT(31) + /* TIMING_CTRL registers */ #define TIMING_TC_ENABLE 0x04 #define AUX_TC_EN BIT(1) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 6283b99d2b17..d1263b82d646 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -94,9 +94,9 @@ static void dw_i2c_acpi_params(struct platform_device *pdev, char method[], static int dw_i2c_acpi_configure(struct platform_device *pdev) { struct dw_i2c_dev *dev = platform_get_drvdata(pdev); + u32 ss_ht = 0, fp_ht = 0, hs_ht = 0, fs_ht = 0; acpi_handle handle = ACPI_HANDLE(&pdev->dev); const struct acpi_device_id *id; - u32 ss_ht, fp_ht, hs_ht, fs_ht; struct acpi_device *adev; const char *uid; diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c index 0ed77eeff31e..a2e3dd715380 100644 --- a/drivers/i2c/busses/i2c-tiny-usb.c +++ b/drivers/i2c/busses/i2c-tiny-usb.c @@ -178,22 +178,39 @@ static int usb_read(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; + void *dmadata = kmalloc(len, GFP_KERNEL); + int ret; + + if (!dmadata) + return -ENOMEM; /* do control transfer */ - return usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0), + ret = usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | - USB_DIR_IN, value, index, data, len, 2000); + USB_DIR_IN, value, index, dmadata, len, 2000); + + memcpy(data, dmadata, len); + kfree(dmadata); + return ret; } static int usb_write(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; + void *dmadata = kmemdup(data, len, GFP_KERNEL); + int ret; + + if (!dmadata) + return -ENOMEM; /* do control transfer */ - return usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0), + ret = usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE, - value, index, 
data, len, 2000); + value, index, dmadata, len, 2000); + + kfree(dmadata); + return ret; } static void i2c_tiny_usb_free(struct i2c_tiny_usb *dev) diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c index a679e56c44cd..f431da07f861 100644 --- a/drivers/input/mouse/elan_i2c_i2c.c +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -554,32 +554,34 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client, struct completion *completion) { struct device *dev = &client->dev; - long ret; int error; int len; - u8 buffer[ETP_I2C_INF_LENGTH]; + u8 buffer[ETP_I2C_REPORT_LEN]; + + len = i2c_master_recv(client, buffer, ETP_I2C_REPORT_LEN); + if (len != ETP_I2C_REPORT_LEN) { + error = len < 0 ? len : -EIO; + dev_warn(dev, "failed to read I2C data after FW WDT reset: %d (%d)\n", + error, len); + } reinit_completion(completion); enable_irq(client->irq); error = elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, ETP_I2C_RESET); - if (!error) - ret = wait_for_completion_interruptible_timeout(completion, - msecs_to_jiffies(300)); - disable_irq(client->irq); - if (error) { dev_err(dev, "device reset failed: %d\n", error); - return error; - } else if (ret == 0) { + } else if (!wait_for_completion_timeout(completion, + msecs_to_jiffies(300))) { dev_err(dev, "timeout waiting for device reset\n"); - return -ETIMEDOUT; - } else if (ret < 0) { - error = ret; - dev_err(dev, "error waiting for device reset: %d\n", error); - return error; + error = -ETIMEDOUT; } + disable_irq(client->irq); + + if (error) + return error; + len = i2c_master_recv(client, buffer, ETP_I2C_INF_LENGTH); if (len != ETP_I2C_INF_LENGTH) { error = len < 0 ? len : -EIO; diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 2302aef2b2d4..dd042a9b0aaa 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -350,6 +350,7 @@ static bool mxt_object_readable(unsigned int type) case MXT_TOUCH_KEYARRAY_T15: case MXT_TOUCH_PROXIMITY_T23: case MXT_TOUCH_PROXKEY_T52: + case MXT_TOUCH_MULTITOUCHSCREEN_T100: case MXT_PROCI_GRIPFACE_T20: case MXT_PROCG_NOISE_T22: case MXT_PROCI_ONETOUCH_T24: diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 8cf8d8d5d4ef..f872817e81e4 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -471,7 +471,7 @@ static EDT_ATTR(gain, S_IWUSR | S_IRUGO, WORK_REGISTER_GAIN, static EDT_ATTR(offset, S_IWUSR | S_IRUGO, WORK_REGISTER_OFFSET, M09_REGISTER_OFFSET, 0, 31); static EDT_ATTR(threshold, S_IWUSR | S_IRUGO, WORK_REGISTER_THRESHOLD, - M09_REGISTER_THRESHOLD, 20, 80); + M09_REGISTER_THRESHOLD, 0, 80); static EDT_ATTR(report_rate, S_IWUSR | S_IRUGO, WORK_REGISTER_REPORT_RATE, NO_REGISTER, 3, 14); diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index 78a7ce816a47..9a873118ea5f 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -285,7 +285,7 @@ static int pca955x_probe(struct i2c_client *client, "slave address 0x%02x\n", client->name, chip->bits, client->addr); - if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) return -EIO; if (pdata) { diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c index 1304160de168..13ef162cf066 100644 --- a/drivers/mmc/core/pwrseq_simple.c +++ b/drivers/mmc/core/pwrseq_simple.c @@ -27,6 +27,7 @@ struct mmc_pwrseq_simple { struct mmc_pwrseq pwrseq; 
bool clk_enabled; u32 post_power_on_delay_ms; + u32 power_off_delay_us; struct clk *ext_clk; struct gpio_descs *reset_gpios; }; @@ -78,6 +79,10 @@ static void mmc_pwrseq_simple_power_off(struct mmc_host *host) mmc_pwrseq_simple_set_gpios_value(pwrseq, 1); + if (pwrseq->power_off_delay_us) + usleep_range(pwrseq->power_off_delay_us, + 2 * pwrseq->power_off_delay_us); + if (!IS_ERR(pwrseq->ext_clk) && pwrseq->clk_enabled) { clk_disable_unprepare(pwrseq->ext_clk); pwrseq->clk_enabled = false; @@ -119,6 +124,8 @@ static int mmc_pwrseq_simple_probe(struct platform_device *pdev) device_property_read_u32(dev, "post-power-on-delay-ms", &pwrseq->post_power_on_delay_ms); + device_property_read_u32(dev, "power-off-delay-us", + &pwrseq->power_off_delay_us); pwrseq->pwrseq.dev = dev; pwrseq->pwrseq.ops = &mmc_pwrseq_simple_ops; diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c index 772d0900026d..951d2cdd7888 100644 --- a/drivers/mmc/host/cavium-octeon.c +++ b/drivers/mmc/host/cavium-octeon.c @@ -108,7 +108,7 @@ static void octeon_mmc_release_bus(struct cvm_mmc_host *host) static void octeon_mmc_int_enable(struct cvm_mmc_host *host, u64 val) { writeq(val, host->base + MIO_EMM_INT(host)); - if (!host->dma_active || (host->dma_active && !host->has_ciu3)) + if (!host->has_ciu3) writeq(val, host->base + MIO_EMM_INT_EN(host)); } @@ -267,7 +267,7 @@ static int octeon_mmc_probe(struct platform_device *pdev) } host->global_pwr_gpiod = devm_gpiod_get_optional(&pdev->dev, - "power-gpios", + "power", GPIOD_OUT_HIGH); if (IS_ERR(host->global_pwr_gpiod)) { dev_err(&pdev->dev, "Invalid power GPIO\n"); @@ -288,11 +288,20 @@ static int octeon_mmc_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Error populating slots\n"); octeon_mmc_set_shared_power(host, 0); - return ret; + goto error; } i++; } return 0; + +error: + for (i = 0; i < CAVIUM_MAX_MMC; i++) { + if (host->slot[i]) + cvm_mmc_of_slot_remove(host->slot[i]); + if (host->slot_pdev[i]) + of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL); + } + return ret; } static int octeon_mmc_remove(struct platform_device *pdev) diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c index fe3d77267cd6..b9cc95998799 100644 --- a/drivers/mmc/host/cavium-thunderx.c +++ b/drivers/mmc/host/cavium-thunderx.c @@ -146,6 +146,12 @@ static int thunder_mmc_probe(struct pci_dev *pdev, return 0; error: + for (i = 0; i < CAVIUM_MAX_MMC; i++) { + if (host->slot[i]) + cvm_mmc_of_slot_remove(host->slot[i]); + if (host->slot_pdev[i]) + of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL); + } clk_disable_unprepare(host->clk); return ret; } diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c index 58b51ba6aabd..b8aaf0fdb77c 100644 --- a/drivers/mmc/host/cavium.c +++ b/drivers/mmc/host/cavium.c @@ -839,14 +839,14 @@ static void cvm_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) cvm_mmc_reset_bus(slot); if (host->global_pwr_gpiod) host->set_shared_power(host, 0); - else + else if (!IS_ERR(mmc->supply.vmmc)) mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0); break; case MMC_POWER_UP: if (host->global_pwr_gpiod) host->set_shared_power(host, 1); - else + else if (!IS_ERR(mmc->supply.vmmc)) mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd); break; } @@ -968,20 +968,15 @@ static int cvm_mmc_of_parse(struct device *dev, struct cvm_mmc_slot *slot) return -EINVAL; } - mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc"); - if (IS_ERR(mmc->supply.vmmc)) { - if 
(PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER) - return -EPROBE_DEFER; - /* - * Legacy Octeon firmware has no regulator entry, fall-back to - * a hard-coded voltage to get a sane OCR. - */ + ret = mmc_regulator_get_supply(mmc); + if (ret == -EPROBE_DEFER) + return ret; + /* + * Legacy Octeon firmware has no regulator entry, fall-back to + * a hard-coded voltage to get a sane OCR. + */ + if (IS_ERR(mmc->supply.vmmc)) mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - } else { - ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc); - if (ret > 0) - mmc->ocr_avail = ret; - } /* Common MMC bindings */ ret = mmc_of_parse(mmc); diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 3275d4995812..61666d269771 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -187,7 +187,8 @@ static const struct sdhci_iproc_data iproc_cygnus_data = { }; static const struct sdhci_pltfm_data sdhci_iproc_pltfm_data = { - .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, + .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | + SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN, .ops = &sdhci_iproc_ops, }; diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 6356781f1cca..f7e26b031e76 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -787,14 +787,6 @@ int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios) return ret; } -void xenon_clean_phy(struct sdhci_host *host) -{ - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); - - kfree(priv->phy_params); -} - static int xenon_add_phy(struct device_node *np, struct sdhci_host *host, const char *phy_name) { @@ -819,11 +811,7 @@ static int xenon_add_phy(struct device_node *np, struct sdhci_host *host, if (ret) return ret; - ret = xenon_emmc_phy_parse_param_dt(host, np, priv->phy_params); - if (ret) - xenon_clean_phy(host); - - return ret; + return xenon_emmc_phy_parse_param_dt(host, np, priv->phy_params); } int xenon_phy_parse_dt(struct device_node *np, struct sdhci_host *host) diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 67246655315b..bc1781bb070b 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -486,7 +486,7 @@ static int xenon_probe(struct platform_device *pdev) err = xenon_sdhc_prepare(host); if (err) - goto clean_phy_param; + goto err_clk; err = sdhci_add_host(host); if (err) @@ -496,8 +496,6 @@ static int xenon_probe(struct platform_device *pdev) remove_sdhc: xenon_sdhc_unprepare(host); -clean_phy_param: - xenon_clean_phy(host); err_clk: clk_disable_unprepare(pltfm_host->clk); free_pltfm: @@ -510,8 +508,6 @@ static int xenon_remove(struct platform_device *pdev) struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - xenon_clean_phy(host); - sdhci_remove_host(host, 0); xenon_sdhc_unprepare(host); diff --git a/drivers/mmc/host/sdhci-xenon.h b/drivers/mmc/host/sdhci-xenon.h index 6e6523ea01ce..73debb42dc2f 100644 --- a/drivers/mmc/host/sdhci-xenon.h +++ b/drivers/mmc/host/sdhci-xenon.h @@ -93,7 +93,6 @@ struct xenon_priv { }; int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios); -void xenon_clean_phy(struct sdhci_host *host); int xenon_phy_parse_dt(struct device_node *np, struct sdhci_host *host); void xenon_soc_pad_ctrl(struct sdhci_host *host, diff --git a/drivers/net/bonding/bond_3ad.c 
b/drivers/net/bonding/bond_3ad.c index c5fd4259da33..b44a6aeb346d 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2577,7 +2577,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, return -1; ad_info->aggregator_id = aggregator->aggregator_identifier; - ad_info->ports = aggregator->num_of_ports; + ad_info->ports = __agg_active_ports(aggregator); ad_info->actor_key = aggregator->actor_oper_aggregator_key; ad_info->partner_key = aggregator->partner_oper_aggregator_key; ether_addr_copy(ad_info->partner_system, diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2be78807fd6e..2359478b977f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2612,11 +2612,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); + slave->new_link = BOND_LINK_NOCHANGE; + if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, trans_start, 1) && bond_time_in_interval(bond, slave->last_rx, 1)) { - slave->link = BOND_LINK_UP; + slave->new_link = BOND_LINK_UP; slave_state_changed = 1; /* primary_slave has no meaning in round-robin @@ -2643,7 +2645,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) if (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, slave->last_rx, 2)) { - slave->link = BOND_LINK_DOWN; + slave->new_link = BOND_LINK_DOWN; slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) @@ -2674,6 +2676,11 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) if (!rtnl_trylock()) goto re_arm; + bond_for_each_slave(bond, slave, iter) { + if (slave->new_link != BOND_LINK_NOCHANGE) + slave->link = slave->new_link; + } + if (slave_state_changed) { bond_slave_state_change(bond); if (BOND_MODE(bond) == BOND_MODE_XOR) @@ -4271,10 +4278,10 @@ static int bond_check_params(struct bond_params *params) int arp_validate_value, fail_over_mac_value, primary_reselect_value, i; struct bond_opt_value newval; const struct bond_opt_value *valptr; - int arp_all_targets_value; + int arp_all_targets_value = 0; u16 ad_actor_sys_prio = 0; u16 ad_user_port_key = 0; - __be32 arp_target[BOND_MAX_ARP_TARGETS]; + __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 }; int arp_ip_count; int bond_mode = BOND_MODE_ROUNDROBIN; int xmit_hashtype = BOND_XMIT_POLICY_LAYER2; @@ -4501,7 +4508,6 @@ static int bond_check_params(struct bond_params *params) arp_validate_value = 0; } - arp_all_targets_value = 0; if (arp_all_targets) { bond_opt_initstr(&newval, arp_all_targets); valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS), diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index b0a3b85fc6f8..db02bc2fb4b2 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -748,13 +748,13 @@ static int ax_init_dev(struct net_device *dev) ret = ax_mii_init(dev); if (ret) - goto out_irq; + goto err_out; ax_NS8390_init(dev, 0); ret = register_netdev(dev); if (ret) - goto out_irq; + goto err_out; netdev_info(dev, "%dbit, irq %d, %lx, MAC: %pM\n", ei_local->word16 ? 
16 : 8, dev->irq, dev->base_addr, @@ -762,9 +762,6 @@ static int ax_init_dev(struct net_device *dev) return 0; - out_irq: - /* cleanup irq */ - free_irq(dev->irq, dev); err_out: return ret; } diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 63f2deec2a52..77a1c03255de 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1353,6 +1353,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) && pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { printk(KERN_ERR "atl2: No usable DMA configuration, aborting\n"); + err = -EIO; goto err_dma; } @@ -1366,10 +1367,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * pcibios_set_master to do the needed arch specific settings */ pci_set_master(pdev); - err = -ENOMEM; netdev = alloc_etherdev(sizeof(struct atl2_adapter)); - if (!netdev) + if (!netdev) { + err = -ENOMEM; goto err_alloc_etherdev; + } SET_NETDEV_DEV(netdev, &pdev->dev); @@ -1408,8 +1410,6 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_sw_init; - err = -EIO; - netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX; netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index f3a09ab55900..4eee18ce9be4 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5078,9 +5078,11 @@ static netdev_features_t be_features_check(struct sk_buff *skb, struct be_adapter *adapter = netdev_priv(dev); u8 l4_hdr = 0; - /* The code below restricts offload features for some tunneled packets. + /* The code below restricts offload features for some tunneled and + * Q-in-Q packets. * Offload features for normal (non tunnel) packets are unchanged. 
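* The Q-in-Q part is handled by vlan_features_check() below: for
* frames carrying more than one VLAN tag it trims the offload set
* down to what is safe on such packets, since the hardware cannot
* parse headers past the second tag.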
*/ + features = vlan_features_check(skb, features); if (!skb->encapsulation || !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)) return features; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 56a563f90b0b..f7c8649fd28f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3192,7 +3192,7 @@ static int fec_reset_phy(struct platform_device *pdev) { int err, phy_reset; bool active_high = false; - int msec = 1; + int msec = 1, phy_post_delay = 0; struct device_node *np = pdev->dev.of_node; if (!np) @@ -3209,6 +3209,11 @@ static int fec_reset_phy(struct platform_device *pdev) else if (!gpio_is_valid(phy_reset)) return 0; + err = of_property_read_u32(np, "phy-reset-post-delay", &phy_post_delay); + /* valid reset duration should be less than 1s */ + if (!err && phy_post_delay > 1000) + return -EINVAL; + active_high = of_property_read_bool(np, "phy-reset-active-high"); err = devm_gpio_request_one(&pdev->dev, phy_reset, @@ -3226,6 +3231,15 @@ static int fec_reset_phy(struct platform_device *pdev) gpio_set_value_cansleep(phy_reset, !active_high); + if (!phy_post_delay) + return 0; + + if (phy_post_delay > 20) + msleep(phy_post_delay); + else + usleep_range(phy_post_delay * 1000, + phy_post_delay * 1000 + 1000); + return 0; } #else /* CONFIG_OF */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 5bdaf3d545b2..10d282841f5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -774,7 +774,7 @@ static void cb_timeout_handler(struct work_struct *work) mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } static void cmd_work_handler(struct work_struct *work) @@ -804,6 +804,7 @@ static void cmd_work_handler(struct work_struct *work) } cmd->ent_arr[ent->idx] = ent; + set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); lay = get_inst(cmd, ent->idx); ent->lay = lay; memset(lay, 0, sizeof(*lay)); @@ -825,6 +826,20 @@ static void cmd_work_handler(struct work_struct *work) if (ent->callback) schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + /* Skip sending command to fw if internal error */ + if (pci_channel_offline(dev->pdev) || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + u8 status = 0; + u32 drv_synd; + + ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status); + MLX5_SET(mbox_out, ent->out, status, status); + MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); + + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + return; + } + /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -835,7 +850,7 @@ static void cmd_work_handler(struct work_struct *work) poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT)); } } @@ -879,7 +894,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) wait_for_completion(&ent->done); } else if (!wait_for_completion_timeout(&ent->done, timeout)) { ent->ret = -ETIMEDOUT; - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << 
ent->idx, true); } err = ent->ret; @@ -1375,7 +1390,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1395,6 +1410,19 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) struct semaphore *sem; ent = cmd->ent_arr[i]; + + /* if we already completed the command, ignore it */ + if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, + &ent->state)) { + /* only real completion can free the cmd slot */ + if (!forced) { + mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", + ent->idx); + free_ent(cmd, ent->idx); + } + continue; + } + if (ent->callback) cancel_delayed_work(&ent->cb_timeout_work); if (ent->page_queue) @@ -1417,7 +1445,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", ent->ret, deliv_status_to_str(ent->status), ent->status); } - free_ent(cmd, ent->idx); + + /* only real completion will free the entry slot */ + if (!forced) + free_ent(cmd, ent->idx); if (ent->callback) { ds = ent->ts2 - ent->ts1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7b1566f0ae58..66b5fec15313 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1041,6 +1041,8 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) #define MLX5_IB_GRH_BYTES 40 #define MLX5_IPOIB_ENCAP_LEN 4 #define MLX5_GID_SIZE 16 +#define MLX5_IPOIB_PSEUDO_LEN 20 +#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN) static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, @@ -1048,6 +1050,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; + char *pseudo_header; u8 *dgid; u8 g; @@ -1076,8 +1079,11 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, if (likely(netdev->features & NETIF_F_RXHASH)) mlx5e_skb_set_hash(cqe, skb); + /* 20 bytes of ipoib header and 4 for encap existing */ + pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN); + memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN); skb_reset_mac_header(skb); - skb_pull(skb, MLX5_IPOIB_ENCAP_LEN); + skb_pull(skb, MLX5_IPOIB_HARD_LEN); skb->dev = netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 11c27e4fadf6..ec63158ab643 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -43,6 +43,7 @@ #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> +#include <net/tc_act/tc_csum.h> #include <net/vxlan.h> #include <net/arp.h> #include "en.h" @@ -384,7 +385,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, if (e->flags & MLX5_ENCAP_ENTRY_VALID) mlx5_encap_dealloc(priv->mdev, e->encap_id); - hlist_del_rcu(&e->encap_hlist); + hash_del_rcu(&e->encap_hlist); kfree(e->encap_header); kfree(e); } @@ -925,11 +926,11 @@ static int offload_pedit_fields(struct pedit_headers *masks, struct mlx5e_tc_flow_parse_attr *parse_attr) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - int i, action_size, nactions, max_actions, first, last; + int i, 
action_size, nactions, max_actions, first, last, first_z; void *s_masks_p, *a_masks_p, *vals_p; - u32 s_mask, a_mask, val; struct mlx5_fields *f; u8 cmd, field_bsize; + u32 s_mask, a_mask; unsigned long mask; void *action; @@ -946,7 +947,8 @@ static int offload_pedit_fields(struct pedit_headers *masks, for (i = 0; i < ARRAY_SIZE(fields); i++) { f = &fields[i]; /* avoid seeing bits set from previous iterations */ - s_mask = a_mask = mask = val = 0; + s_mask = 0; + a_mask = 0; s_masks_p = (void *)set_masks + f->offset; a_masks_p = (void *)add_masks + f->offset; @@ -981,12 +983,12 @@ static int offload_pedit_fields(struct pedit_headers *masks, memset(a_masks_p, 0, f->size); } - memcpy(&val, vals_p, f->size); - field_bsize = f->size * BITS_PER_BYTE; + + first_z = find_first_zero_bit(&mask, field_bsize); first = find_first_bit(&mask, field_bsize); last = find_last_bit(&mask, field_bsize); - if (first > 0 || last != (field_bsize - 1)) { + if (first > 0 || last != (field_bsize - 1) || first_z < last) { printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n", mask); return -EOPNOTSUPP; @@ -1002,11 +1004,11 @@ static int offload_pedit_fields(struct pedit_headers *masks, } if (field_bsize == 32) - MLX5_SET(set_action_in, action, data, ntohl(val)); + MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p)); else if (field_bsize == 16) - MLX5_SET(set_action_in, action, data, ntohs(val)); + MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p)); else if (field_bsize == 8) - MLX5_SET(set_action_in, action, data, val); + MLX5_SET(set_action_in, action, data, *(u8 *)vals_p); action += action_size; nactions++; @@ -1109,6 +1111,28 @@ out_err: return err; } +static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags) +{ + u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | + TCA_CSUM_UPDATE_FLAG_UDP; + + /* The HW recalcs checksums only if re-writing headers */ + if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { + netdev_warn(priv->netdev, + "TC csum action is only offloaded with pedit\n"); + return false; + } + + if (update_flags & ~prot_flags) { + netdev_warn(priv->netdev, + "can't offload TC csum action for some header/s - flags %#x\n", + update_flags); + return false; + } + + return true; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow) @@ -1149,6 +1173,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_csum(a)) { + if (csum_offload_supported(priv, attr->action, + tcf_csum_update_flags(a))) + continue; + + return -EOPNOTSUPP; + } + if (is_tcf_skbedit_mark(a)) { u32 mark = tcf_skbedit_mark(a); @@ -1651,6 +1683,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_csum(a)) { + if (csum_offload_supported(priv, attr->action, + tcf_csum_update_flags(a))) + continue; + + return -EOPNOTSUPP; + } + if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev, *encap_dev = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ea5d8d37a75c..33eae5ad2fb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) break; case MLX5_EVENT_TYPE_CMD: - 
mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector)); + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); break; case MLX5_EVENT_TYPE_PORT_CHANGE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index d0515391d33b..44f59b1d6f0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -90,7 +90,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev) spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); mlx5_core_dbg(dev, "vector 0x%llx\n", vector); - mlx5_cmd_comp_handler(dev, vector); + mlx5_cmd_comp_handler(dev, vector, true); return; no_trig: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0c123d571b4c..fe5546bb4153 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -612,7 +612,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; struct msix_entry *msix = priv->msix_arr; int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - int err; if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); @@ -622,18 +621,12 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), priv->irq_info[i].mask); - err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); - if (err) { - mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x", - irq); - goto err_clear_mask; - } +#ifdef CONFIG_SMP + if (irq_set_affinity_hint(irq, priv->irq_info[i].mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); +#endif return 0; - -err_clear_mask: - free_cpumask_var(priv->irq_info[i].mask); - return err; } static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index dec5d563ab19..959fd12d2e67 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1293,7 +1293,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) goto nla_put_failure; - if (ip_tunnel_info_af(info) == AF_INET) { + if (rtnl_dereference(geneve->sock4)) { if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, info->key.u.ipv4.dst)) goto nla_put_failure; @@ -1302,8 +1302,10 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) !!(info->key.tun_flags & TUNNEL_CSUM))) goto nla_put_failure; + } + #if IS_ENABLED(CONFIG_IPV6) - } else { + if (rtnl_dereference(geneve->sock6)) { if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, &info->key.u.ipv6.dst)) goto nla_put_failure; @@ -1315,8 +1317,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, !geneve->use_udp6_rx_checksums)) goto nla_put_failure; -#endif } +#endif if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 4fea1b3dfbb4..7b652bb7ebe4 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -873,7 +873,7 @@ static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[]) /* Check if there's an existing gtpX device to configure */ dev = dev_get_by_index_rcu(net, nla_get_u32(nla[GTPA_LINK])); - if (dev->netdev_ops == 
>p_netdev_ops) + if (dev && dev->netdev_ops == >p_netdev_ops) gtp = netdev_priv(dev); put_net(net); diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 60ffc9da6a28..c360dd6ead22 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -108,7 +108,7 @@ config MDIO_MOXART config MDIO_OCTEON tristate "Octeon and some ThunderX SOCs MDIO buses" depends on 64BIT - depends on HAS_IOMEM + depends on HAS_IOMEM && OF_MDIO select MDIO_CAVIUM help This module provides a driver for the Octeon and ThunderX MDIO diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 272b051a0199..9097e42bec2e 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -255,34 +255,6 @@ static int marvell_config_aneg(struct phy_device *phydev) { int err; - /* The Marvell PHY has an errata which requires - * that certain registers get written in order - * to restart autonegotiation */ - err = phy_write(phydev, MII_BMCR, BMCR_RESET); - - if (err < 0) - return err; - - err = phy_write(phydev, 0x1d, 0x1f); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0x200c); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1d, 0x5); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0x100); - if (err < 0) - return err; - err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; @@ -316,6 +288,42 @@ static int marvell_config_aneg(struct phy_device *phydev) return 0; } +static int m88e1101_config_aneg(struct phy_device *phydev) +{ + int err; + + /* This Marvell PHY has an errata which requires + * that certain registers get written in order + * to restart autonegotiation + */ + err = phy_write(phydev, MII_BMCR, BMCR_RESET); + + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x1f); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x200c); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x5); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x100); + if (err < 0) + return err; + + return marvell_config_aneg(phydev); +} + static int m88e1111_config_aneg(struct phy_device *phydev) { int err; @@ -1892,7 +1900,7 @@ static struct phy_driver marvell_drivers[] = { .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &marvell_config_init, - .config_aneg = &marvell_config_aneg, + .config_aneg = &m88e1101_config_aneg, .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index f3ae88fdf332..8ab281b478f2 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -310,6 +310,26 @@ skip: return -ENODEV; } + return 0; + +bad_desc: + dev_info(&dev->udev->dev, "bad CDC descriptors\n"); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind); + + +/* like usbnet_generic_cdc_bind() but handles filter initialization + * correctly + */ +int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf) +{ + int rv; + + rv = usbnet_generic_cdc_bind(dev, intf); + if (rv < 0) + goto bail_out; + /* Some devices don't initialise properly. In particular * the packet filter is not reset. There are devices that * don't do reset all the way. 
So the packet filter should @@ -317,13 +337,10 @@ skip: */ usbnet_cdc_update_filter(dev); - return 0; - -bad_desc: - dev_info(&dev->udev->dev, "bad CDC descriptors\n"); - return -ENODEV; +bail_out: + return rv; } -EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind); +EXPORT_SYMBOL_GPL(usbnet_ether_cdc_bind); void usbnet_cdc_unbind(struct usbnet *dev, struct usb_interface *intf) { @@ -417,7 +434,7 @@ int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf) BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data) < sizeof(struct cdc_state))); - status = usbnet_generic_cdc_bind(dev, intf); + status = usbnet_ether_cdc_bind(dev, intf); if (status < 0) return status; diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 765400b62168..2dfca96a63b6 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -681,7 +681,7 @@ static int smsc95xx_set_features(struct net_device *netdev, if (ret < 0) return ret; - if (features & NETIF_F_HW_CSUM) + if (features & NETIF_F_IP_CSUM) read_buf |= Tx_COE_EN_; else read_buf &= ~Tx_COE_EN_; @@ -1279,12 +1279,19 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) spin_lock_init(&pdata->mac_cr_lock); + /* LAN95xx devices do not alter the computed checksum of 0 to 0xffff. + * RFC 2460, ipv6 UDP calculated checksum yields a result of zero must + * be changed to 0xffff. RFC 768, ipv4 UDP computed checksum is zero, + * it is transmitted as all ones. The zero transmitted checksum means + * transmitter generated no checksum. Hence, enable csum offload only + * for ipv4 packets. + */ if (DEFAULT_TX_CSUM_ENABLE) - dev->net->features |= NETIF_F_HW_CSUM; + dev->net->features |= NETIF_F_IP_CSUM; if (DEFAULT_RX_CSUM_ENABLE) dev->net->features |= NETIF_F_RXCSUM; - dev->net->hw_features = NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->net->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM; smsc95xx_init_mac_address(dev); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9320d96a1632..3e9246cc49c3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1989,6 +1989,7 @@ static const struct net_device_ops virtnet_netdev = { .ndo_poll_controller = virtnet_netpoll, #endif .ndo_xdp = virtnet_xdp, + .ndo_features_check = passthru_features_check, }; static void virtnet_config_changed_work(struct work_struct *work) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d5e0906262ea..a60926410438 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -925,6 +925,29 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY +static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id, + u16 bs) +{ + struct nvme_ns *ns = disk->private_data; + u16 old_ms = ns->ms; + u8 pi_type = 0; + + ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); + ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); + + /* PI implementation requires metadata equal t10 pi tuple size */ + if (ns->ms == sizeof(struct t10_pi_tuple)) + pi_type = id->dps & NVME_NS_DPS_PI_MASK; + + if (blk_get_integrity(disk) && + (ns->pi_type != pi_type || ns->ms != old_ms || + bs != queue_logical_block_size(disk->queue) || + (ns->ms && ns->ext))) + blk_integrity_unregister(disk); + + ns->pi_type = pi_type; +} + static void nvme_init_integrity(struct nvme_ns *ns) { struct blk_integrity integrity; @@ -951,6 +974,10 @@ static void nvme_init_integrity(struct nvme_ns *ns) blk_queue_max_integrity_segments(ns->queue, 1); } #else +static 
void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id, + u16 bs) +{ +} static void nvme_init_integrity(struct nvme_ns *ns) { } @@ -997,37 +1024,22 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; - u8 lbaf, pi_type; - u16 old_ms; - unsigned short bs; - - old_ms = ns->ms; - lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; - ns->lba_shift = id->lbaf[lbaf].ds; - ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); + u16 bs; /* * If identify namespace failed, use default 512 byte block size so * block layer can use before failing read/write for 0 capacity. */ + ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; if (ns->lba_shift == 0) ns->lba_shift = 9; bs = 1 << ns->lba_shift; - /* XXX: PI implementation requires metadata equal t10 pi tuple size */ - pi_type = ns->ms == sizeof(struct t10_pi_tuple) ? - id->dps & NVME_NS_DPS_PI_MASK : 0; blk_mq_freeze_queue(disk->queue); - if (blk_get_integrity(disk) && (ns->pi_type != pi_type || - ns->ms != old_ms || - bs != queue_logical_block_size(disk->queue) || - (ns->ms && ns->ext))) - blk_integrity_unregister(disk); - ns->pi_type = pi_type; + if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) + nvme_prep_integrity(disk, id, bs); blk_queue_logical_block_size(ns->queue, bs); - if (ns->ms && !blk_get_integrity(disk) && !ns->ext) nvme_init_integrity(ns); if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) @@ -1605,7 +1617,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); - if (ctrl->ops->is_fabrics) { + if (ctrl->ops->flags & NVME_F_FABRICS) { ctrl->icdoff = le16_to_cpu(id->icdoff); ctrl->ioccsz = le32_to_cpu(id->ioccsz); ctrl->iorcsz = le32_to_cpu(id->iorcsz); @@ -2098,7 +2110,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (ns->ndev) nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); - blk_mq_abort_requeue_list(ns->queue); blk_cleanup_queue(ns->queue); } @@ -2436,8 +2447,16 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) continue; revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); - blk_mq_abort_requeue_list(ns->queue); - blk_mq_start_stopped_hw_queues(ns->queue, true); + + /* + * Forcibly start all queues to avoid having stuck requests. + * Note that we must ensure the queues are not stopped + * when the final removal happens. 
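+ * Since blk_set_queue_dying() was called above, anything
+ * started here fails fast instead of reaching the dead
+ * controller; leaving the hardware queues stopped would let
+ * those requests sit forever and hang the final
+ * blk_cleanup_queue().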
+ */ + blk_mq_start_hw_queues(ns->queue); + + /* draining requests in requeue list */ + blk_mq_kick_requeue_list(ns->queue); } mutex_unlock(&ctrl->namespaces_mutex); } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index dca7165fabcf..5b14cbefb724 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -45,8 +45,6 @@ enum nvme_fc_queue_flags { #define NVMEFC_QUEUE_DELAY 3 /* ms units */ -#define NVME_FC_MAX_CONNECT_ATTEMPTS 1 - struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; struct device *dev; @@ -165,8 +163,6 @@ struct nvme_fc_ctrl { struct work_struct delete_work; struct work_struct reset_work; struct delayed_work connect_work; - int reconnect_delay; - int connect_attempts; struct kref ref; u32 flags; @@ -1376,9 +1372,9 @@ done: complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); if (!complete_rq) { if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { - status = cpu_to_le16(NVME_SC_ABORT_REQ); + status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); if (blk_queue_dying(rq->q)) - status |= cpu_to_le16(NVME_SC_DNR); + status |= cpu_to_le16(NVME_SC_DNR << 1); } nvme_end_request(rq, status, result); } else @@ -1751,7 +1747,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport association error detected: %s\n", ctrl->cnum, errmsg); - dev_info(ctrl->ctrl.device, + dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: resetting controller\n", ctrl->cnum); /* stop the queues on error, cleanup is in reset thread */ @@ -2195,9 +2191,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) if (!opts->nr_io_queues) return 0; - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - nvme_fc_init_io_queues(ctrl); memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); @@ -2268,9 +2261,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) if (ctrl->queue_count == 1) return 0; - dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n", - opts->nr_io_queues); - nvme_fc_init_io_queues(ctrl); ret = blk_mq_reinit_tagset(&ctrl->tag_set); @@ -2306,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) int ret; bool changed; - ctrl->connect_attempts++; + ++ctrl->ctrl.opts->nr_reconnects; /* * Create the admin queue @@ -2403,9 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - ctrl->connect_attempts = 0; - - kref_get(&ctrl->ctrl.kref); + ctrl->ctrl.opts->nr_reconnects = 0; if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); @@ -2536,26 +2524,32 @@ nvme_fc_delete_ctrl_work(struct work_struct *work) /* * tear down the controller - * This will result in the last reference on the nvme ctrl to - * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback. - * From there, the transport will tear down it's logical queues and - * association. + * After the last reference on the nvme ctrl is removed, + * the transport nvme_fc_nvme_ctrl_freed() callback will be + * invoked. From there, the transport will tear down it's + * logical queues and association. 
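+ * The nvme_put_ctrl() below drops the reference taken in
+ * nvme_fc_init_ctrl() once setup succeeded, so with no other
+ * holders left this is the point where that teardown chain is
+ * set in motion.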
*/ nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); } -static int -__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) +static bool +__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl) { if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return -EBUSY; + return true; if (!queue_work(nvme_fc_wq, &ctrl->delete_work)) - return -EBUSY; + return true; - return 0; + return false; +} + +static int +__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) +{ + return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0; } /* @@ -2581,6 +2575,35 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) } static void +nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) +{ + /* If we are resetting/deleting then do nothing */ + if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { + WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || + ctrl->ctrl.state == NVME_CTRL_LIVE); + return; + } + + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", + ctrl->cnum, status); + + if (nvmf_should_reconnect(&ctrl->ctrl)) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->ctrl.opts->reconnect_delay * HZ); + } else { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. Removing controller\n", + ctrl->cnum, ctrl->ctrl.opts->nr_reconnects); + WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); + } +} + +static void nvme_fc_reset_ctrl_work(struct work_struct *work) { struct nvme_fc_ctrl *ctrl = @@ -2591,34 +2614,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) nvme_fc_delete_association(ctrl); ret = nvme_fc_create_association(ctrl); - if (ret) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", - ctrl->cnum, ret); - if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Max reconnect attempts (%d) " - "reached. 
Removing controller\n", - ctrl->cnum, ctrl->connect_attempts); - - if (!nvme_change_ctrl_state(&ctrl->ctrl, - NVME_CTRL_DELETING)) { - dev_err(ctrl->ctrl.device, - "NVME-FC{%d}: failed to change state " - "to DELETING\n", ctrl->cnum); - return; - } - - WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); - return; - } - - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->reconnect_delay); - queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, - ctrl->reconnect_delay * HZ); - } else + if (ret) + nvme_fc_reconnect_or_delete(ctrl, ret); + else dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); } @@ -2632,7 +2630,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - dev_warn(ctrl->ctrl.device, + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) @@ -2649,7 +2647,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .name = "fc", .module = THIS_MODULE, - .is_fabrics = true, + .flags = NVME_F_FABRICS, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, @@ -2671,34 +2669,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work) struct nvme_fc_ctrl, connect_work); ret = nvme_fc_create_association(ctrl); - if (ret) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Reconnect attempt failed (%d)\n", - ctrl->cnum, ret); - if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Max reconnect attempts (%d) " - "reached. Removing controller\n", - ctrl->cnum, ctrl->connect_attempts); - - if (!nvme_change_ctrl_state(&ctrl->ctrl, - NVME_CTRL_DELETING)) { - dev_err(ctrl->ctrl.device, - "NVME-FC{%d}: failed to change state " - "to DELETING\n", ctrl->cnum); - return; - } - - WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); - return; - } - - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->reconnect_delay); - queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, - ctrl->reconnect_delay * HZ); - } else + if (ret) + nvme_fc_reconnect_or_delete(ctrl, ret); + else dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller reconnect complete\n", ctrl->cnum); @@ -2755,7 +2728,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); - ctrl->reconnect_delay = opts->reconnect_delay; spin_lock_init(&ctrl->lock); /* io queue count */ @@ -2819,7 +2791,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = NULL; /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); /* as we're past the point where we transition to the ref * counting teardown path, if we return a bad pointer here, @@ -2835,6 +2806,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, return ERR_PTR(ret); } + kref_get(&ctrl->ctrl.kref); + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", ctrl->cnum, ctrl->ctrl.opts->subsysnqn); @@ -2971,7 +2944,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) static struct nvmf_transport_ops nvme_fc_transport = { .name = "fc", .required_opts = 
NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, - .allowed_opts = NVMF_OPT_RECONNECT_DELAY, + .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, .create_ctrl = nvme_fc_create_ctrl, }; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 29c708ca9621..9d6a070d4391 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -208,7 +208,9 @@ struct nvme_ns { struct nvme_ctrl_ops { const char *name; struct module *module; - bool is_fabrics; + unsigned int flags; +#define NVME_F_FABRICS (1 << 0) +#define NVME_F_METADATA_SUPPORTED (1 << 1) int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4c2ff2bb26bc..d52701df7245 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -263,7 +263,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev) c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr); if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) { - dev_warn(dev->dev, "unable to set dbbuf\n"); + dev_warn(dev->ctrl.device, "unable to set dbbuf\n"); /* Free memory and continue on */ nvme_dbbuf_dma_free(dev); } @@ -1394,11 +1394,11 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS, &pci_status); if (result == PCIBIOS_SUCCESSFUL) - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n", csts, pci_status); else - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n", csts, result); } @@ -1740,8 +1740,8 @@ static int nvme_pci_enable(struct nvme_dev *dev) */ if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) { dev->q_depth = 2; - dev_warn(dev->dev, "detected Apple NVMe controller, set " - "queue depth=%u to work around controller resets\n", + dev_warn(dev->ctrl.device, "detected Apple NVMe controller, " + "set queue depth=%u to work around controller resets\n", dev->q_depth); } @@ -1759,7 +1759,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) if (dev->cmbsz) { if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, &dev_attr_cmb.attr, NULL)) - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "failed to add sysfs attribute for CMB\n"); } } @@ -2047,6 +2047,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .name = "pcie", .module = THIS_MODULE, + .flags = NVME_F_METADATA_SUPPORTED, .reg_read32 = nvme_pci_reg_read32, .reg_write32 = nvme_pci_reg_write32, .reg_read64 = nvme_pci_reg_read64, @@ -2293,6 +2294,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0a54), .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, + { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index dd1c6deef82f..28bd255c144d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1038,6 +1038,19 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) nvme_rdma_wr_error(cq, wc, "SEND"); } +static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) 
+{ + int sig_limit; + + /* + * We signal completion every queue depth/2 and also handle the + * degenerate case of a device with queue_depth=1, where we + * would need to signal every message. + */ + sig_limit = max(queue->queue_size / 2, 1); + return (++queue->sig_count % sig_limit) == 0; +} + static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, struct ib_send_wr *first, bool flush) @@ -1065,9 +1078,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, * Would have been way too obvious to handle this in hardware or * at least the RDMA stack.. * - * This messy and racy code sniplet is copy and pasted from the iSER - * initiator, and the magic '32' comes from there as well. - * * Always signal the flushes. The magic request used for the flush * sequencer is not allocated in our driver's tagset and it's * triggered to be freed by blk_cleanup_queue(). So we need to @@ -1075,7 +1085,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, * embedded in request's payload, is not freed when __ib_process_cq() * calls wr_cqe->done(). */ - if ((++queue->sig_count % 32) == 0 || flush) + if (nvme_rdma_queue_sig_limit(queue) || flush) wr.send_flags |= IB_SEND_SIGNALED; if (first) @@ -1782,7 +1792,7 @@ static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl) static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .name = "rdma", .module = THIS_MODULE, - .is_fabrics = true, + .flags = NVME_F_FABRICS, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index feb497134aee..e503cfff0337 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -558,7 +558,7 @@ static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl) static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { .name = "loop", .module = THIS_MODULE, - .is_fabrics = true, + .flags = NVME_F_FABRICS, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 71fecc2debfc..703a42118ffc 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -523,7 +523,7 @@ static int __init of_platform_default_populate_init(void) arch_initcall_sync(of_platform_default_populate_init); #endif -static int of_platform_device_destroy(struct device *dev, void *data) +int of_platform_device_destroy(struct device *dev, void *data) { /* Do not touch devices not populated from the device tree */ if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) @@ -544,6 +544,7 @@ static int of_platform_device_destroy(struct device *dev, void *data) of_node_clear_flag(dev->of_node, OF_POPULATED_BUS); return 0; } +EXPORT_SYMBOL_GPL(of_platform_device_destroy); /** * of_platform_depopulate() - Remove devices populated from device tree diff --git a/drivers/pci/dwc/pci-imx6.c b/drivers/pci/dwc/pci-imx6.c index a98cba55c7f0..19a289b8cc94 100644 --- a/drivers/pci/dwc/pci-imx6.c +++ b/drivers/pci/dwc/pci-imx6.c @@ -252,7 +252,34 @@ static void imx6_pcie_reset_phy(struct imx6_pcie *imx6_pcie) static int imx6q_pcie_abort_handler(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - return 0; + unsigned long pc = instruction_pointer(regs); + unsigned long instr = *(unsigned long *)pc; + int reg = (instr >> 12) & 15; + + /* + * If the instruction being executed was a read, + * make it look like it read all-ones.
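/*
 * The masks in this handler follow the ARM single-register load encoding:
 * bits 27-26 are 01, bit 20 (L) marks a load, bit 22 (B) selects byte
 * versus word access, and the destination register sits in bits 15-12.
 * A standalone decode of one sample opcode; demo only, with no driver
 * symbols assumed.
 */
#include <stdio.h>

int main(void)
{
	unsigned long instr = 0xE5D03000UL;	/* LDRB r3, [r0] */
	int reg = (instr >> 12) & 15;

	if ((instr & 0x0c100000) == 0x04100000) {
		/* Byte loads fake 0xff, word loads fake all-ones. */
		unsigned long val = (instr & 0x00400000) ? 255 : ~0UL;

		printf("fake read: r%d = 0x%lx\n", reg, val);
	}
	return 0;
}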
+ */ + if ((instr & 0x0c100000) == 0x04100000) { + unsigned long val; + + if (instr & 0x00400000) + val = 255; + else + val = -1; + + regs->uregs[reg] = val; + regs->ARM_pc += 4; + return 0; + } + + if ((instr & 0x0e100090) == 0x00100090) { + regs->uregs[reg] = -1; + regs->ARM_pc += 4; + return 0; + } + + return 1; } static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie) @@ -819,8 +846,8 @@ static int __init imx6_pcie_init(void) * we can install the handler here without risking it * accessing some uninitialized driver state. */ - hook_fault_code(16 + 6, imx6q_pcie_abort_handler, SIGBUS, 0, - "imprecise external abort"); + hook_fault_code(8, imx6q_pcie_abort_handler, SIGBUS, 0, + "external abort on non-linefetch"); return platform_driver_register(&imx6_pcie_driver); } diff --git a/drivers/pci/endpoint/Kconfig b/drivers/pci/endpoint/Kconfig index c23f146fb5a6..c09623ca8c3b 100644 --- a/drivers/pci/endpoint/Kconfig +++ b/drivers/pci/endpoint/Kconfig @@ -6,6 +6,7 @@ menu "PCI Endpoint" config PCI_ENDPOINT bool "PCI Endpoint Support" + depends on HAS_DMA help Enable this configuration option to support configurable PCI endpoint. This should be enabled if the platform has a PCI diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b01bd5bba8e6..563901cd9c06 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2144,7 +2144,8 @@ bool pci_dev_keep_suspended(struct pci_dev *pci_dev) if (!pm_runtime_suspended(dev) || pci_target_state(pci_dev) != pci_dev->current_state - || platform_pci_need_resume(pci_dev)) + || platform_pci_need_resume(pci_dev) + || (pci_dev->dev_flags & PCI_DEV_FLAGS_NEEDS_RESUME)) return false; /* diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index cc6e085008fb..f6a63406c76e 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1291,7 +1291,6 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) cdev = &stdev->cdev; cdev_init(cdev, &switchtec_fops); cdev->owner = THIS_MODULE; - cdev->kobj.parent = &dev->kobj; return stdev; @@ -1442,12 +1441,15 @@ static int switchtec_init_pci(struct switchtec_dev *stdev, stdev->mmio_sys_info = stdev->mmio + SWITCHTEC_GAS_SYS_INFO_OFFSET; stdev->mmio_flash_info = stdev->mmio + SWITCHTEC_GAS_FLASH_INFO_OFFSET; stdev->mmio_ntb = stdev->mmio + SWITCHTEC_GAS_NTB_OFFSET; - stdev->partition = ioread8(&stdev->mmio_ntb->partition_id); + stdev->partition = ioread8(&stdev->mmio_sys_info->partition_id); stdev->partition_count = ioread8(&stdev->mmio_ntb->partition_count); stdev->mmio_part_cfg_all = stdev->mmio + SWITCHTEC_GAS_PART_CFG_OFFSET; stdev->mmio_part_cfg = &stdev->mmio_part_cfg_all[stdev->partition]; stdev->mmio_pff_csr = stdev->mmio + SWITCHTEC_GAS_PFF_CSR_OFFSET; + if (stdev->partition_count < 1) + stdev->partition_count = 1; + init_pff(stdev); pci_set_drvdata(pdev, stdev); @@ -1479,11 +1481,7 @@ static int switchtec_pci_probe(struct pci_dev *pdev, SWITCHTEC_EVENT_EN_IRQ, &stdev->mmio_part_cfg->mrpc_comp_hdr); - rc = cdev_add(&stdev->cdev, stdev->dev.devt, 1); - if (rc) - goto err_put; - - rc = device_add(&stdev->dev); + rc = cdev_device_add(&stdev->cdev, &stdev->dev); if (rc) goto err_devadd; @@ -1492,7 +1490,6 @@ static int switchtec_pci_probe(struct pci_dev *pdev, return 0; err_devadd: - cdev_del(&stdev->cdev); stdev_kill(stdev); err_put: ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt)); @@ -1506,8 +1503,7 @@ static void switchtec_pci_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); - device_del(&stdev->dev); - 
cdev_del(&stdev->cdev); + cdev_device_del(&stdev->cdev, &stdev->dev); ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt)); dev_info(&stdev->dev, "unregistered.\n"); diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 14bde0db8c24..5b10b50f8686 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -538,6 +538,7 @@ struct powercap_zone *powercap_register_zone( power_zone->id = result; idr_init(&power_zone->idr); + result = -ENOMEM; power_zone->name = kstrdup(name, GFP_KERNEL); if (!power_zone->name) goto err_name_alloc; diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index b3de973a6260..9dca53df3584 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -1088,7 +1088,7 @@ static u32 rtc_handler(void *context) } spin_unlock_irqrestore(&rtc_lock, flags); - pm_wakeup_event(dev, 0); + pm_wakeup_hard_event(dev); acpi_clear_event(ACPI_EVENT_RTC); acpi_disable_event(ACPI_EVENT_RTC, 0); return ACPI_INTERRUPT_HANDLED; diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 622bdabc8894..dab195f04da7 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -1769,7 +1769,6 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param) goto bye; } - mempool_free(mbp, hw->mb_mempool); if (finicsum != cfcsum) { csio_warn(hw, "Config File checksum mismatch: csum=%#x, computed=%#x\n", @@ -1780,6 +1779,10 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param) rv = csio_hw_validate_caps(hw, mbp); if (rv != 0) goto bye; + + mempool_free(mbp, hw->mb_mempool); + mbp = NULL; + /* * Note that we're operating with parameters * not supplied by the driver, rather than from hard-wired diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index b44c3136eb51..520325867e2b 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -1422,7 +1422,7 @@ static void fc_rport_recv_rtv_req(struct fc_rport_priv *rdata, fp = fc_frame_alloc(lport, sizeof(*rtv)); if (!fp) { rjt_data.reason = ELS_RJT_UNAB; - rjt_data.reason = ELS_EXPL_INSUF_RES; + rjt_data.explan = ELS_EXPL_INSUF_RES; fc_seq_els_rsp_send(in_fp, ELS_LS_RJT, &rjt_data); goto drop; } diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 6d7840b096e6..f2c0ba6ced78 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -141,6 +141,13 @@ struct lpfc_dmabuf { uint32_t buffer_tag; /* used for tagged queue ring */ }; +struct lpfc_nvmet_ctxbuf { + struct list_head list; + struct lpfc_nvmet_rcv_ctx *context; + struct lpfc_iocbq *iocbq; + struct lpfc_sglq *sglq; +}; + struct lpfc_dma_pool { struct lpfc_dmabuf *elements; uint32_t max_count; @@ -163,9 +170,7 @@ struct rqb_dmabuf { struct lpfc_dmabuf dbuf; uint16_t total_size; uint16_t bytes_recv; - void *context; - struct lpfc_iocbq *iocbq; - struct lpfc_sglq *sglq; + uint16_t idx; struct lpfc_queue *hrq; /* ptr to associated Header RQ */ struct lpfc_queue *drq; /* ptr to associated Data RQ */ }; @@ -670,6 +675,8 @@ struct lpfc_hba { /* INIT_LINK mailbox command */ #define LS_NPIV_FAB_SUPPORTED 0x2 /* Fabric supports NPIV */ #define LS_IGNORE_ERATT 0x4 /* intr handler should ignore ERATT */ +#define LS_MDS_LINK_DOWN 0x8 /* MDS Diagnostics Link Down */ +#define LS_MDS_LOOPBACK 0x16 /* MDS Diagnostics Link Up (Loopback) */ uint32_t hba_flag; /* hba generic flags */ #define HBA_ERATT_HANDLED 0x1 /* This flag is set when eratt handled */ @@ -777,7 +784,6 @@ struct 
lpfc_hba { uint32_t cfg_nvme_oas; uint32_t cfg_nvme_io_channel; uint32_t cfg_nvmet_mrq; - uint32_t cfg_nvmet_mrq_post; uint32_t cfg_enable_nvmet; uint32_t cfg_nvme_enable_fb; uint32_t cfg_nvmet_fb_size; @@ -943,6 +949,7 @@ struct lpfc_hba { struct pci_pool *lpfc_mbuf_pool; struct pci_pool *lpfc_hrb_pool; /* header receive buffer pool */ struct pci_pool *lpfc_drb_pool; /* data receive buffer pool */ + struct pci_pool *lpfc_nvmet_drb_pool; /* data receive buffer pool */ struct pci_pool *lpfc_hbq_pool; /* SLI3 hbq buffer pool */ struct pci_pool *txrdy_payload_pool; struct lpfc_dma_pool lpfc_mbuf_safety_pool; @@ -1228,7 +1235,11 @@ lpfc_sli_read_hs(struct lpfc_hba *phba) static inline struct lpfc_sli_ring * lpfc_phba_elsring(struct lpfc_hba *phba) { - if (phba->sli_rev == LPFC_SLI_REV4) - return phba->sli4_hba.els_wq->pring; + if (phba->sli_rev == LPFC_SLI_REV4) { + if (phba->sli4_hba.els_wq) + return phba->sli4_hba.els_wq->pring; + else + return NULL; + } return &phba->sli.sli3_ring[LPFC_ELS_RING]; } diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 4830370bfab1..bb2d9e238225 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -60,9 +60,9 @@ #define LPFC_MIN_DEVLOSS_TMO 1 #define LPFC_MAX_DEVLOSS_TMO 255 -#define LPFC_DEF_MRQ_POST 256 -#define LPFC_MIN_MRQ_POST 32 -#define LPFC_MAX_MRQ_POST 512 +#define LPFC_DEF_MRQ_POST 512 +#define LPFC_MIN_MRQ_POST 512 +#define LPFC_MAX_MRQ_POST 2048 /* * Write key size should be multiple of 4. If write key is changed @@ -205,8 +205,9 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, atomic_read(&tgtp->xmt_ls_rsp_error)); len += snprintf(buf+len, PAGE_SIZE-len, - "FCP: Rcv %08x Drop %08x\n", + "FCP: Rcv %08x Release %08x Drop %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->xmt_fcp_release), atomic_read(&tgtp->rcv_fcp_cmd_drop)); if (atomic_read(&tgtp->rcv_fcp_cmd_in) != @@ -218,15 +219,12 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, } len += snprintf(buf+len, PAGE_SIZE-len, - "FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x\n", + "FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x " + "drop %08x\n", atomic_read(&tgtp->xmt_fcp_read), atomic_read(&tgtp->xmt_fcp_read_rsp), atomic_read(&tgtp->xmt_fcp_write), - atomic_read(&tgtp->xmt_fcp_rsp)); - - len += snprintf(buf+len, PAGE_SIZE-len, - "FCP Rsp: abort %08x drop %08x\n", - atomic_read(&tgtp->xmt_fcp_abort), + atomic_read(&tgtp->xmt_fcp_rsp), atomic_read(&tgtp->xmt_fcp_drop)); len += snprintf(buf+len, PAGE_SIZE-len, @@ -236,10 +234,22 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, atomic_read(&tgtp->xmt_fcp_rsp_drop)); len += snprintf(buf+len, PAGE_SIZE-len, - "ABORT: Xmt %08x Err %08x Cmpl %08x", + "ABORT: Xmt %08x Cmpl %08x\n", + atomic_read(&tgtp->xmt_fcp_abort), + atomic_read(&tgtp->xmt_fcp_abort_cmpl)); + + len += snprintf(buf + len, PAGE_SIZE - len, + "ABORT: Sol %08x Usol %08x Err %08x Cmpl %08x", + atomic_read(&tgtp->xmt_abort_sol), + atomic_read(&tgtp->xmt_abort_unsol), atomic_read(&tgtp->xmt_abort_rsp), - atomic_read(&tgtp->xmt_abort_rsp_error), - atomic_read(&tgtp->xmt_abort_cmpl)); + atomic_read(&tgtp->xmt_abort_rsp_error)); + + len += snprintf(buf + len, PAGE_SIZE - len, + "IO_CTX: %08x outstanding %08x total %x", + phba->sli4_hba.nvmet_ctx_cnt, + phba->sli4_hba.nvmet_io_wait_cnt, + phba->sli4_hba.nvmet_io_wait_total); len += snprintf(buf+len, PAGE_SIZE-len, "\n"); return len; @@ -3312,14 +3322,6 @@ LPFC_ATTR_R(nvmet_mrq, "Specify number of RQ pairs for 
processing NVMET cmds"); /* - * lpfc_nvmet_mrq_post: Specify number buffers to post on every MRQ - * - */ -LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST, - LPFC_MIN_MRQ_POST, LPFC_MAX_MRQ_POST, - "Specify number of buffers to post on every MRQ"); - -/* * lpfc_enable_fc4_type: Defines what FC4 types are supported. * Supported Values: 1 - register just FCP * 3 - register both FCP and NVME @@ -5154,7 +5156,6 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_suppress_rsp, &dev_attr_lpfc_nvme_io_channel, &dev_attr_lpfc_nvmet_mrq, - &dev_attr_lpfc_nvmet_mrq_post, &dev_attr_lpfc_nvme_enable_fb, &dev_attr_lpfc_nvmet_fb_size, &dev_attr_lpfc_enable_bg, @@ -6194,7 +6195,6 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_enable_fc4_type_init(phba, lpfc_enable_fc4_type); lpfc_nvmet_mrq_init(phba, lpfc_nvmet_mrq); - lpfc_nvmet_mrq_post_init(phba, lpfc_nvmet_mrq_post); /* Initialize first burst. Target vs Initiator are different. */ lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb); @@ -6291,7 +6291,6 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) /* Not NVME Target mode. Turn off Target parameters. */ phba->nvmet_support = 0; phba->cfg_nvmet_mrq = 0; - phba->cfg_nvmet_mrq_post = 0; phba->cfg_nvmet_fb_size = 0; } diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 1c55408ac718..8912767e7bc8 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -75,6 +75,10 @@ void lpfc_init_vpi_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_cancel_all_vport_retry_delay_timer(struct lpfc_hba *); void lpfc_retry_pport_discovery(struct lpfc_hba *); void lpfc_release_rpi(struct lpfc_hba *, struct lpfc_vport *, uint16_t); +int lpfc_init_iocb_list(struct lpfc_hba *phba, int cnt); +void lpfc_free_iocb_list(struct lpfc_hba *phba); +int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, + struct lpfc_queue *drq, int count, int idx); void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); @@ -246,16 +250,14 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *); void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *); struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba); void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab); -void lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, - struct lpfc_dmabuf *mp); +void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, + struct lpfc_nvmet_ctxbuf *ctxp); int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, struct fc_frame_header *fc_hdr); void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *, uint16_t); int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, struct lpfc_rqe *hrqe, struct lpfc_rqe *drqe); -int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq, - struct lpfc_queue *dq, int count); int lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq); void lpfc_unregister_fcf(struct lpfc_hba *); void lpfc_unregister_fcf_rescan(struct lpfc_hba *); @@ -271,6 +273,7 @@ int lpfc_sli4_fcf_rr_next_proc(struct lpfc_vport *, uint16_t); void lpfc_sli4_clear_fcf_rr_bmask(struct lpfc_hba *); int lpfc_mem_alloc(struct lpfc_hba *, int align); +int lpfc_nvmet_mem_alloc(struct lpfc_hba *phba); int lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *); void lpfc_mem_free(struct lpfc_hba *); void lpfc_mem_free_all(struct lpfc_hba *); diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c 
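/*
 * The LPFC_ATTR_R() macros above generate a read-only module parameter
 * together with an init helper that accepts only values inside the declared
 * [min, max] range and otherwise falls back to the default. A rough
 * standalone sketch of that validate-or-default pattern; the demo_* names
 * and bounds are invented for illustration.
 */
#include <stdio.h>

#define DEMO_PARAM_DEF 256
#define DEMO_PARAM_MIN 32
#define DEMO_PARAM_MAX 512

static int demo_param_init(int requested)
{
	if (requested >= DEMO_PARAM_MIN && requested <= DEMO_PARAM_MAX)
		return requested;
	fprintf(stderr, "%d out of range [%d-%d], using default %d\n",
		requested, DEMO_PARAM_MIN, DEMO_PARAM_MAX, DEMO_PARAM_DEF);
	return DEMO_PARAM_DEF;
}

int main(void)
{
	printf("%d\n", demo_param_init(4096)); /* falls back to 256 */
	return 0;
}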
index c7962dae4dab..f2cd19c6c2df 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -2092,6 +2092,7 @@ lpfc_fdmi_port_attr_fc4type(struct lpfc_vport *vport, ae->un.AttrTypes[3] = 0x02; /* Type 1 - ELS */ ae->un.AttrTypes[2] = 0x01; /* Type 8 - FCP */ + ae->un.AttrTypes[6] = 0x01; /* Type 40 - NVME */ ae->un.AttrTypes[7] = 0x01; /* Type 32 - CT */ size = FOURBYTES + 32; ad->AttrLen = cpu_to_be16(size); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index fce549a91911..4bcb92c844ca 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -798,21 +798,22 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) atomic_read(&tgtp->xmt_fcp_rsp)); len += snprintf(buf + len, size - len, - "FCP Rsp: abort %08x drop %08x\n", - atomic_read(&tgtp->xmt_fcp_abort), - atomic_read(&tgtp->xmt_fcp_drop)); - - len += snprintf(buf + len, size - len, "FCP Rsp Cmpl: %08x err %08x drop %08x\n", atomic_read(&tgtp->xmt_fcp_rsp_cmpl), atomic_read(&tgtp->xmt_fcp_rsp_error), atomic_read(&tgtp->xmt_fcp_rsp_drop)); len += snprintf(buf + len, size - len, - "ABORT: Xmt %08x Err %08x Cmpl %08x", + "ABORT: Xmt %08x Cmpl %08x\n", + atomic_read(&tgtp->xmt_fcp_abort), + atomic_read(&tgtp->xmt_fcp_abort_cmpl)); + + len += snprintf(buf + len, size - len, + "ABORT: Sol %08x Usol %08x Err %08x Cmpl %08x", + atomic_read(&tgtp->xmt_abort_sol), + atomic_read(&tgtp->xmt_abort_unsol), atomic_read(&tgtp->xmt_abort_rsp), - atomic_read(&tgtp->xmt_abort_rsp_error), - atomic_read(&tgtp->xmt_abort_cmpl)); + atomic_read(&tgtp->xmt_abort_rsp_error)); len += snprintf(buf + len, size - len, "\n"); @@ -841,6 +842,12 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) } spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); } + + len += snprintf(buf + len, size - len, + "IO_CTX: %08x outstanding %08x total %08x\n", + phba->sli4_hba.nvmet_ctx_cnt, + phba->sli4_hba.nvmet_io_wait_cnt, + phba->sli4_hba.nvmet_io_wait_total); } else { if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) return len; @@ -1959,6 +1966,7 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf, atomic_set(&tgtp->rcv_ls_req_out, 0); atomic_set(&tgtp->rcv_ls_req_drop, 0); atomic_set(&tgtp->xmt_ls_abort, 0); + atomic_set(&tgtp->xmt_ls_abort_cmpl, 0); atomic_set(&tgtp->xmt_ls_rsp, 0); atomic_set(&tgtp->xmt_ls_drop, 0); atomic_set(&tgtp->xmt_ls_rsp_error, 0); @@ -1967,19 +1975,22 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf, atomic_set(&tgtp->rcv_fcp_cmd_in, 0); atomic_set(&tgtp->rcv_fcp_cmd_out, 0); atomic_set(&tgtp->rcv_fcp_cmd_drop, 0); - atomic_set(&tgtp->xmt_fcp_abort, 0); atomic_set(&tgtp->xmt_fcp_drop, 0); atomic_set(&tgtp->xmt_fcp_read_rsp, 0); atomic_set(&tgtp->xmt_fcp_read, 0); atomic_set(&tgtp->xmt_fcp_write, 0); atomic_set(&tgtp->xmt_fcp_rsp, 0); + atomic_set(&tgtp->xmt_fcp_release, 0); atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0); atomic_set(&tgtp->xmt_fcp_rsp_error, 0); atomic_set(&tgtp->xmt_fcp_rsp_drop, 0); + atomic_set(&tgtp->xmt_fcp_abort, 0); + atomic_set(&tgtp->xmt_fcp_abort_cmpl, 0); + atomic_set(&tgtp->xmt_abort_sol, 0); + atomic_set(&tgtp->xmt_abort_unsol, 0); atomic_set(&tgtp->xmt_abort_rsp, 0); atomic_set(&tgtp->xmt_abort_rsp_error, 0); - atomic_set(&tgtp->xmt_abort_cmpl, 0); } return nbytes; } @@ -3070,11 +3081,11 @@ __lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype, qp->assoc_qid, qp->q_cnt_1, (unsigned long long)qp->q_cnt_4); len += snprintf(pbuffer + len, 
LPFC_QUE_INFO_GET_BUF_SIZE - len, - "\t\tWQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", + "\t\tWQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " + "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", qp->queue_id, qp->entry_count, qp->entry_size, qp->host_index, - qp->hba_index); + qp->hba_index, qp->entry_repost); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); return len; @@ -3121,11 +3132,11 @@ __lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype, qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, - "\tCQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", + "\tCQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " + "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", qp->queue_id, qp->entry_count, qp->entry_size, qp->host_index, - qp->hba_index); + qp->hba_index, qp->entry_repost); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); @@ -3143,20 +3154,20 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, "\t\t%s RQ info: ", rqtype); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "AssocCQID[%02d]: RQ-STAT[nopost:x%x nobuf:x%x " - "trunc:x%x rcv:x%llx]\n", + "posted:x%x rcv:x%llx]\n", qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, - "\t\tHQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]\n", + "\t\tHQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " + "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n", qp->queue_id, qp->entry_count, qp->entry_size, - qp->host_index, qp->hba_index); + qp->host_index, qp->hba_index, qp->entry_repost); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, - "\t\tDQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]\n", + "\t\tDQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " + "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n", datqp->queue_id, datqp->entry_count, datqp->entry_size, datqp->host_index, - datqp->hba_index); + datqp->hba_index, datqp->entry_repost); return len; } @@ -3242,10 +3253,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype, eqtype, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, - "EQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", + "EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " + "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", qp->queue_id, qp->entry_count, qp->entry_size, - qp->host_index, qp->hba_index); + qp->host_index, qp->hba_index, qp->entry_repost); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); return len; @@ -5855,8 +5866,10 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) atomic_dec(&lpfc_debugfs_hba_count); } - debugfs_remove(lpfc_debugfs_root); /* lpfc */ - lpfc_debugfs_root = NULL; + if (atomic_read(&lpfc_debugfs_hba_count) == 0) { + debugfs_remove(lpfc_debugfs_root); /* lpfc */ + lpfc_debugfs_root = NULL; + } } #endif return; diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index 9d5a379f4b15..094c97b9e5f7 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -90,6 +90,7 @@ struct lpfc_nodelist { #define NLP_FCP_INITIATOR 0x10 /* entry is an FCP Initiator */ #define NLP_NVME_TARGET 0x20 /* entry is a NVME Target */ #define NLP_NVME_INITIATOR 0x40 /* entry is a NVME Initiator */ 
+#define NLP_NVME_DISCOVERY 0x80 /* entry has NVME disc srvc */ uint16_t nlp_fc4_type; /* FC types node supports. */ /* Assigned from GID_FF, only diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 67827e397431..8e532b39ae93 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1047,6 +1047,13 @@ stop_rr_fcf_flogi: irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout); + + /* If this is not a loop open failure, bail out */ + if (!(irsp->ulpStatus == IOSTAT_LOCAL_REJECT && + ((irsp->un.ulpWord[4] & IOERR_PARAM_MASK) == + IOERR_LOOP_OPEN_FAILURE))) + goto flogifail; + /* FLOGI failed, so there is no fabric */ spin_lock_irq(shost->host_lock); vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); @@ -2077,16 +2084,19 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (irsp->ulpStatus) { /* Check for retry */ + ndlp->fc4_prli_sent--; if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { /* ELS command is being retried */ - ndlp->fc4_prli_sent--; goto out; } + /* PRLI failed */ lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, - "2754 PRLI failure DID:%06X Status:x%x/x%x\n", + "2754 PRLI failure DID:%06X Status:x%x/x%x, " + "data: x%x\n", ndlp->nlp_DID, irsp->ulpStatus, - irsp->un.ulpWord[4]); + irsp->un.ulpWord[4], ndlp->fc4_prli_sent); + /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ if (lpfc_error_lost_link(irsp)) goto out; @@ -7441,6 +7451,13 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) */ spin_lock_irq(&phba->hbalock); pring = lpfc_phba_elsring(phba); + + /* Bail out if we've no ELS wq, like in PCI error recovery case. */ + if (unlikely(!pring)) { + spin_unlock_irq(&phba->hbalock); + return; + } + if (phba->sli_rev == LPFC_SLI_REV4) spin_lock(&pring->ring_lock); @@ -8667,7 +8684,8 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, lpfc_do_scr_ns_plogi(phba, vport); goto out; fdisc_failed: - if (vport->fc_vport->vport_state != FC_VPORT_NO_FABRIC_RSCS) + if (vport->fc_vport && + (vport->fc_vport->vport_state != FC_VPORT_NO_FABRIC_RSCS)) lpfc_vport_set_state(vport, FC_VPORT_FAILED); /* Cancel discovery timer */ lpfc_can_disctmo(vport); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 0482c5580331..3ffcd9215ca8 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -693,15 +693,16 @@ lpfc_work_done(struct lpfc_hba *phba) pring = lpfc_phba_elsring(phba); status = (ha_copy & (HA_RXMASK << (4*LPFC_ELS_RING))); status >>= (4*LPFC_ELS_RING); - if ((status & HA_RXMASK) || - (pring->flag & LPFC_DEFERRED_RING_EVENT) || - (phba->hba_flag & HBA_SP_QUEUE_EVT)) { + if (pring && (status & HA_RXMASK || + pring->flag & LPFC_DEFERRED_RING_EVENT || + phba->hba_flag & HBA_SP_QUEUE_EVT)) { if (pring->flag & LPFC_STOP_IOCB_EVENT) { pring->flag |= LPFC_DEFERRED_RING_EVENT; /* Set the lpfc data pending flag */ set_bit(LPFC_DATA_READY, &phba->data_flags); } else { - if (phba->link_state >= LPFC_LINK_UP) { + if (phba->link_state >= LPFC_LINK_UP || + phba->link_flag & LS_MDS_LOOPBACK) { pring->flag &= ~LPFC_DEFERRED_RING_EVENT; lpfc_sli_handle_slow_ring_event(phba, pring, (status & diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 1d12f2be36bc..e0a5fce416ae 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1356,6 +1356,7 @@ struct lpfc_mbx_wq_destroy { #define LPFC_HDR_BUF_SIZE 128 #define LPFC_DATA_BUF_SIZE 2048 +#define LPFC_NVMET_DATA_BUF_SIZE 128 struct rq_context { uint32_t word0; #define 
lpfc_rq_context_rqe_count_SHIFT 16 /* Version 0 Only */ @@ -4420,6 +4421,19 @@ struct fcp_treceive64_wqe { }; #define TXRDY_PAYLOAD_LEN 12 +#define CMD_SEND_FRAME 0xE1 + +struct send_frame_wqe { + struct ulp_bde64 bde; /* words 0-2 */ + uint32_t frame_len; /* word 3 */ + uint32_t fc_hdr_wd0; /* word 4 */ + uint32_t fc_hdr_wd1; /* word 5 */ + struct wqe_common wqe_com; /* words 6-11 */ + uint32_t fc_hdr_wd2; /* word 12 */ + uint32_t fc_hdr_wd3; /* word 13 */ + uint32_t fc_hdr_wd4; /* word 14 */ + uint32_t fc_hdr_wd5; /* word 15 */ +}; union lpfc_wqe { uint32_t words[16]; @@ -4438,7 +4452,7 @@ union lpfc_wqe { struct fcp_trsp64_wqe fcp_trsp; struct fcp_tsend64_wqe fcp_tsend; struct fcp_treceive64_wqe fcp_treceive; - + struct send_frame_wqe send_frame; }; union lpfc_wqe128 { diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 4b1eb98c228d..9add9473cae5 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1099,7 +1099,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) { ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP); - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); } } @@ -3381,7 +3381,7 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba) { struct lpfc_sglq *sglq_entry = NULL, *sglq_entry_next = NULL; uint16_t i, lxri, xri_cnt, els_xri_cnt; - uint16_t nvmet_xri_cnt, tot_cnt; + uint16_t nvmet_xri_cnt; LIST_HEAD(nvmet_sgl_list); int rc; @@ -3389,15 +3389,9 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba) * update on pci function's nvmet xri-sgl list */ els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); - nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post; - tot_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; - if (nvmet_xri_cnt > tot_cnt) { - phba->cfg_nvmet_mrq_post = tot_cnt / phba->cfg_nvmet_mrq; - nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post; - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "6301 NVMET post-sgl count changed to %d\n", - phba->cfg_nvmet_mrq_post); - } + + /* For NVMET, ALL remaining XRIs are dedicated for IO processing */ + nvmet_xri_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; if (nvmet_xri_cnt > phba->sli4_hba.nvmet_xri_cnt) { /* els xri-sgl expanded */ @@ -4546,6 +4540,19 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) pmb->vport = phba->pport; if (phba->sli4_hba.link_state.status != LPFC_FC_LA_TYPE_LINK_UP) { + phba->link_flag &= ~(LS_MDS_LINK_DOWN | LS_MDS_LOOPBACK); + + switch (phba->sli4_hba.link_state.status) { + case LPFC_FC_LA_TYPE_MDS_LINK_DOWN: + phba->link_flag |= LS_MDS_LINK_DOWN; + break; + case LPFC_FC_LA_TYPE_MDS_LOOPBACK: + phba->link_flag |= LS_MDS_LOOPBACK; + break; + default: + break; + } + /* Parse and translate status field */ mb = &pmb->u.mb; mb->mbxStatus = lpfc_sli4_parse_latt_fault(phba, @@ -5830,6 +5837,9 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_ctx_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_io_wait_list); + /* Fast-path XRI aborted CQ Event work queue list */ INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue); } @@ -5837,6 +5847,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* This abort list used by worker thread */ 
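/*
 * The send_frame format added above is one more view of the same 16-word
 * WQE; every member of union lpfc_wqe overlays identical storage, so a new
 * command layout costs no extra space. A cut-down sketch of that overlay
 * with invented demo_* names:
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct demo_send_frame {
	uint32_t bde[3];	/* words 0-2 */
	uint32_t frame_len;	/* word 3 */
	uint32_t hdr[12];	/* words 4-15 */
};

union demo_wqe {
	uint32_t words[16];
	struct demo_send_frame send_frame;
};

int main(void)
{
	union demo_wqe wqe;

	memset(&wqe, 0, sizeof(wqe));
	wqe.send_frame.frame_len = 64;
	/* frame_len and raw word 3 alias the same storage. */
	printf("word 3 = %u\n", wqe.words[3]);
	return 0;
}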
spin_lock_init(&phba->sli4_hba.sgl_list_lock); spin_lock_init(&phba->sli4_hba.nvmet_io_lock); + spin_lock_init(&phba->sli4_hba.nvmet_io_wait_lock); /* * Initialize driver internal slow-path work queues @@ -5951,16 +5962,21 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) for (i = 0; i < lpfc_enable_nvmet_cnt; i++) { if (wwn == lpfc_enable_nvmet[i]) { #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) + if (lpfc_nvmet_mem_alloc(phba)) + break; + + phba->nvmet_support = 1; /* a match */ + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6017 NVME Target %016llx\n", wwn); - phba->nvmet_support = 1; /* a match */ #else lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6021 Can't enable NVME Target." " NVME_TARGET_FC infrastructure" " is not in kernel\n"); #endif + break; } } } @@ -6269,7 +6285,7 @@ lpfc_unset_driver_resource_phase2(struct lpfc_hba *phba) * * This routine is invoked to free the driver's IOCB list and memory. **/ -static void +void lpfc_free_iocb_list(struct lpfc_hba *phba) { struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL; @@ -6297,7 +6313,7 @@ lpfc_free_iocb_list(struct lpfc_hba *phba) * 0 - successful * other values - error **/ -static int +int lpfc_init_iocb_list(struct lpfc_hba *phba, int iocb_count) { struct lpfc_iocbq *iocbq_entry = NULL; @@ -6525,7 +6541,6 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba) uint16_t rpi_limit, curr_rpi_range; struct lpfc_dmabuf *dmabuf; struct lpfc_rpi_hdr *rpi_hdr; - uint32_t rpi_count; /* * If the SLI4 port supports extents, posting the rpi header isn't @@ -6538,8 +6553,7 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba) return NULL; /* The limit on the logical index is just the max_rpi count. */ - rpi_limit = phba->sli4_hba.max_cfg_param.rpi_base + - phba->sli4_hba.max_cfg_param.max_rpi - 1; + rpi_limit = phba->sli4_hba.max_cfg_param.max_rpi; spin_lock_irq(&phba->hbalock); /* @@ -6550,18 +6564,10 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba) curr_rpi_range = phba->sli4_hba.next_rpi; spin_unlock_irq(&phba->hbalock); - /* - * The port has a limited number of rpis. The increment here - * is LPFC_RPI_HDR_COUNT - 1 to account for the starting value - * and to allow the full max_rpi range per port. - */ - if ((curr_rpi_range + (LPFC_RPI_HDR_COUNT - 1)) > rpi_limit) - rpi_count = rpi_limit - curr_rpi_range; - else - rpi_count = LPFC_RPI_HDR_COUNT; - - if (!rpi_count) + /* Reached full RPI range */ + if (curr_rpi_range == rpi_limit) return NULL; + /* * First allocate the protocol header region for the port. The * port expects a 4KB DMA-mapped memory region that is 4K aligned. @@ -6595,13 +6601,9 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba) /* The rpi_hdr stores the logical index only. */ rpi_hdr->start_rpi = curr_rpi_range; + rpi_hdr->next_rpi = phba->sli4_hba.next_rpi + LPFC_RPI_HDR_COUNT; list_add_tail(&rpi_hdr->list, &phba->sli4_hba.lpfc_rpi_hdr_list); - /* - * The next_rpi stores the next logical module-64 rpi value used - * to post physical rpis in subsequent rpi postings. 
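/*
 * After the rework above, RPI headers are handed out as fixed windows of
 * LPFC_RPI_HDR_COUNT logical RPIs until the max_rpi limit is reached. A
 * simplified standalone model (the real driver records next_rpi in the
 * header and advances it when the header is posted); demo_* names are
 * invented for illustration.
 */
#include <stdio.h>

#define DEMO_RPI_HDR_COUNT 64

struct demo_port {
	int next_rpi;
	int rpi_limit;
};

static int demo_create_rpi_hdr(struct demo_port *p)
{
	int start = p->next_rpi;

	if (start == p->rpi_limit)
		return -1;	/* full RPI range reached */
	p->next_rpi = start + DEMO_RPI_HDR_COUNT;
	return start;
}

int main(void)
{
	struct demo_port port = { .next_rpi = 0, .rpi_limit = 128 };
	int start;

	while ((start = demo_create_rpi_hdr(&port)) >= 0)
		printf("rpi window starts at %d\n", start);
	return 0;
}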
- */ - phba->sli4_hba.next_rpi += rpi_count; spin_unlock_irq(&phba->hbalock); return rpi_hdr; @@ -8172,7 +8174,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create NVMET Receive Queue for header */ qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.rq_esize, - phba->sli4_hba.rq_ecount); + LPFC_NVMET_RQE_DEF_COUNT); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3146 Failed allocate " @@ -8194,7 +8196,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create NVMET Receive Queue for data */ qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.rq_esize, - phba->sli4_hba.rq_ecount); + LPFC_NVMET_RQE_DEF_COUNT); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3156 Failed allocate " @@ -8326,46 +8328,6 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) } int -lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, - struct lpfc_queue *drq, int count) -{ - int rc, i; - struct lpfc_rqe hrqe; - struct lpfc_rqe drqe; - struct lpfc_rqb *rqbp; - struct rqb_dmabuf *rqb_buffer; - LIST_HEAD(rqb_buf_list); - - rqbp = hrq->rqbp; - for (i = 0; i < count; i++) { - rqb_buffer = (rqbp->rqb_alloc_buffer)(phba); - if (!rqb_buffer) - break; - rqb_buffer->hrq = hrq; - rqb_buffer->drq = drq; - list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list); - } - while (!list_empty(&rqb_buf_list)) { - list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf, - hbuf.list); - - hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys); - hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys); - drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys); - drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys); - rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe); - if (rc < 0) { - (rqbp->rqb_free_buffer)(phba, rqb_buffer); - } else { - list_add_tail(&rqb_buffer->hbuf.list, - &rqbp->rqb_buffer_list); - rqbp->buffer_count++; - } - } - return 1; -} - -int lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *rq) { struct lpfc_rqb *rqbp; @@ -8784,9 +8746,6 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) goto out_destroy; } - lpfc_rq_adjust_repost(phba, phba->sli4_hba.hdr_rq, LPFC_ELS_HBQ); - lpfc_rq_adjust_repost(phba, phba->sli4_hba.dat_rq, LPFC_ELS_HBQ); - rc = lpfc_rq_create(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq, phba->sli4_hba.els_cq, LPFC_USOL); if (rc) { @@ -11110,7 +11069,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct lpfc_hba *phba; struct lpfc_vport *vport = NULL; struct Scsi_Host *shost = NULL; - int error, cnt; + int error; uint32_t cfg_mode, intr_mode; /* Allocate memory for HBA structure */ @@ -11144,22 +11103,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_unset_pci_mem_s4; } - cnt = phba->cfg_iocb_cnt * 1024; - if (phba->nvmet_support) - cnt += phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq; - - /* Initialize and populate the iocb list per host */ - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2821 initialize iocb list %d total %d\n", - phba->cfg_iocb_cnt, cnt); - error = lpfc_init_iocb_list(phba, cnt); - - if (error) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1413 Failed to initialize iocb list.\n"); - goto out_unset_driver_resource_s4; - } - INIT_LIST_HEAD(&phba->active_rrq_list); INIT_LIST_HEAD(&phba->fcf.fcf_pri_list); @@ -11168,7 +11111,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "1414 Failed to set up driver resource.\n"); - goto out_free_iocb_list; + goto 
out_unset_driver_resource_s4; } /* Get the default values for Model Name and Description */ @@ -11268,8 +11211,6 @@ out_destroy_shost: lpfc_destroy_shost(phba); out_unset_driver_resource: lpfc_unset_driver_resource_phase2(phba); -out_free_iocb_list: - lpfc_free_iocb_list(phba); out_unset_driver_resource_s4: lpfc_sli4_driver_resource_unset(phba); out_unset_pci_mem_s4: diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 5986c7957199..fcc05a1517c2 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -214,6 +214,21 @@ fail_free_drb_pool: return -ENOMEM; } +int +lpfc_nvmet_mem_alloc(struct lpfc_hba *phba) +{ + phba->lpfc_nvmet_drb_pool = + pci_pool_create("lpfc_nvmet_drb_pool", + phba->pcidev, LPFC_NVMET_DATA_BUF_SIZE, + SGL_ALIGN_SZ, 0); + if (!phba->lpfc_nvmet_drb_pool) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6024 Can't enable NVME Target - no memory\n"); + return -ENOMEM; + } + return 0; +} + /** * lpfc_mem_free - Frees memory allocated by lpfc_mem_alloc * @phba: HBA to free memory for @@ -232,6 +247,9 @@ lpfc_mem_free(struct lpfc_hba *phba) /* Free HBQ pools */ lpfc_sli_hbqbuf_free_all(phba); + if (phba->lpfc_nvmet_drb_pool) + pci_pool_destroy(phba->lpfc_nvmet_drb_pool); + phba->lpfc_nvmet_drb_pool = NULL; if (phba->lpfc_drb_pool) pci_pool_destroy(phba->lpfc_drb_pool); phba->lpfc_drb_pool = NULL; @@ -611,8 +629,6 @@ struct rqb_dmabuf * lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) { struct rqb_dmabuf *dma_buf; - struct lpfc_iocbq *nvmewqe; - union lpfc_wqe128 *wqe; dma_buf = kzalloc(sizeof(struct rqb_dmabuf), GFP_KERNEL); if (!dma_buf) @@ -624,69 +640,15 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) kfree(dma_buf); return NULL; } - dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL, - &dma_buf->dbuf.phys); + dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_nvmet_drb_pool, + GFP_KERNEL, &dma_buf->dbuf.phys); if (!dma_buf->dbuf.virt) { pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, dma_buf->hbuf.phys); kfree(dma_buf); return NULL; } - dma_buf->total_size = LPFC_DATA_BUF_SIZE; - - dma_buf->context = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), - GFP_KERNEL); - if (!dma_buf->context) { - pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, - dma_buf->dbuf.phys); - pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, - dma_buf->hbuf.phys); - kfree(dma_buf); - return NULL; - } - - dma_buf->iocbq = lpfc_sli_get_iocbq(phba); - if (!dma_buf->iocbq) { - kfree(dma_buf->context); - pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, - dma_buf->dbuf.phys); - pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, - dma_buf->hbuf.phys); - kfree(dma_buf); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME, - "2621 Ran out of nvmet iocb/WQEs\n"); - return NULL; - } - dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET; - nvmewqe = dma_buf->iocbq; - wqe = (union lpfc_wqe128 *)&nvmewqe->wqe; - /* Initialize WQE */ - memset(wqe, 0, sizeof(union lpfc_wqe)); - /* Word 7 */ - bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI); - bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3); - bf_set(wqe_pu, &wqe->generic.wqe_com, 1); - /* Word 10 */ - bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1); - bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0); - bf_set(wqe_qosd, &wqe->generic.wqe_com, 0); - - dma_buf->iocbq->context1 = NULL; - spin_lock(&phba->sli4_hba.sgl_list_lock); - dma_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, dma_buf->iocbq); - spin_unlock(&phba->sli4_hba.sgl_list_lock); - if (!dma_buf->sglq) { - lpfc_sli_release_iocbq(phba, dma_buf->iocbq); - 
kfree(dma_buf->context); - pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, - dma_buf->dbuf.phys); - pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, - dma_buf->hbuf.phys); - kfree(dma_buf); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME, - "6132 Ran out of nvmet XRIs\n"); - return NULL; - } + dma_buf->total_size = LPFC_NVMET_DATA_BUF_SIZE; return dma_buf; } @@ -705,20 +667,9 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab) { - unsigned long flags; - - __lpfc_clear_active_sglq(phba, dmab->sglq->sli4_lxritag); - dmab->sglq->state = SGL_FREED; - dmab->sglq->ndlp = NULL; - - spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags); - list_add_tail(&dmab->sglq->list, &phba->sli4_hba.lpfc_nvmet_sgl_list); - spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock, flags); - - lpfc_sli_release_iocbq(phba, dmab->iocbq); - kfree(dmab->context); pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys); - pci_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys); + pci_pool_free(phba->lpfc_nvmet_drb_pool, + dmab->dbuf.virt, dmab->dbuf.phys); kfree(dmab); } @@ -803,6 +754,11 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe); if (rc < 0) { (rqbp->rqb_free_buffer)(phba, rqb_entry); + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6409 Cannot post to RQ %d: %x %x\n", + rqb_entry->hrq->queue_id, + rqb_entry->hrq->host_index, + rqb_entry->hrq->hba_index); } else { list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list); rqbp->buffer_count++; diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 8777c2d5f50d..bff3de053df4 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1944,7 +1944,13 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, /* Target driver cannot solicit NVME FB. */ if (bf_get_be32(prli_tgt, nvpr)) { + /* Complete the nvme target roles. The transport + * needs to know if the rport is capable of + * discovery in addition to its role. 
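/*
 * The PRLI completion above turns individual service-parameter bits into
 * nlp_type flags. A toy decode of that mapping; the bit positions here are
 * invented for the demo (lpfc extracts the real ones from the big-endian
 * payload with bf_get_be32()).
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_NLP_NVME_TARGET	0x20
#define DEMO_NLP_NVME_DISCOVERY	0x80

static void demo_decode_prli(uint32_t prli_word, uint32_t *nlp_type)
{
	if (prli_word & 0x1)		/* target function bit */
		*nlp_type |= DEMO_NLP_NVME_TARGET;
	if (prli_word & 0x2)		/* discovery service bit */
		*nlp_type |= DEMO_NLP_NVME_DISCOVERY;
}

int main(void)
{
	uint32_t nlp_type = 0;

	demo_decode_prli(0x3, &nlp_type);
	printf("nlp_type = 0x%02x\n", nlp_type);	/* 0xa0 */
	return 0;
}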
+ */ ndlp->nlp_type |= NLP_NVME_TARGET; + if (bf_get_be32(prli_disc, nvpr)) + ndlp->nlp_type |= NLP_NVME_DISCOVERY; if ((bf_get_be32(prli_fba, nvpr) == 1) && (bf_get_be32(prli_fb_sz, nvpr) > 0) && (phba->cfg_nvme_enable_fb) && diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 0488580eea12..074a6b5e7763 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -142,7 +142,7 @@ out: } /** - * lpfc_nvmet_rq_post - Repost a NVMET RQ DMA buffer and clean up context + * lpfc_nvmet_ctxbuf_post - Repost a NVMET RQ DMA buffer and clean up context * @phba: HBA buffer is associated with * @ctxp: context to clean up * @mp: Buffer to free @@ -155,24 +155,113 @@ out: * Returns: None **/ void -lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, - struct lpfc_dmabuf *mp) +lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) { - if (ctxp) { - if (ctxp->flag) - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6314 rq_post ctx xri x%x flag x%x\n", - ctxp->oxid, ctxp->flag); - - if (ctxp->txrdy) { - pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy, - ctxp->txrdy_phys); - ctxp->txrdy = NULL; - ctxp->txrdy_phys = 0; +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) + struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context; + struct lpfc_nvmet_tgtport *tgtp; + struct fc_frame_header *fc_hdr; + struct rqb_dmabuf *nvmebuf; + struct lpfc_dmabuf *hbufp; + uint32_t *payload; + uint32_t size, oxid, sid, rc; + unsigned long iflag; + + if (ctxp->txrdy) { + pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy, + ctxp->txrdy_phys); + ctxp->txrdy = NULL; + ctxp->txrdy_phys = 0; + } + ctxp->state = LPFC_NVMET_STE_FREE; + + spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag); + if (phba->sli4_hba.nvmet_io_wait_cnt) { + hbufp = &nvmebuf->hbuf; + list_remove_head(&phba->sli4_hba.lpfc_nvmet_io_wait_list, + nvmebuf, struct rqb_dmabuf, + hbuf.list); + phba->sli4_hba.nvmet_io_wait_cnt--; + spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, + iflag); + + fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); + oxid = be16_to_cpu(fc_hdr->fh_ox_id); + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + payload = (uint32_t *)(nvmebuf->dbuf.virt); + size = nvmebuf->bytes_recv; + sid = sli4_sid_from_fc_hdr(fc_hdr); + + ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; + memset(ctxp, 0, sizeof(ctxp->ctx)); + ctxp->wqeq = NULL; + ctxp->txrdy = NULL; + ctxp->offset = 0; + ctxp->phba = phba; + ctxp->size = size; + ctxp->oxid = oxid; + ctxp->sid = sid; + ctxp->state = LPFC_NVMET_STE_RCV; + ctxp->entry_cnt = 1; + ctxp->flag = 0; + ctxp->ctxbuf = ctx_buf; + spin_lock_init(&ctxp->ctxlock); + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + ctxp->ts_cmd_nvme = ktime_get_ns(); + ctxp->ts_isr_cmd = ctxp->ts_cmd_nvme; + ctxp->ts_nvme_data = 0; + ctxp->ts_data_wqput = 0; + ctxp->ts_isr_data = 0; + ctxp->ts_data_nvme = 0; + ctxp->ts_nvme_status = 0; + ctxp->ts_status_wqput = 0; + ctxp->ts_isr_status = 0; + ctxp->ts_status_nvme = 0; } - ctxp->state = LPFC_NVMET_STE_FREE; +#endif + atomic_inc(&tgtp->rcv_fcp_cmd_in); + /* + * The calling sequence should be: + * nvmet_fc_rcv_fcp_req->lpfc_nvmet_xmt_fcp_op/cmp- req->done + * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp. + * When we return from nvmet_fc_rcv_fcp_req, all relevant info + * the NVME command / FC header is stored. + * A buffer has already been reposted for this IO, so just free + * the nvmebuf. 
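/*
 * A small userspace model of the replay scheme implemented above: a context
 * being returned first services any receive that was parked because no
 * context was free, and only then goes back on the free list. All names and
 * the fixed-size queue are invented for the demo.
 */
#include <stdio.h>

#define DEMO_NCTX  2
#define DEMO_NWAIT 8

static int wait_q[DEMO_NWAIT], wait_head, wait_tail, wait_cnt;
static int free_ctx_cnt = DEMO_NCTX;

static void demo_process(int io)
{
	printf("processing io %d\n", io);
}

static void demo_ctxbuf_post(void)	/* context handed back */
{
	if (wait_cnt) {
		int io = wait_q[wait_head];

		wait_head = (wait_head + 1) % DEMO_NWAIT;
		wait_cnt--;
		demo_process(io);	/* context reused at once */
		return;
	}
	free_ctx_cnt++;			/* nothing waiting: free it */
}

static void demo_receive(int io)	/* command arrives */
{
	if (free_ctx_cnt) {
		free_ctx_cnt--;
		demo_process(io);
		return;
	}
	wait_q[wait_tail] = io;		/* park until a context frees */
	wait_tail = (wait_tail + 1) % DEMO_NWAIT;
	wait_cnt++;
}

int main(void)
{
	int i;

	for (i = 0; i < 4; i++)
		demo_receive(i);	/* 0 and 1 run, 2 and 3 park */
	demo_ctxbuf_post();		/* replays io 2 */
	demo_ctxbuf_post();		/* replays io 3 */
	return 0;
}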
+ */ + rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req, + payload, size); + + /* Process FCP command */ + if (rc == 0) { + atomic_inc(&tgtp->rcv_fcp_cmd_out); + nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf); + return; + } + + atomic_inc(&tgtp->rcv_fcp_cmd_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n", + ctxp->oxid, rc, + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out), + atomic_read(&tgtp->xmt_fcp_release)); + + lpfc_nvmet_defer_release(phba, ctxp); + lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid); + nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf); + return; } - lpfc_rq_buf_free(phba, mp); + spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, iflag); + + spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag); + list_add_tail(&ctx_buf->list, + &phba->sli4_hba.lpfc_nvmet_ctx_list); + phba->sli4_hba.nvmet_ctx_cnt++; + spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag); +#endif } #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -502,6 +591,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, "6150 LS Drop IO x%x: Prep\n", ctxp->oxid); lpfc_in_buf_free(phba, &nvmebuf->dbuf); + atomic_inc(&nvmep->xmt_ls_abort); lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); return -ENOMEM; @@ -545,6 +635,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, lpfc_nlp_put(nvmewqeq->context1); lpfc_in_buf_free(phba, &nvmebuf->dbuf); + atomic_inc(&nvmep->xmt_ls_abort); lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); return -ENXIO; } @@ -612,9 +703,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n", ctxp->oxid, rsp->op, rsp->rsplen); + ctxp->flag |= LPFC_NVMET_IO_INP; rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq); if (rc == WQE_SUCCESS) { - ctxp->flag |= LPFC_NVMET_IO_INP; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (!phba->ktime_on) return 0; @@ -692,6 +783,7 @@ static void lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *rsp) { + struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; struct lpfc_nvmet_rcv_ctx *ctxp = container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; @@ -710,10 +802,12 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d\n", ctxp->oxid, ctxp->state, 0); + atomic_inc(&lpfc_nvmep->xmt_fcp_release); + if (aborting) return; - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); } static struct nvmet_fc_target_template lpfc_tgttemplate = { @@ -734,17 +828,128 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = { .target_priv_sz = sizeof(struct lpfc_nvmet_tgtport), }; +void +lpfc_nvmet_cleanup_io_context(struct lpfc_hba *phba) +{ + struct lpfc_nvmet_ctxbuf *ctx_buf, *next_ctx_buf; + unsigned long flags; + + list_for_each_entry_safe( + ctx_buf, next_ctx_buf, + &phba->sli4_hba.lpfc_nvmet_ctx_list, list) { + spin_lock_irqsave( + &phba->sli4_hba.abts_nvme_buf_list_lock, flags); + list_del_init(&ctx_buf->list); + spin_unlock_irqrestore( + &phba->sli4_hba.abts_nvme_buf_list_lock, flags); + __lpfc_clear_active_sglq(phba, + ctx_buf->sglq->sli4_lxritag); + ctx_buf->sglq->state = SGL_FREED; + ctx_buf->sglq->ndlp = NULL; + + spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags); + list_add_tail(&ctx_buf->sglq->list, + 
&phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock, + flags); + + lpfc_sli_release_iocbq(phba, ctx_buf->iocbq); + kfree(ctx_buf->context); + } +} + +int +lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) +{ + struct lpfc_nvmet_ctxbuf *ctx_buf; + struct lpfc_iocbq *nvmewqe; + union lpfc_wqe128 *wqe; + int i; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME, + "6403 Allocate NVMET resources for %d XRIs\n", + phba->sli4_hba.nvmet_xri_cnt); + + /* For all nvmet xris, allocate resources needed to process a + * received command on a per xri basis. + */ + for (i = 0; i < phba->sli4_hba.nvmet_xri_cnt; i++) { + ctx_buf = kzalloc(sizeof(*ctx_buf), GFP_KERNEL); + if (!ctx_buf) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6404 Ran out of memory for NVMET\n"); + return -ENOMEM; + } + + ctx_buf->context = kzalloc(sizeof(*ctx_buf->context), + GFP_KERNEL); + if (!ctx_buf->context) { + kfree(ctx_buf); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6405 Ran out of NVMET " + "context memory\n"); + return -ENOMEM; + } + ctx_buf->context->ctxbuf = ctx_buf; + + ctx_buf->iocbq = lpfc_sli_get_iocbq(phba); + if (!ctx_buf->iocbq) { + kfree(ctx_buf->context); + kfree(ctx_buf); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6406 Ran out of NVMET iocb/WQEs\n"); + return -ENOMEM; + } + ctx_buf->iocbq->iocb_flag = LPFC_IO_NVMET; + nvmewqe = ctx_buf->iocbq; + wqe = (union lpfc_wqe128 *)&nvmewqe->wqe; + /* Initialize WQE */ + memset(wqe, 0, sizeof(union lpfc_wqe)); + /* Word 7 */ + bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI); + bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3); + bf_set(wqe_pu, &wqe->generic.wqe_com, 1); + /* Word 10 */ + bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0); + bf_set(wqe_qosd, &wqe->generic.wqe_com, 0); + + ctx_buf->iocbq->context1 = NULL; + spin_lock(&phba->sli4_hba.sgl_list_lock); + ctx_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, ctx_buf->iocbq); + spin_unlock(&phba->sli4_hba.sgl_list_lock); + if (!ctx_buf->sglq) { + lpfc_sli_release_iocbq(phba, ctx_buf->iocbq); + kfree(ctx_buf->context); + kfree(ctx_buf); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6407 Ran out of NVMET XRIs\n"); + return -ENOMEM; + } + spin_lock(&phba->sli4_hba.nvmet_io_lock); + list_add_tail(&ctx_buf->list, + &phba->sli4_hba.lpfc_nvmet_ctx_list); + spin_unlock(&phba->sli4_hba.nvmet_io_lock); + } + phba->sli4_hba.nvmet_ctx_cnt = phba->sli4_hba.nvmet_xri_cnt; + return 0; +} + int lpfc_nvmet_create_targetport(struct lpfc_hba *phba) { struct lpfc_vport *vport = phba->pport; struct lpfc_nvmet_tgtport *tgtp; struct nvmet_fc_port_info pinfo; - int error = 0; + int error; if (phba->targetport) return 0; + error = lpfc_nvmet_setup_io_context(phba); + if (error) + return error; + memset(&pinfo, 0, sizeof(struct nvmet_fc_port_info)); pinfo.node_name = wwn_to_u64(vport->fc_nodename.u.wwn); pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn); @@ -772,13 +977,16 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) &phba->pcidev->dev, &phba->targetport); #else - error = -ENOMEM; + error = -ENOENT; #endif if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, "6025 Cannot register NVME targetport " "x%x\n", error); phba->targetport = NULL; + + lpfc_nvmet_cleanup_io_context(phba); + } else { tgtp = (struct lpfc_nvmet_tgtport *) phba->targetport->private; @@ -795,6 +1003,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) atomic_set(&tgtp->rcv_ls_req_out, 0); atomic_set(&tgtp->rcv_ls_req_drop, 0); 
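/*
 * lpfc_nvmet_setup_io_context() above bundles several resources per XRI and
 * releases everything taken so far when any step fails. A compact sketch of
 * that reverse-order unwind; demo_* names are invented and the two
 * allocations stand in for the context/iocbq/sglq trio.
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_ctxbuf {
	void *context;
	void *iocbq;
};

static struct demo_ctxbuf *demo_setup_ctxbuf(void)
{
	struct demo_ctxbuf *cb = calloc(1, sizeof(*cb));

	if (!cb)
		return NULL;
	cb->context = calloc(1, 64);
	if (!cb->context)
		goto out_cb;
	cb->iocbq = calloc(1, 128);
	if (!cb->iocbq)
		goto out_context;
	return cb;

out_context:
	free(cb->context);
out_cb:
	free(cb);
	return NULL;
}

int main(void)
{
	struct demo_ctxbuf *cb = demo_setup_ctxbuf();

	printf(cb ? "context ready\n" : "allocation failed, unwound\n");
	if (cb) {		/* mirror of the cleanup routine */
		free(cb->iocbq);
		free(cb->context);
		free(cb);
	}
	return 0;
}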
atomic_set(&tgtp->xmt_ls_abort, 0); + atomic_set(&tgtp->xmt_ls_abort_cmpl, 0); atomic_set(&tgtp->xmt_ls_rsp, 0); atomic_set(&tgtp->xmt_ls_drop, 0); atomic_set(&tgtp->xmt_ls_rsp_error, 0); @@ -802,18 +1011,21 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) atomic_set(&tgtp->rcv_fcp_cmd_in, 0); atomic_set(&tgtp->rcv_fcp_cmd_out, 0); atomic_set(&tgtp->rcv_fcp_cmd_drop, 0); - atomic_set(&tgtp->xmt_fcp_abort, 0); atomic_set(&tgtp->xmt_fcp_drop, 0); atomic_set(&tgtp->xmt_fcp_read_rsp, 0); atomic_set(&tgtp->xmt_fcp_read, 0); atomic_set(&tgtp->xmt_fcp_write, 0); atomic_set(&tgtp->xmt_fcp_rsp, 0); + atomic_set(&tgtp->xmt_fcp_release, 0); atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0); atomic_set(&tgtp->xmt_fcp_rsp_error, 0); atomic_set(&tgtp->xmt_fcp_rsp_drop, 0); + atomic_set(&tgtp->xmt_fcp_abort, 0); + atomic_set(&tgtp->xmt_fcp_abort_cmpl, 0); + atomic_set(&tgtp->xmt_abort_unsol, 0); + atomic_set(&tgtp->xmt_abort_sol, 0); atomic_set(&tgtp->xmt_abort_rsp, 0); atomic_set(&tgtp->xmt_abort_rsp_error, 0); - atomic_set(&tgtp->xmt_abort_cmpl, 0); } return error; } @@ -864,7 +1076,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, list_for_each_entry_safe(ctxp, next_ctxp, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, list) { - if (ctxp->rqb_buffer->sglq->sli4_xritag != xri) + if (ctxp->ctxbuf->sglq->sli4_xritag != xri) continue; /* Check if we already received a free context call @@ -885,7 +1097,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE || ndlp->nlp_state == NLP_STE_MAPPED_NODE)) { lpfc_set_rrq_active(phba, ndlp, - ctxp->rqb_buffer->sglq->sli4_lxritag, + ctxp->ctxbuf->sglq->sli4_lxritag, rxid, 1); lpfc_sli4_abts_err_handler(phba, ndlp, axri); } @@ -894,8 +1106,8 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, "6318 XB aborted %x flg x%x (%x)\n", ctxp->oxid, ctxp->flag, released); if (released) - lpfc_nvmet_rq_post(phba, ctxp, - &ctxp->rqb_buffer->hbuf); + lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); + if (rrq_empty) lpfc_worker_wake_up(phba); return; @@ -923,7 +1135,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, list_for_each_entry_safe(ctxp, next_ctxp, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, list) { - if (ctxp->rqb_buffer->sglq->sli4_xritag != xri) + if (ctxp->ctxbuf->sglq->sli4_xritag != xri) continue; spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); @@ -975,6 +1187,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) init_completion(&tgtp->tport_unreg_done); nvmet_fc_unregister_targetport(phba->targetport); wait_for_completion_timeout(&tgtp->tport_unreg_done, 5); + lpfc_nvmet_cleanup_io_context(phba); } phba->targetport = NULL; #endif @@ -1010,6 +1223,7 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, oxid = 0; size = 0; sid = 0; + ctxp = NULL; goto dropit; } @@ -1104,39 +1318,71 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; + struct lpfc_nvmet_ctxbuf *ctx_buf; uint32_t *payload; - uint32_t size, oxid, sid, rc; + uint32_t size, oxid, sid, rc, qno; + unsigned long iflag; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint32_t id; #endif + ctx_buf = NULL; if (!nvmebuf || !phba->targetport) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6157 FCP Drop IO\n"); + "6157 NVMET FCP Drop IO\n"); oxid = 0; size = 0; sid = 0; + ctxp = NULL; goto dropit; } + spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag); + if (phba->sli4_hba.nvmet_ctx_cnt) { + 
list_remove_head(&phba->sli4_hba.lpfc_nvmet_ctx_list, + ctx_buf, struct lpfc_nvmet_ctxbuf, list); + phba->sli4_hba.nvmet_ctx_cnt--; + } + spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag); - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - payload = (uint32_t *)(nvmebuf->dbuf.virt); fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); - size = nvmebuf->bytes_recv; oxid = be16_to_cpu(fc_hdr->fh_ox_id); - sid = sli4_sid_from_fc_hdr(fc_hdr); + size = nvmebuf->bytes_recv; - ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmebuf->context; - if (ctxp == NULL) { - atomic_inc(&tgtp->rcv_fcp_cmd_drop); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6158 FCP Drop IO x%x: Alloc\n", - oxid); - lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); - /* Cannot send ABTS without context */ +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) { + id = smp_processor_id(); + if (id < LPFC_CHECK_CPU_CNT) + phba->cpucheck_rcv_io[id]++; + } +#endif + + lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d CPU %02x\n", + oxid, size, smp_processor_id()); + + if (!ctx_buf) { + /* Queue this NVME IO to process later */ + spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag); + list_add_tail(&nvmebuf->hbuf.list, + &phba->sli4_hba.lpfc_nvmet_io_wait_list); + phba->sli4_hba.nvmet_io_wait_cnt++; + phba->sli4_hba.nvmet_io_wait_total++; + spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, + iflag); + + /* Post a brand new DMA buffer to RQ */ + qno = nvmebuf->idx; + lpfc_post_rq_buffer( + phba, phba->sli4_hba.nvmet_mrq_hdr[qno], + phba->sli4_hba.nvmet_mrq_data[qno], 1, qno); return; } + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + payload = (uint32_t *)(nvmebuf->dbuf.virt); + sid = sli4_sid_from_fc_hdr(fc_hdr); + + ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; memset(ctxp, 0, sizeof(ctxp->ctx)); ctxp->wqeq = NULL; ctxp->txrdy = NULL; @@ -1146,9 +1392,9 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->oxid = oxid; ctxp->sid = sid; ctxp->state = LPFC_NVMET_STE_RCV; - ctxp->rqb_buffer = nvmebuf; ctxp->entry_cnt = 1; ctxp->flag = 0; + ctxp->ctxbuf = ctx_buf; spin_lock_init(&ctxp->ctxlock); #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -1164,22 +1410,16 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->ts_isr_status = 0; ctxp->ts_status_nvme = 0; } - - if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) { - id = smp_processor_id(); - if (id < LPFC_CHECK_CPU_CNT) - phba->cpucheck_rcv_io[id]++; - } #endif - lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d CPU %02x\n", - oxid, size, smp_processor_id()); - atomic_inc(&tgtp->rcv_fcp_cmd_in); /* * The calling sequence should be: * nvmet_fc_rcv_fcp_req -> lpfc_nvmet_xmt_fcp_op/cmp -> req->done * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp. + * When we return from nvmet_fc_rcv_fcp_req, all relevant info in + * the NVME command / FC header is stored, so we are free to repost + * the buffer. 
*/ rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req, payload, size); @@ -1187,26 +1427,32 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, /* Process FCP command */ if (rc == 0) { atomic_inc(&tgtp->rcv_fcp_cmd_out); + lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */ return; } atomic_inc(&tgtp->rcv_fcp_cmd_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6159 FCP Drop IO x%x: err x%x\n", - ctxp->oxid, rc); + "6159 FCP Drop IO x%x: err x%x: x%x x%x x%x\n", + ctxp->oxid, rc, + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out), + atomic_read(&tgtp->xmt_fcp_release)); dropit: lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n", oxid, size, sid); if (oxid) { + lpfc_nvmet_defer_release(phba, ctxp); lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid); + lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */ return; } - if (nvmebuf) { - nvmebuf->iocbq->hba_wqidx = 0; - /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ - lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); - } + if (ctx_buf) + lpfc_nvmet_ctxbuf_post(phba, ctx_buf); + + if (nvmebuf) + lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */ #endif } @@ -1258,7 +1504,7 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint64_t isr_timestamp) { if (phba->nvmet_support == 0) { - lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); + lpfc_rq_buf_free(phba, &nvmebuf->hbuf); return; } lpfc_nvmet_unsol_fcp_buffer(phba, pring, nvmebuf, @@ -1459,7 +1705,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, nvmewqe = ctxp->wqeq; if (nvmewqe == NULL) { /* Allocate buffer for command wqe */ - nvmewqe = ctxp->rqb_buffer->iocbq; + nvmewqe = ctxp->ctxbuf->iocbq; if (nvmewqe == NULL) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6110 lpfc_nvmet_prep_fcp_wqe: No " @@ -1486,7 +1732,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, return NULL; } - sgl = (struct sli4_sge *)ctxp->rqb_buffer->sglq->sgl; + sgl = (struct sli4_sge *)ctxp->ctxbuf->sglq->sgl; switch (rsp->op) { case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: @@ -1811,7 +2057,8 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, result = wcqe->parameter; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - atomic_inc(&tgtp->xmt_abort_cmpl); + if (ctxp->flag & LPFC_NVMET_ABORT_OP) + atomic_inc(&tgtp->xmt_fcp_abort_cmpl); ctxp->state = LPFC_NVMET_STE_DONE; @@ -1826,6 +2073,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, } ctxp->flag &= ~LPFC_NVMET_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); + atomic_inc(&tgtp->xmt_abort_rsp); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, "6165 ABORT cmpl: xri x%x flg x%x (%d) " @@ -1834,15 +2082,16 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, wcqe->word0, wcqe->total_data_placed, result, wcqe->word3); + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; /* * if transport has released ctx, then can reuse it. Otherwise, * will be recycled by transport release call. */ if (released) - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); - cmdwqe->context2 = NULL; - cmdwqe->context3 = NULL; + /* This is the iocbq for the abort, not the command */ lpfc_sli_release_iocbq(phba, cmdwqe); /* Since iaab/iaar are NOT set, there is no work left. 
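Note on the receive-path hunks above: lpfc_nvmet_unsol_fcp_buffer() no longer ties an IO context to the RQ buffer the command arrived in. It pops a preallocated lpfc_nvmet_ctxbuf from lpfc_nvmet_ctx_list, and when that pool is empty it parks the incoming DMA buffer on lpfc_nvmet_io_wait_list and posts a fresh buffer to the receive queue instead of dropping the command; the wait list is evidently drained when a context is freed via lpfc_nvmet_ctxbuf_post(). A minimal sketch of that pop-or-park shape, with made-up demo_* names (the real driver uses irqsave spinlocks and FIFO list_add_tail ordering):

#include <stddef.h>

struct demo_node { struct demo_node *next; };

struct demo_pool {
	struct demo_node *free_ctx;   /* preallocated IO contexts */
	struct demo_node *wait_list;  /* commands parked until a ctx frees */
};

/* Pop a context if one is available; otherwise park the request so the
 * caller can repost its receive buffer and return without dropping IO.
 * (LIFO here for brevity; the driver keeps FIFO order.) */
static struct demo_node *demo_get_ctx_or_park(struct demo_pool *p,
					      struct demo_node *req)
{
	struct demo_node *ctx = p->free_ctx;

	if (ctx) {
		p->free_ctx = ctx->next;
		return ctx;               /* process the command now */
	}
	req->next = p->wait_list;
	p->wait_list = req;
	return NULL;                      /* deferred, not dropped */
}

Separately, the solicited abort completion hunk just above (and its unsolicited counterpart in the next hunk) now clears cmdwqe->context2/context3 before the ctxbuf can be reposted, so a recycled context buffer is never still referenced by the completing abort WQE.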
@@ -1876,7 +2125,8 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, result = wcqe->parameter; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - atomic_inc(&tgtp->xmt_abort_cmpl); + if (ctxp->flag & LPFC_NVMET_ABORT_OP) + atomic_inc(&tgtp->xmt_fcp_abort_cmpl); if (!ctxp) { /* if context is clear, related io alrady complete */ @@ -1906,6 +2156,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, } ctxp->flag &= ~LPFC_NVMET_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); + atomic_inc(&tgtp->xmt_abort_rsp); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, "6316 ABTS cmpl xri x%x flg x%x (%x) " @@ -1913,15 +2164,15 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, ctxp->oxid, ctxp->flag, released, wcqe->word0, wcqe->total_data_placed, result, wcqe->word3); + + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; /* * if transport has released ctx, then can reuse it. Otherwise, * will be recycled by transport release call. */ if (released) - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); - - cmdwqe->context2 = NULL; - cmdwqe->context3 = NULL; + lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); /* Since iaab/iaar are NOT set, there is no work left. * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted @@ -1952,7 +2203,7 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, result = wcqe->parameter; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - atomic_inc(&tgtp->xmt_abort_cmpl); + atomic_inc(&tgtp->xmt_ls_abort_cmpl); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, "6083 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n", @@ -1983,10 +2234,6 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, sid, xri, ctxp->wqeq->sli4_xritag); tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - if (!ctxp->wqeq) { - ctxp->wqeq = ctxp->rqb_buffer->iocbq; - ctxp->wqeq->hba_wqidx = 0; - } ndlp = lpfc_findnode_did(phba->pport, sid); if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || @@ -2082,7 +2329,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; if (!ctxp->wqeq) { - ctxp->wqeq = ctxp->rqb_buffer->iocbq; + ctxp->wqeq = ctxp->ctxbuf->iocbq; ctxp->wqeq->hba_wqidx = 0; } @@ -2103,6 +2350,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, /* Issue ABTS for this WQE based on iotag */ ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba); if (!ctxp->abort_wqeq) { + atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, "6161 ABORT failed: No wqeqs: " "xri: x%x\n", ctxp->oxid); @@ -2127,6 +2375,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, /* driver queued commands are in process of being flushed */ if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) { spin_unlock_irqrestore(&phba->hbalock, flags); + atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_ERR, LOG_NVME, "6163 Driver in reset cleanup - flushing " "NVME Req now. 
hba_flag x%x oxid x%x\n", @@ -2139,6 +2388,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, /* Outstanding abort is in progress */ if (abts_wqeq->iocb_flag & LPFC_DRIVER_ABORTED) { spin_unlock_irqrestore(&phba->hbalock, flags); + atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_ERR, LOG_NVME, "6164 Outstanding NVME I/O Abort Request " "still pending on oxid x%x\n", @@ -2189,9 +2439,12 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, abts_wqeq->context2 = ctxp; rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); - if (rc == WQE_SUCCESS) + if (rc == WQE_SUCCESS) { + atomic_inc(&tgtp->xmt_abort_sol); return 0; + } + atomic_inc(&tgtp->xmt_abort_rsp_error); ctxp->flag &= ~LPFC_NVMET_ABORT_OP; lpfc_sli_release_iocbq(phba, abts_wqeq); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, @@ -2214,7 +2467,7 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; if (!ctxp->wqeq) { - ctxp->wqeq = ctxp->rqb_buffer->iocbq; + ctxp->wqeq = ctxp->ctxbuf->iocbq; ctxp->wqeq->hba_wqidx = 0; } @@ -2230,11 +2483,11 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); if (rc == WQE_SUCCESS) { - atomic_inc(&tgtp->xmt_abort_rsp); return 0; } aerr: + atomic_inc(&tgtp->xmt_abort_rsp_error); ctxp->flag &= ~LPFC_NVMET_ABORT_OP; atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, @@ -2269,6 +2522,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, } abts_wqeq = ctxp->wqeq; wqe_abts = &abts_wqeq->wqe; + lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri); spin_lock_irqsave(&phba->hbalock, flags); @@ -2278,7 +2532,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); if (rc == WQE_SUCCESS) { - atomic_inc(&tgtp->xmt_abort_rsp); + atomic_inc(&tgtp->xmt_abort_unsol); return 0; } diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index 128759fe6650..6eb2f5d8d4ed 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -22,6 +22,7 @@ ********************************************************************/ #define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ +#define LPFC_NVMET_RQE_DEF_COUNT 512 #define LPFC_NVMET_SUCCESS_LEN 12 /* Used for NVME Target */ @@ -34,6 +35,7 @@ struct lpfc_nvmet_tgtport { atomic_t rcv_ls_req_out; atomic_t rcv_ls_req_drop; atomic_t xmt_ls_abort; + atomic_t xmt_ls_abort_cmpl; /* Stats counters - lpfc_nvmet_xmt_ls_rsp */ atomic_t xmt_ls_rsp; @@ -47,9 +49,9 @@ struct lpfc_nvmet_tgtport { atomic_t rcv_fcp_cmd_in; atomic_t rcv_fcp_cmd_out; atomic_t rcv_fcp_cmd_drop; + atomic_t xmt_fcp_release; /* Stats counters - lpfc_nvmet_xmt_fcp_op */ - atomic_t xmt_fcp_abort; atomic_t xmt_fcp_drop; atomic_t xmt_fcp_read_rsp; atomic_t xmt_fcp_read; @@ -62,12 +64,13 @@ struct lpfc_nvmet_tgtport { atomic_t xmt_fcp_rsp_drop; - /* Stats counters - lpfc_nvmet_unsol_issue_abort */ + /* Stats counters - lpfc_nvmet_xmt_fcp_abort */ + atomic_t xmt_fcp_abort; + atomic_t xmt_fcp_abort_cmpl; + atomic_t xmt_abort_sol; + atomic_t xmt_abort_unsol; atomic_t xmt_abort_rsp; atomic_t xmt_abort_rsp_error; - - /* Stats counters - lpfc_nvmet_xmt_abort_cmp */ - atomic_t xmt_abort_cmpl; }; struct lpfc_nvmet_rcv_ctx { @@ -103,6 +106,7 @@ struct lpfc_nvmet_rcv_ctx { #define 
LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */ #define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */ struct rqb_dmabuf *rqb_buffer; + struct lpfc_nvmet_ctxbuf *ctxbuf; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint64_t ts_isr_cmd; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 2a4fc00dfa9b..d6b184839bc2 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -74,6 +74,8 @@ static struct lpfc_iocbq *lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *, struct lpfc_iocbq *); static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *, struct hbq_dmabuf *); +static void lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport, + struct hbq_dmabuf *dmabuf); static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_cqe *); static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *, @@ -479,22 +481,23 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, if (unlikely(!hq) || unlikely(!dq)) return -ENOMEM; put_index = hq->host_index; - temp_hrqe = hq->qe[hq->host_index].rqe; + temp_hrqe = hq->qe[put_index].rqe; temp_drqe = dq->qe[dq->host_index].rqe; if (hq->type != LPFC_HRQ || dq->type != LPFC_DRQ) return -EINVAL; - if (hq->host_index != dq->host_index) + if (put_index != dq->host_index) return -EINVAL; /* If the host has not yet processed the next entry then we are done */ - if (((hq->host_index + 1) % hq->entry_count) == hq->hba_index) + if (((put_index + 1) % hq->entry_count) == hq->hba_index) return -EBUSY; lpfc_sli_pcimem_bcopy(hrqe, temp_hrqe, hq->entry_size); lpfc_sli_pcimem_bcopy(drqe, temp_drqe, dq->entry_size); /* Update the host index to point to the next slot */ - hq->host_index = ((hq->host_index + 1) % hq->entry_count); + hq->host_index = ((put_index + 1) % hq->entry_count); dq->host_index = ((dq->host_index + 1) % dq->entry_count); + hq->RQ_buf_posted++; /* Ring The Header Receive Queue Doorbell */ if (!(hq->host_index % hq->entry_repost)) { @@ -5906,7 +5909,7 @@ lpfc_set_features(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox, bf_set(lpfc_mbx_set_feature_mds, &mbox->u.mqe.un.set_feature, 1); bf_set(lpfc_mbx_set_feature_mds_deep_loopbk, - &mbox->u.mqe.un.set_feature, 0); + &mbox->u.mqe.un.set_feature, 1); mbox->u.mqe.un.set_feature.feature = LPFC_SET_MDS_DIAGS; mbox->u.mqe.un.set_feature.param_len = 8; break; @@ -6512,6 +6515,50 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) (phba->hba_flag & HBA_FCOE_MODE) ? 
"FCoE" : "FC"); } +int +lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, + struct lpfc_queue *drq, int count, int idx) +{ + int rc, i; + struct lpfc_rqe hrqe; + struct lpfc_rqe drqe; + struct lpfc_rqb *rqbp; + struct rqb_dmabuf *rqb_buffer; + LIST_HEAD(rqb_buf_list); + + rqbp = hrq->rqbp; + for (i = 0; i < count; i++) { + /* IF RQ is already full, don't bother */ + if (rqbp->buffer_count + i >= rqbp->entry_count - 1) + break; + rqb_buffer = rqbp->rqb_alloc_buffer(phba); + if (!rqb_buffer) + break; + rqb_buffer->hrq = hrq; + rqb_buffer->drq = drq; + rqb_buffer->idx = idx; + list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list); + } + while (!list_empty(&rqb_buf_list)) { + list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf, + hbuf.list); + + hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys); + hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys); + drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys); + drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys); + rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe); + if (rc < 0) { + rqbp->rqb_free_buffer(phba, rqb_buffer); + } else { + list_add_tail(&rqb_buffer->hbuf.list, + &rqbp->rqb_buffer_list); + rqbp->buffer_count++; + } + } + return 1; +} + /** * lpfc_sli4_hba_setup - SLI4 device initialization PCI function * @phba: Pointer to HBA context object. @@ -6524,7 +6571,7 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) int lpfc_sli4_hba_setup(struct lpfc_hba *phba) { - int rc, i; + int rc, i, cnt; LPFC_MBOXQ_t *mboxq; struct lpfc_mqe *mqe; uint8_t *vpd; @@ -6875,6 +6922,21 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) goto out_destroy_queue; } phba->sli4_hba.nvmet_xri_cnt = rc; + + cnt = phba->cfg_iocb_cnt * 1024; + /* We need 1 iocbq for every SGL, for IO processing */ + cnt += phba->sli4_hba.nvmet_xri_cnt; + /* Initialize and populate the iocb list per host */ + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "2821 initialize iocb list %d total %d\n", + phba->cfg_iocb_cnt, cnt); + rc = lpfc_init_iocb_list(phba, cnt); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "1413 Failed to init iocb list.\n"); + goto out_destroy_queue; + } + lpfc_nvmet_create_targetport(phba); } else { /* update host scsi xri-sgl sizes and mappings */ @@ -6894,28 +6956,34 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) "and mapping: %d\n", rc); goto out_destroy_queue; } + + cnt = phba->cfg_iocb_cnt * 1024; + /* Initialize and populate the iocb list per host */ + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "2820 initialize iocb list %d total %d\n", + phba->cfg_iocb_cnt, cnt); + rc = lpfc_init_iocb_list(phba, cnt); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6301 Failed to init iocb list.\n"); + goto out_destroy_queue; + } } if (phba->nvmet_support && phba->cfg_nvmet_mrq) { - /* Post initial buffers to all RQs created */ for (i = 0; i < phba->cfg_nvmet_mrq; i++) { rqbp = phba->sli4_hba.nvmet_mrq_hdr[i]->rqbp; INIT_LIST_HEAD(&rqbp->rqb_buffer_list); rqbp->rqb_alloc_buffer = lpfc_sli4_nvmet_alloc; rqbp->rqb_free_buffer = lpfc_sli4_nvmet_free; - rqbp->entry_count = 256; + rqbp->entry_count = LPFC_NVMET_RQE_DEF_COUNT; rqbp->buffer_count = 0; - /* Divide by 4 and round down to multiple of 16 */ - rc = (phba->cfg_nvmet_mrq_post >> 2) & 0xfff8; - phba->sli4_hba.nvmet_mrq_hdr[i]->entry_repost = rc; - phba->sli4_hba.nvmet_mrq_data[i]->entry_repost = rc; - lpfc_post_rq_buffer( phba, phba->sli4_hba.nvmet_mrq_hdr[i], phba->sli4_hba.nvmet_mrq_data[i], - phba->cfg_nvmet_mrq_post); + LPFC_NVMET_RQE_DEF_COUNT, i); } } @@ 
-7082,6 +7150,7 @@ out_unset_queue: /* Unset all the queues set up in this routine when error out */ lpfc_sli4_queue_unset(phba); out_destroy_queue: + lpfc_free_iocb_list(phba); lpfc_sli4_queue_destroy(phba); out_stop_timers: lpfc_stop_hba_timers(phba); @@ -8621,8 +8690,11 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, memset(wqe, 0, sizeof(union lpfc_wqe128)); /* Some of the fields are in the right position already */ memcpy(wqe, &iocbq->iocb, sizeof(union lpfc_wqe)); - wqe->generic.wqe_com.word7 = 0; /* The ct field has moved so reset */ - wqe->generic.wqe_com.word10 = 0; + if (iocbq->iocb.ulpCommand != CMD_SEND_FRAME) { + /* The ct field has moved so reset */ + wqe->generic.wqe_com.word7 = 0; + wqe->generic.wqe_com.word10 = 0; + } abort_tag = (uint32_t) iocbq->iotag; xritag = iocbq->sli4_xritag; @@ -9116,6 +9188,10 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, } break; + case CMD_SEND_FRAME: + bf_set(wqe_xri_tag, &wqe->generic.wqe_com, xritag); + bf_set(wqe_reqtag, &wqe->generic.wqe_com, iocbq->iotag); + return 0; case CMD_XRI_ABORTED_CX: case CMD_CREATE_XRI_CR: /* Do we expect to use this? */ case CMD_IOCB_FCP_IBIDIR64_CR: /* bidirectional xfer */ @@ -12788,6 +12864,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) struct fc_frame_header *fc_hdr; struct lpfc_queue *hrq = phba->sli4_hba.hdr_rq; struct lpfc_queue *drq = phba->sli4_hba.dat_rq; + struct lpfc_nvmet_tgtport *tgtp; struct hbq_dmabuf *dma_buf; uint32_t status, rq_id; unsigned long iflags; @@ -12808,7 +12885,6 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) case FC_STATUS_RQ_BUF_LEN_EXCEEDED: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "2537 Receive Frame Truncated!!\n"); - hrq->RQ_buf_trunc++; case FC_STATUS_RQ_SUCCESS: lpfc_sli4_rq_release(hrq, drq); spin_lock_irqsave(&phba->hbalock, iflags); @@ -12819,6 +12895,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) goto out; } hrq->RQ_rcv_buf++; + hrq->RQ_buf_posted--; memcpy(&dma_buf->cq_event.cqe.rcqe_cmpl, rcqe, sizeof(*rcqe)); /* If a NVME LS event (type 0x28), treat it as Fast path */ @@ -12832,8 +12909,21 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) spin_unlock_irqrestore(&phba->hbalock, iflags); workposted = true; break; - case FC_STATUS_INSUFF_BUF_NEED_BUF: case FC_STATUS_INSUFF_BUF_FRM_DISC: + if (phba->nvmet_support) { + tgtp = phba->targetport->private; + lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_NVME, + "6402 RQE Error x%x, posted %d err_cnt " + "%d: %x %x %x\n", + status, hrq->RQ_buf_posted, + hrq->RQ_no_posted_buf, + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out), + atomic_read(&tgtp->xmt_fcp_release)); + } + /* fallthrough */ + + case FC_STATUS_INSUFF_BUF_NEED_BUF: hrq->RQ_no_posted_buf++; /* Post more buffers if possible */ spin_lock_irqsave(&phba->hbalock, iflags); @@ -12951,7 +13041,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, while ((cqe = lpfc_sli4_cq_get(cq))) { workposted |= lpfc_sli4_sp_handle_mcqe(phba, cqe); if (!(++ecount % cq->entry_repost)) - lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); + break; cq->CQ_mbox++; } break; @@ -12965,7 +13055,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, workposted |= lpfc_sli4_sp_handle_cqe(phba, cq, cqe); if (!(++ecount % cq->entry_repost)) - lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); + break; } /* Track the max number of CQEs processed in 1 EQ */ @@ -13135,6 +13225,7 @@ 
lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, struct lpfc_queue *drq; struct rqb_dmabuf *dma_buf; struct fc_frame_header *fc_hdr; + struct lpfc_nvmet_tgtport *tgtp; uint32_t status, rq_id; unsigned long iflags; uint32_t fctl, idx; @@ -13165,8 +13256,6 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, case FC_STATUS_RQ_BUF_LEN_EXCEEDED: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "6126 Receive Frame Truncated!!\n"); - hrq->RQ_buf_trunc++; - break; case FC_STATUS_RQ_SUCCESS: lpfc_sli4_rq_release(hrq, drq); spin_lock_irqsave(&phba->hbalock, iflags); @@ -13178,6 +13267,7 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, } spin_unlock_irqrestore(&phba->hbalock, iflags); hrq->RQ_rcv_buf++; + hrq->RQ_buf_posted--; fc_hdr = (struct fc_frame_header *)dma_buf->hbuf.virt; /* Just some basic sanity checks on FCP Command frame */ @@ -13200,14 +13290,23 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, drop: lpfc_in_buf_free(phba, &dma_buf->dbuf); break; - case FC_STATUS_INSUFF_BUF_NEED_BUF: case FC_STATUS_INSUFF_BUF_FRM_DISC: + if (phba->nvmet_support) { + tgtp = phba->targetport->private; + lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_NVME, + "6401 RQE Error x%x, posted %d err_cnt " + "%d: %x %x %x\n", + status, hrq->RQ_buf_posted, + hrq->RQ_no_posted_buf, + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out), + atomic_read(&tgtp->xmt_fcp_release)); + } + /* fallthrough */ + + case FC_STATUS_INSUFF_BUF_NEED_BUF: hrq->RQ_no_posted_buf++; /* Post more buffers if possible */ - spin_lock_irqsave(&phba->hbalock, iflags); - phba->hba_flag |= HBA_POST_RECEIVE_BUFFER; - spin_unlock_irqrestore(&phba->hbalock, iflags); - workposted = true; break; } out: @@ -13361,7 +13460,7 @@ process_cq: while ((cqe = lpfc_sli4_cq_get(cq))) { workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe); if (!(++ecount % cq->entry_repost)) - lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); + break; } /* Track the max number of CQEs processed in 1 EQ */ @@ -13452,7 +13551,7 @@ lpfc_sli4_fof_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe) while ((cqe = lpfc_sli4_cq_get(cq))) { workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe); if (!(++ecount % cq->entry_repost)) - lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); + break; } /* Track the max number of CQEs processed in 1 EQ */ @@ -13534,7 +13633,7 @@ lpfc_sli4_fof_intr_handler(int irq, void *dev_id) while ((eqe = lpfc_sli4_eq_get(eq))) { lpfc_sli4_fof_handle_eqe(phba, eqe); if (!(++ecount % eq->entry_repost)) - lpfc_sli4_eq_release(eq, LPFC_QUEUE_NOARM); + break; eq->EQ_processed++; } @@ -13651,7 +13750,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx); if (!(++ecount % fpeq->entry_repost)) - lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_NOARM); + break; fpeq->EQ_processed++; } @@ -13832,17 +13931,10 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t entry_size, } queue->entry_size = entry_size; queue->entry_count = entry_count; - - /* - * entry_repost is calculated based on the number of entries in the - * queue. This works out except for RQs. If buffers are NOT initially - * posted for every RQE, entry_repost should be adjusted accordingly. 
- */ - queue->entry_repost = (entry_count >> 3); - if (queue->entry_repost < LPFC_QUEUE_MIN_REPOST) - queue->entry_repost = LPFC_QUEUE_MIN_REPOST; queue->phba = phba; + /* entry_repost will be set during q creation */ + return queue; out_fail: lpfc_sli4_queue_free(queue); @@ -14073,6 +14165,7 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax) status = -ENXIO; eq->host_index = 0; eq->hba_index = 0; + eq->entry_repost = LPFC_EQ_REPOST; mempool_free(mbox, phba->mbox_mem_pool); return status; @@ -14146,9 +14239,9 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq, default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0361 Unsupported CQ count: " - "entry cnt %d sz %d pg cnt %d repost %d\n", + "entry cnt %d sz %d pg cnt %d\n", cq->entry_count, cq->entry_size, - cq->page_count, cq->entry_repost); + cq->page_count); if (cq->entry_count < 256) { status = -EINVAL; goto out; @@ -14201,6 +14294,7 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq, cq->assoc_qid = eq->queue_id; cq->host_index = 0; cq->hba_index = 0; + cq->entry_repost = LPFC_CQ_REPOST; out: mempool_free(mbox, phba->mbox_mem_pool); @@ -14392,6 +14486,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp, cq->assoc_qid = eq->queue_id; cq->host_index = 0; cq->hba_index = 0; + cq->entry_repost = LPFC_CQ_REPOST; rc = 0; list_for_each_entry(dmabuf, &cq->page_list, list) { @@ -14640,6 +14735,7 @@ lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq, mq->subtype = subtype; mq->host_index = 0; mq->hba_index = 0; + mq->entry_repost = LPFC_MQ_REPOST; /* link the mq onto the parent cq child list */ list_add_tail(&mq->list, &cq->child_list); @@ -14865,34 +14961,6 @@ out: } /** - * lpfc_rq_adjust_repost - Adjust entry_repost for an RQ - * @phba: HBA structure that indicates port to create a queue on. - * @rq: The queue structure to use for the receive queue. - * @qno: The associated HBQ number - * - * - * For SLI4 we need to adjust the RQ repost value based on - * the number of buffers that are initially posted to the RQ. - */ -void -lpfc_rq_adjust_repost(struct lpfc_hba *phba, struct lpfc_queue *rq, int qno) -{ - uint32_t cnt; - - /* sanity check on queue memory */ - if (!rq) - return; - cnt = lpfc_hbq_defs[qno]->entry_count; - - /* Recalc repost for RQs based on buffers initially posted */ - cnt = (cnt >> 3); - if (cnt < LPFC_QUEUE_MIN_REPOST) - cnt = LPFC_QUEUE_MIN_REPOST; - - rq->entry_repost = cnt; -} - -/** * lpfc_rq_create - Create a Receive Queue on the HBA * @phba: HBA structure that indicates port to create a queue on. * @hrq: The queue structure to use to create the header receive queue. 
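Note on the queue-setup hunks in this region: the old scheme derived entry_repost from entry_count >> 3 (floored at LPFC_QUEUE_MIN_REPOST) at allocation time, which is why lpfc_rq_adjust_repost existed to patch the value up for RQs. The series instead assigns fixed per-type thresholds at queue-create time (LPFC_EQ_REPOST, LPFC_MQ_REPOST, LPFC_CQ_REPOST, LPFC_RQ_REPOST in the lpfc_sli4.h hunk further down), tracks RQ occupancy explicitly through the new RQ_buf_posted counter, and makes the EQ/CQ processing loops break out at the threshold rather than releasing entries mid-loop. A rough sketch of the underlying batching idea, not the driver's actual code (demo_* names are made up):

#include <stdint.h>
#include <stdbool.h>

struct demo_queue {
	uint32_t entry_count;   /* ring size */
	uint32_t entry_repost;  /* release threshold, e.g. 64 for a CQ */
	uint32_t host_index;    /* next entry the host consumes */
	uint32_t pending;       /* consumed but not yet released */
};

/* Stand-in for a doorbell write such as lpfc_sli4_cq_release(). */
static void demo_release(struct demo_queue *q, bool arm)
{
	(void)arm;              /* a real release would also set the ARM bit */
	q->pending = 0;         /* hardware may now reuse those entries */
}

/* Consume one entry; ring the doorbell once per entry_repost batch
 * instead of once per entry, which cuts MMIO traffic on hot queues. */
static void demo_consume(struct demo_queue *q)
{
	q->host_index = (q->host_index + 1) % q->entry_count;
	if (++q->pending >= q->entry_repost)
		demo_release(q, false);
}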
@@ -15077,6 +15145,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, hrq->subtype = subtype; hrq->host_index = 0; hrq->hba_index = 0; + hrq->entry_repost = LPFC_RQ_REPOST; /* now create the data queue */ lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE, @@ -15087,7 +15156,12 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, if (phba->sli4_hba.pc_sli4_params.rqv == LPFC_Q_CREATE_VERSION_1) { bf_set(lpfc_rq_context_rqe_count_1, &rq_create->u.request.context, hrq->entry_count); - rq_create->u.request.context.buffer_size = LPFC_DATA_BUF_SIZE; + if (subtype == LPFC_NVMET) + rq_create->u.request.context.buffer_size = + LPFC_NVMET_DATA_BUF_SIZE; + else + rq_create->u.request.context.buffer_size = + LPFC_DATA_BUF_SIZE; bf_set(lpfc_rq_context_rqe_size, &rq_create->u.request.context, LPFC_RQE_SIZE_8); bf_set(lpfc_rq_context_page_size, &rq_create->u.request.context, @@ -15124,8 +15198,14 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, LPFC_RQ_RING_SIZE_4096); break; } - bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context, - LPFC_DATA_BUF_SIZE); + if (subtype == LPFC_NVMET) + bf_set(lpfc_rq_context_buf_size, + &rq_create->u.request.context, + LPFC_NVMET_DATA_BUF_SIZE); + else + bf_set(lpfc_rq_context_buf_size, + &rq_create->u.request.context, + LPFC_DATA_BUF_SIZE); } bf_set(lpfc_rq_context_cq_id, &rq_create->u.request.context, cq->queue_id); @@ -15158,6 +15238,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, drq->subtype = subtype; drq->host_index = 0; drq->hba_index = 0; + drq->entry_repost = LPFC_RQ_REPOST; /* link the header and data RQs onto the parent cq child list */ list_add_tail(&hrq->list, &cq->child_list); @@ -15270,7 +15351,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, cq->queue_id); bf_set(lpfc_rq_context_data_size, &rq_create->u.request.context, - LPFC_DATA_BUF_SIZE); + LPFC_NVMET_DATA_BUF_SIZE); bf_set(lpfc_rq_context_hdr_size, &rq_create->u.request.context, LPFC_HDR_BUF_SIZE); @@ -15315,6 +15396,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, hrq->subtype = subtype; hrq->host_index = 0; hrq->hba_index = 0; + hrq->entry_repost = LPFC_RQ_REPOST; drq->db_format = LPFC_DB_RING_FORMAT; drq->db_regaddr = phba->sli4_hba.RQDBregaddr; @@ -15323,6 +15405,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, drq->subtype = subtype; drq->host_index = 0; drq->hba_index = 0; + drq->entry_repost = LPFC_RQ_REPOST; list_add_tail(&hrq->list, &cq->child_list); list_add_tail(&drq->list, &cq->child_list); @@ -16063,6 +16146,8 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) struct fc_vft_header *fc_vft_hdr; uint32_t *header = (uint32_t *) fc_hdr; +#define FC_RCTL_MDS_DIAGS 0xF4 + switch (fc_hdr->fh_r_ctl) { case FC_RCTL_DD_UNCAT: /* uncategorized information */ case FC_RCTL_DD_SOL_DATA: /* solicited data */ @@ -16090,6 +16175,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) case FC_RCTL_F_BSY: /* fabric busy to data frame */ case FC_RCTL_F_BSYL: /* fabric busy to link control frame */ case FC_RCTL_LCR: /* link credit reset */ + case FC_RCTL_MDS_DIAGS: /* MDS Diagnostics */ case FC_RCTL_END: /* end */ break; case FC_RCTL_VFTH: /* Virtual Fabric tagging Header */ @@ -16099,12 +16185,16 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) default: goto drop; } + +#define FC_TYPE_VENDOR_UNIQUE 0xFF + switch (fc_hdr->fh_type) { case FC_TYPE_BLS: case FC_TYPE_ELS: case FC_TYPE_FCP: case 
FC_TYPE_CT: case FC_TYPE_NVME: + case FC_TYPE_VENDOR_UNIQUE: break; case FC_TYPE_IP: case FC_TYPE_ILS: @@ -16115,12 +16205,14 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) lpfc_printf_log(phba, KERN_INFO, LOG_ELS, "2538 Received frame rctl:%s (x%x), type:%s (x%x), " "frame Data:%08x %08x %08x %08x %08x %08x %08x\n", + (fc_hdr->fh_r_ctl == FC_RCTL_MDS_DIAGS) ? "MDS Diags" : lpfc_rctl_names[fc_hdr->fh_r_ctl], fc_hdr->fh_r_ctl, - lpfc_type_names[fc_hdr->fh_type], fc_hdr->fh_type, - be32_to_cpu(header[0]), be32_to_cpu(header[1]), - be32_to_cpu(header[2]), be32_to_cpu(header[3]), - be32_to_cpu(header[4]), be32_to_cpu(header[5]), - be32_to_cpu(header[6])); + (fc_hdr->fh_type == FC_TYPE_VENDOR_UNIQUE) ? + "Vendor Unique" : lpfc_type_names[fc_hdr->fh_type], + fc_hdr->fh_type, be32_to_cpu(header[0]), + be32_to_cpu(header[1]), be32_to_cpu(header[2]), + be32_to_cpu(header[3]), be32_to_cpu(header[4]), + be32_to_cpu(header[5]), be32_to_cpu(header[6])); return 0; drop: lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, @@ -16926,6 +17018,96 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport, lpfc_sli_release_iocbq(phba, iocbq); } +static void +lpfc_sli4_mds_loopback_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) +{ + struct lpfc_dmabuf *pcmd = cmdiocb->context2; + + if (pcmd && pcmd->virt) + pci_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys); + kfree(pcmd); + lpfc_sli_release_iocbq(phba, cmdiocb); +} + +static void +lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport, + struct hbq_dmabuf *dmabuf) +{ + struct fc_frame_header *fc_hdr; + struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *iocbq = NULL; + union lpfc_wqe *wqe; + struct lpfc_dmabuf *pcmd = NULL; + uint32_t frame_len; + int rc; + + fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt; + frame_len = bf_get(lpfc_rcqe_length, &dmabuf->cq_event.cqe.rcqe_cmpl); + + /* Send the received frame back */ + iocbq = lpfc_sli_get_iocbq(phba); + if (!iocbq) + goto exit; + + /* Allocate buffer for command payload */ + pcmd = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); + if (pcmd) + pcmd->virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL, + &pcmd->phys); + if (!pcmd || !pcmd->virt) + goto exit; + + INIT_LIST_HEAD(&pcmd->list); + + /* copyin the payload */ + memcpy(pcmd->virt, dmabuf->dbuf.virt, frame_len); + + /* fill in BDE's for command */ + iocbq->iocb.un.xseq64.bdl.addrHigh = putPaddrHigh(pcmd->phys); + iocbq->iocb.un.xseq64.bdl.addrLow = putPaddrLow(pcmd->phys); + iocbq->iocb.un.xseq64.bdl.bdeFlags = BUFF_TYPE_BDE_64; + iocbq->iocb.un.xseq64.bdl.bdeSize = frame_len; + + iocbq->context2 = pcmd; + iocbq->vport = vport; + iocbq->iocb_flag &= ~LPFC_FIP_ELS_ID_MASK; + iocbq->iocb_flag |= LPFC_USE_FCPWQIDX; + + /* + * Setup rest of the iocb as though it were a WQE + * Build the SEND_FRAME WQE + */ + wqe = (union lpfc_wqe *)&iocbq->iocb; + + wqe->send_frame.frame_len = frame_len; + wqe->send_frame.fc_hdr_wd0 = be32_to_cpu(*((uint32_t *)fc_hdr)); + wqe->send_frame.fc_hdr_wd1 = be32_to_cpu(*((uint32_t *)fc_hdr + 1)); + wqe->send_frame.fc_hdr_wd2 = be32_to_cpu(*((uint32_t *)fc_hdr + 2)); + wqe->send_frame.fc_hdr_wd3 = be32_to_cpu(*((uint32_t *)fc_hdr + 3)); + wqe->send_frame.fc_hdr_wd4 = be32_to_cpu(*((uint32_t *)fc_hdr + 4)); + wqe->send_frame.fc_hdr_wd5 = be32_to_cpu(*((uint32_t *)fc_hdr + 5)); + + iocbq->iocb.ulpCommand = CMD_SEND_FRAME; + iocbq->iocb.ulpLe = 1; + iocbq->iocb_cmpl = lpfc_sli4_mds_loopback_cmpl; + rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocbq, 0); 
+ if (rc == IOCB_ERROR) + goto exit; + + lpfc_in_buf_free(phba, &dmabuf->dbuf); + return; + +exit: + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "2023 Unable to process MDS loopback frame\n"); + if (pcmd && pcmd->virt) + pci_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys); + kfree(pcmd); + lpfc_sli_release_iocbq(phba, iocbq); + lpfc_in_buf_free(phba, &dmabuf->dbuf); +} + /** * lpfc_sli4_handle_received_buffer - Handle received buffers from firmware * @phba: Pointer to HBA context object. @@ -16964,6 +17146,13 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba, fcfi = bf_get(lpfc_rcqe_fcf_id, &dmabuf->cq_event.cqe.rcqe_cmpl); + if (fc_hdr->fh_r_ctl == 0xF4 && fc_hdr->fh_type == 0xFF) { + vport = phba->pport; + /* Handle MDS Loopback frames */ + lpfc_sli4_handle_mds_loopback(vport, dmabuf); + return; + } + /* d_id this frame is directed to */ did = sli4_did_from_fc_hdr(fc_hdr); @@ -17137,6 +17326,14 @@ lpfc_sli4_post_rpi_hdr(struct lpfc_hba *phba, struct lpfc_rpi_hdr *rpi_page) "status x%x add_status x%x, mbx status x%x\n", shdr_status, shdr_add_status, rc); rc = -ENXIO; + } else { + /* + * The next_rpi stores the next logical module-64 rpi value used + * to post physical rpis in subsequent rpi postings. + */ + spin_lock_irq(&phba->hbalock); + phba->sli4_hba.next_rpi = rpi_page->next_rpi; + spin_unlock_irq(&phba->hbalock); } return rc; } @@ -18717,7 +18914,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number, spin_lock_irqsave(&pring->ring_lock, iflags); ctxp = pwqe->context2; - sglq = ctxp->rqb_buffer->sglq; + sglq = ctxp->ctxbuf->sglq; if (pwqe->sli4_xritag == NO_XRI) { pwqe->sli4_lxritag = sglq->sli4_lxritag; pwqe->sli4_xritag = sglq->sli4_xritag; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index da46471337c8..cf863db27700 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -24,7 +24,6 @@ #define LPFC_XRI_EXCH_BUSY_WAIT_TMO 10000 #define LPFC_XRI_EXCH_BUSY_WAIT_T1 10 #define LPFC_XRI_EXCH_BUSY_WAIT_T2 30000 -#define LPFC_RELEASE_NOTIFICATION_INTERVAL 32 #define LPFC_RPI_LOW_WATER_MARK 10 #define LPFC_UNREG_FCF 1 @@ -155,7 +154,11 @@ struct lpfc_queue { uint32_t entry_count; /* Number of entries to support on the queue */ uint32_t entry_size; /* Size of each queue entry. 
*/ uint32_t entry_repost; /* Count of entries before doorbell is rung */ -#define LPFC_QUEUE_MIN_REPOST 8 +#define LPFC_EQ_REPOST 8 +#define LPFC_MQ_REPOST 8 +#define LPFC_CQ_REPOST 64 +#define LPFC_RQ_REPOST 64 +#define LPFC_RELEASE_NOTIFICATION_INTERVAL 32 /* For WQs */ uint32_t queue_id; /* Queue ID assigned by the hardware */ uint32_t assoc_qid; /* Queue ID associated with, for CQ/WQ/MQ */ uint32_t page_count; /* Number of pages allocated for this queue */ @@ -195,7 +198,7 @@ struct lpfc_queue { /* defines for RQ stats */ #define RQ_no_posted_buf q_cnt_1 #define RQ_no_buf_found q_cnt_2 -#define RQ_buf_trunc q_cnt_3 +#define RQ_buf_posted q_cnt_3 #define RQ_rcv_buf q_cnt_4 uint64_t isr_timestamp; @@ -617,12 +620,17 @@ struct lpfc_sli4_hba { uint16_t scsi_xri_start; uint16_t els_xri_cnt; uint16_t nvmet_xri_cnt; + uint16_t nvmet_ctx_cnt; + uint16_t nvmet_io_wait_cnt; + uint16_t nvmet_io_wait_total; struct list_head lpfc_els_sgl_list; struct list_head lpfc_abts_els_sgl_list; struct list_head lpfc_nvmet_sgl_list; struct list_head lpfc_abts_nvmet_ctx_list; struct list_head lpfc_abts_scsi_buf_list; struct list_head lpfc_abts_nvme_buf_list; + struct list_head lpfc_nvmet_ctx_list; + struct list_head lpfc_nvmet_io_wait_list; struct lpfc_sglq **lpfc_sglq_active_list; struct list_head lpfc_rpi_hdr_list; unsigned long *rpi_bmask; @@ -654,6 +662,7 @@ struct lpfc_sli4_hba { spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */ spinlock_t sgl_list_lock; /* list of aborted els IOs */ spinlock_t nvmet_io_lock; + spinlock_t nvmet_io_wait_lock; /* IOs waiting for ctx resources */ uint32_t physical_port; /* CPU to vector mapping information */ @@ -661,8 +670,6 @@ struct lpfc_sli4_hba { uint16_t num_online_cpu; uint16_t num_present_cpu; uint16_t curr_disp_cpu; - - uint16_t nvmet_mrq_post_idx; }; enum lpfc_sge_type { @@ -698,6 +705,7 @@ struct lpfc_rpi_hdr { struct lpfc_dmabuf *dmabuf; uint32_t page_count; uint32_t start_rpi; + uint16_t next_rpi; }; struct lpfc_rsrc_blks { @@ -762,7 +770,6 @@ int lpfc_rq_create(struct lpfc_hba *, struct lpfc_queue *, int lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, struct lpfc_queue **drqp, struct lpfc_queue **cqp, uint32_t subtype); -void lpfc_rq_adjust_repost(struct lpfc_hba *, struct lpfc_queue *, int); int lpfc_eq_destroy(struct lpfc_hba *, struct lpfc_queue *); int lpfc_cq_destroy(struct lpfc_hba *, struct lpfc_queue *); int lpfc_mq_destroy(struct lpfc_hba *, struct lpfc_queue *); diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 1c26dc67151b..c2653244221c 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. 
* *******************************************************************/ -#define LPFC_DRIVER_VERSION "11.2.0.12" +#define LPFC_DRIVER_VERSION "11.2.0.14" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e31f1cc90b81..99e16ac479e3 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1851,7 +1851,7 @@ static int scsi_mq_prep_fn(struct request *req) /* zero out the cmd, except for the embedded scsi_request */ memset((char *)cmd + sizeof(cmd->req), 0, - sizeof(*cmd) - sizeof(cmd->req)); + sizeof(*cmd) - sizeof(cmd->req) + shost->hostt->cmd_size); req->special = cmd; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f9d1432d7cc5..b6bb4e0ce0e3 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -827,21 +827,32 @@ static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9); u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); + int ret; if (!(rq->cmd_flags & REQ_NOUNMAP)) { switch (sdkp->zeroing_mode) { case SD_ZERO_WS16_UNMAP: - return sd_setup_write_same16_cmnd(cmd, true); + ret = sd_setup_write_same16_cmnd(cmd, true); + goto out; case SD_ZERO_WS10_UNMAP: - return sd_setup_write_same10_cmnd(cmd, true); + ret = sd_setup_write_same10_cmnd(cmd, true); + goto out; } } if (sdp->no_write_same) return BLKPREP_INVALID; + if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff) - return sd_setup_write_same16_cmnd(cmd, false); - return sd_setup_write_same10_cmnd(cmd, false); + ret = sd_setup_write_same16_cmnd(cmd, false); + else + ret = sd_setup_write_same10_cmnd(cmd, false); + +out: + if (sd_is_zoned(sdkp) && ret == BLKPREP_OK) + return sd_zbc_write_lock_zone(cmd); + + return ret; } static void sd_config_write_same(struct scsi_disk *sdkp) @@ -948,6 +959,10 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) rq->__data_len = sdp->sector_size; ret = scsi_init_io(cmd); rq->__data_len = nr_bytes; + + if (sd_is_zoned(sdkp) && ret != BLKPREP_OK) + sd_zbc_write_unlock_zone(cmd); + return ret; } @@ -1567,17 +1582,21 @@ out: return retval; } -static int sd_sync_cache(struct scsi_disk *sdkp) +static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) { int retries, res; struct scsi_device *sdp = sdkp->device; const int timeout = sdp->request_queue->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; - struct scsi_sense_hdr sshdr; + struct scsi_sense_hdr my_sshdr; if (!scsi_device_online(sdp)) return -ENODEV; + /* caller might not be interested in sense, but we need it */ + if (!sshdr) + sshdr = &my_sshdr; + for (retries = 3; retries > 0; --retries) { unsigned char cmd[10] = { 0 }; @@ -1586,7 +1605,7 @@ static int sd_sync_cache(struct scsi_disk *sdkp) * Leave the rest of the command zero to indicate * flush everything. 
*/ - res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, sshdr, timeout, SD_MAX_RETRIES, 0, RQF_PM, NULL); if (res == 0) break; @@ -1596,11 +1615,12 @@ static int sd_sync_cache(struct scsi_disk *sdkp) sd_print_result(sdkp, "Synchronize Cache(10) failed", res); if (driver_byte(res) & DRIVER_SENSE) - sd_print_sense_hdr(sdkp, &sshdr); + sd_print_sense_hdr(sdkp, sshdr); + /* we need to evaluate the error return */ - if (scsi_sense_valid(&sshdr) && - (sshdr.asc == 0x3a || /* medium not present */ - sshdr.asc == 0x20)) /* invalid command */ + if (scsi_sense_valid(sshdr) && + (sshdr->asc == 0x3a || /* medium not present */ + sshdr->asc == 0x20)) /* invalid command */ /* this is no error here */ return 0; @@ -3444,7 +3464,7 @@ static void sd_shutdown(struct device *dev) if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); - sd_sync_cache(sdkp); + sd_sync_cache(sdkp, NULL); } if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) { @@ -3456,6 +3476,7 @@ static void sd_shutdown(struct device *dev) static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) { struct scsi_disk *sdkp = dev_get_drvdata(dev); + struct scsi_sense_hdr sshdr; int ret = 0; if (!sdkp) /* E.g.: runtime suspend following sd_remove() */ @@ -3463,12 +3484,23 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); - ret = sd_sync_cache(sdkp); + ret = sd_sync_cache(sdkp, &sshdr); + if (ret) { /* ignore OFFLINE device */ if (ret == -ENODEV) - ret = 0; - goto done; + return 0; + + if (!scsi_sense_valid(&sshdr) || + sshdr.sense_key != ILLEGAL_REQUEST) + return ret; + + /* + * sshdr.sense_key == ILLEGAL_REQUEST means this drive + * doesn't support sync. There's not much to do and + * suspend shouldn't fail. 
+ */ + ret = 0; } } @@ -3480,7 +3512,6 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) ret = 0; } -done: return ret; } diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 0a38ba01b7b4..82c33a6edbea 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -2074,11 +2074,12 @@ sg_get_rq_mark(Sg_fd * sfp, int pack_id) if ((1 == resp->done) && (!resp->sg_io_owned) && ((-1 == pack_id) || (resp->header.pack_id == pack_id))) { resp->done = 2; /* guard against other readers */ - break; + write_unlock_irqrestore(&sfp->rq_list_lock, iflags); + return resp; } } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); - return resp; + return NULL; } /* always adds to end of list */ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index abc7e87937cc..ffe8d8608818 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7698,6 +7698,12 @@ static inline void ufshcd_add_sysfs_nodes(struct ufs_hba *hba) ufshcd_add_spm_lvl_sysfs_nodes(hba); } +static inline void ufshcd_remove_sysfs_nodes(struct ufs_hba *hba) +{ + device_remove_file(hba->dev, &hba->rpm_lvl_attr); + device_remove_file(hba->dev, &hba->spm_lvl_attr); +} + /** * ufshcd_shutdown - shutdown routine * @hba: per adapter instance @@ -7735,6 +7741,7 @@ EXPORT_SYMBOL(ufshcd_shutdown); */ void ufshcd_remove(struct ufs_hba *hba) { + ufshcd_remove_sysfs_nodes(hba); scsi_remove_host(hba->host); /* disable interrupts */ ufshcd_disable_intr(hba, hba->intr_mask); diff --git a/drivers/thermal/broadcom/Kconfig b/drivers/thermal/broadcom/Kconfig index ab08af4654ef..42c098e86f84 100644 --- a/drivers/thermal/broadcom/Kconfig +++ b/drivers/thermal/broadcom/Kconfig @@ -9,8 +9,9 @@ config BCM2835_THERMAL config BCM_NS_THERMAL tristate "Northstar thermal driver" depends on ARCH_BCM_IPROC || COMPILE_TEST + default y if ARCH_BCM_IPROC help - Northstar is a family of SoCs that includes e.g. BCM4708, BCM47081, - BCM4709 and BCM47094. It contains DMU (Device Management Unit) block - with a thermal sensor that allows checking CPU temperature. This - driver provides support for it. + Support for the Northstar and Northstar Plus family of SoCs (e.g. + BCM4708, BCM4709, BCM5301x, BCM95852X, etc). It contains DMU (Device + Management Unit) block with a thermal sensor that allows checking CPU + temperature. diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index 644ba526d9ea..4362a69ac88d 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -195,7 +195,6 @@ static struct thermal_zone_of_device_ops tmu_tz_ops = { static int qoriq_tmu_probe(struct platform_device *pdev) { int ret; - const struct thermal_trip *trip; struct qoriq_tmu_data *data; struct device_node *np = pdev->dev.of_node; u32 site = 0; @@ -243,8 +242,6 @@ static int qoriq_tmu_probe(struct platform_device *pdev) goto err_tmu; } - trip = of_thermal_get_trip_points(data->tz); - /* Enable monitoring */ site |= 0x1 << (15 - data->sensor_id); tmu_write(data, site | TMR_ME | TMR_ALPF, &data->regs->tmr); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index b21b9cc2c8d6..5a51c740e372 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -359,7 +359,7 @@ static DECLARE_DELAYED_WORK(thermal_emergency_poweroff_work, * This may be called from any critical situation to trigger a system shutdown * after a known period of time. By default this is not scheduled. 
*/ -void thermal_emergency_poweroff(void) +static void thermal_emergency_poweroff(void) { int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS; /* diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c index ba9c302454fb..696ab3046b87 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -1010,7 +1010,7 @@ ti_bandgap_force_single_read(struct ti_bandgap *bgp, int id) } /** - * ti_bandgap_set_continous_mode() - One time enabling of continuous mode + * ti_bandgap_set_continuous_mode() - One time enabling of continuous mode * @bgp: pointer to struct ti_bandgap * * Call this function only if HAS(MODE_CONFIG) is set. As this driver may @@ -1214,22 +1214,18 @@ static struct ti_bandgap *ti_bandgap_build(struct platform_device *pdev) } bgp = devm_kzalloc(&pdev->dev, sizeof(*bgp), GFP_KERNEL); - if (!bgp) { - dev_err(&pdev->dev, "Unable to allocate mem for driver ref\n"); + if (!bgp) return ERR_PTR(-ENOMEM); - } of_id = of_match_device(of_ti_bandgap_match, &pdev->dev); if (of_id) bgp->conf = of_id->data; /* register shadow for context save and restore */ - bgp->regval = devm_kzalloc(&pdev->dev, sizeof(*bgp->regval) * - bgp->conf->sensor_count, GFP_KERNEL); - if (!bgp->regval) { - dev_err(&pdev->dev, "Unable to allocate mem for driver ref\n"); + bgp->regval = devm_kcalloc(&pdev->dev, bgp->conf->sensor_count, + sizeof(*bgp->regval), GFP_KERNEL); + if (!bgp->regval) return ERR_PTR(-ENOMEM); - } i = 0; do { diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c index 7ac9bcdf1e61..61fe8d6fd24e 100644 --- a/drivers/tty/ehv_bytechan.c +++ b/drivers/tty/ehv_bytechan.c @@ -764,7 +764,7 @@ static int __init ehv_bc_init(void) ehv_bc_driver = alloc_tty_driver(count); if (!ehv_bc_driver) { ret = -ENOMEM; - goto error; + goto err_free_bcs; } ehv_bc_driver->driver_name = "ehv-bc"; @@ -778,24 +778,23 @@ static int __init ehv_bc_init(void) ret = tty_register_driver(ehv_bc_driver); if (ret) { pr_err("ehv-bc: could not register tty driver (ret=%i)\n", ret); - goto error; + goto err_put_tty_driver; } ret = platform_driver_register(&ehv_bc_tty_driver); if (ret) { pr_err("ehv-bc: could not register platform driver (ret=%i)\n", ret); - goto error; + goto err_deregister_tty_driver; } return 0; -error: - if (ehv_bc_driver) { - tty_unregister_driver(ehv_bc_driver); - put_tty_driver(ehv_bc_driver); - } - +err_deregister_tty_driver: + tty_unregister_driver(ehv_bc_driver); +err_put_tty_driver: + put_tty_driver(ehv_bc_driver); +err_free_bcs: kfree(bcs); return ret; diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index 433de5ea9b02..f71b47334149 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -122,6 +122,18 @@ void serdev_device_write_wakeup(struct serdev_device *serdev) } EXPORT_SYMBOL_GPL(serdev_device_write_wakeup); +int serdev_device_write_buf(struct serdev_device *serdev, + const unsigned char *buf, size_t count) +{ + struct serdev_controller *ctrl = serdev->ctrl; + + if (!ctrl || !ctrl->ops->write_buf) + return -EINVAL; + + return ctrl->ops->write_buf(ctrl, buf, count); +} +EXPORT_SYMBOL_GPL(serdev_device_write_buf); + int serdev_device_write(struct serdev_device *serdev, const unsigned char *buf, size_t count, unsigned long timeout) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index 487c88f6aa0e..d0a021c93986 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ 
-102,9 +102,6 @@ static int ttyport_open(struct serdev_controller *ctrl) return PTR_ERR(tty); serport->tty = tty; - serport->port->client_ops = &client_ops; - serport->port->client_data = ctrl; - if (tty->ops->open) tty->ops->open(serport->tty, NULL); else @@ -215,6 +212,7 @@ struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, struct tty_driver *drv, int idx) { + const struct tty_port_client_operations *old_ops; struct serdev_controller *ctrl; struct serport *serport; int ret; @@ -233,28 +231,37 @@ struct device *serdev_tty_port_register(struct tty_port *port, ctrl->ops = &ctrl_ops; + old_ops = port->client_ops; + port->client_ops = &client_ops; + port->client_data = ctrl; + ret = serdev_controller_add(ctrl); if (ret) - goto err_controller_put; + goto err_reset_data; dev_info(&ctrl->dev, "tty port %s%d registered\n", drv->name, idx); return &ctrl->dev; -err_controller_put: +err_reset_data: + port->client_data = NULL; + port->client_ops = old_ops; serdev_controller_put(ctrl); + return ERR_PTR(ret); } -void serdev_tty_port_unregister(struct tty_port *port) +int serdev_tty_port_unregister(struct tty_port *port) { struct serdev_controller *ctrl = port->client_data; struct serport *serport = serdev_controller_get_drvdata(ctrl); if (!serport) - return; + return -ENODEV; serdev_controller_remove(ctrl); port->client_ops = NULL; port->client_data = NULL; serdev_controller_put(ctrl); + + return 0; } diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 09a65a3ec7f7..68fd045a7025 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -47,6 +47,7 @@ /* * These are definitions for the Exar XR17V35X and XR17(C|D)15X */ +#define UART_EXAR_INT0 0x80 #define UART_EXAR_SLEEP 0x8b /* Sleep mode */ #define UART_EXAR_DVID 0x8d /* Device identification */ @@ -1337,7 +1338,7 @@ out_lock: /* * Check if the device is a Fintek F81216A */ - if (port->type == PORT_16550A) + if (port->type == PORT_16550A && port->iotype == UPIO_PORT) fintek_8250_probe(up); if (up->capabilities != old_capabilities) { @@ -1869,17 +1870,13 @@ static int serial8250_default_handle_irq(struct uart_port *port) static int exar_handle_irq(struct uart_port *port) { unsigned int iir = serial_port_in(port, UART_IIR); - int ret; + int ret = 0; - ret = serial8250_handle_irq(port, iir); + if (((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X)) && + serial_port_in(port, UART_EXAR_INT0) != 0) + ret = 1; - if ((port->type == PORT_XR17V35X) || - (port->type == PORT_XR17D15X)) { - serial_port_in(port, 0x80); - serial_port_in(port, 0x81); - serial_port_in(port, 0x82); - serial_port_in(port, 0x83); - } + ret |= serial8250_handle_irq(port, iir); return ret; } @@ -2177,6 +2174,8 @@ int serial8250_do_startup(struct uart_port *port) serial_port_in(port, UART_RX); serial_port_in(port, UART_IIR); serial_port_in(port, UART_MSR); + if ((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X)) + serial_port_in(port, UART_EXAR_INT0); /* * At this point, there's no way the LSR could still be 0xff; @@ -2335,6 +2334,8 @@ dont_test_tx_en: serial_port_in(port, UART_RX); serial_port_in(port, UART_IIR); serial_port_in(port, UART_MSR); + if ((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X)) + serial_port_in(port, UART_EXAR_INT0); up->lsr_saved_flags = 0; up->msr_saved_flags = 0; diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c index 18e3f8342b85..0475f5d261ce 100644 --- 
a/drivers/tty/serial/altera_jtaguart.c +++ b/drivers/tty/serial/altera_jtaguart.c @@ -478,6 +478,7 @@ static int altera_jtaguart_remove(struct platform_device *pdev) port = &altera_jtaguart_ports[i].port; uart_remove_one_port(&altera_jtaguart_driver, port); + iounmap(port->membase); return 0; } diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c index 46d3438a0d27..3e4b717670d7 100644 --- a/drivers/tty/serial/altera_uart.c +++ b/drivers/tty/serial/altera_uart.c @@ -615,6 +615,7 @@ static int altera_uart_remove(struct platform_device *pdev) if (port) { uart_remove_one_port(&altera_uart_driver, port); port->mapbase = 0; + iounmap(port->membase); } return 0; diff --git a/drivers/tty/serial/efm32-uart.c b/drivers/tty/serial/efm32-uart.c index ebd8569f9ad5..9fff25be87f9 100644 --- a/drivers/tty/serial/efm32-uart.c +++ b/drivers/tty/serial/efm32-uart.c @@ -27,6 +27,7 @@ #define UARTn_FRAME 0x04 #define UARTn_FRAME_DATABITS__MASK 0x000f #define UARTn_FRAME_DATABITS(n) ((n) - 3) +#define UARTn_FRAME_PARITY__MASK 0x0300 #define UARTn_FRAME_PARITY_NONE 0x0000 #define UARTn_FRAME_PARITY_EVEN 0x0200 #define UARTn_FRAME_PARITY_ODD 0x0300 @@ -572,12 +573,16 @@ static void efm32_uart_console_get_options(struct efm32_uart_port *efm_port, 16 * (4 + (clkdiv >> 6))); frame = efm32_uart_read32(efm_port, UARTn_FRAME); - if (frame & UARTn_FRAME_PARITY_ODD) + switch (frame & UARTn_FRAME_PARITY__MASK) { + case UARTn_FRAME_PARITY_ODD: *parity = 'o'; - else if (frame & UARTn_FRAME_PARITY_EVEN) + break; + case UARTn_FRAME_PARITY_EVEN: *parity = 'e'; - else + break; + default: *parity = 'n'; + } *bits = (frame & UARTn_FRAME_DATABITS__MASK) - UARTn_FRAME_DATABITS(4) + 4; diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c index 157883653256..f190a84a0246 100644 --- a/drivers/tty/serial/ifx6x60.c +++ b/drivers/tty/serial/ifx6x60.c @@ -1382,9 +1382,9 @@ static struct spi_driver ifx_spi_driver = { static void __exit ifx_spi_exit(void) { /* unregister */ + spi_unregister_driver(&ifx_spi_driver); tty_unregister_driver(tty_drv); put_tty_driver(tty_drv); - spi_unregister_driver(&ifx_spi_driver); unregister_reboot_notifier(&ifx_modem_reboot_notifier_block); } diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 33509b4beaec..bbefddd92bfe 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2184,7 +2184,9 @@ static int serial_imx_probe(struct platform_device *pdev) * and DCD (when they are outputs) or enables the respective * irqs. So set this bit early, i.e. before requesting irqs. */ - writel(UFCR_DCEDTE, sport->port.membase + UFCR); + reg = readl(sport->port.membase + UFCR); + if (!(reg & UFCR_DCEDTE)) + writel(reg | UFCR_DCEDTE, sport->port.membase + UFCR); /* * Disable UCR3_RI and UCR3_DCD irqs. 
They are also not @@ -2195,7 +2197,15 @@ static int serial_imx_probe(struct platform_device *pdev) sport->port.membase + UCR3); } else { - writel(0, sport->port.membase + UFCR); + unsigned long ucr3 = UCR3_DSR; + + reg = readl(sport->port.membase + UFCR); + if (reg & UFCR_DCEDTE) + writel(reg & ~UFCR_DCEDTE, sport->port.membase + UFCR); + + if (!is_imx1_uart(sport)) + ucr3 |= IMX21_UCR3_RXDMUXSEL | UCR3_ADNIMP; + writel(ucr3, sport->port.membase + UCR3); } clk_disable_unprepare(sport->clk_ipg); diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 0f45b7884a2c..13bfd5dcffce 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2083,7 +2083,7 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport) mutex_lock(&port->mutex); tty_dev = device_find_child(uport->dev, &match, serial_match_port); - if (device_may_wakeup(tty_dev)) { + if (tty_dev && device_may_wakeup(tty_dev)) { if (!enable_irq_wake(uport->irq)) uport->irq_wake = 1; put_device(tty_dev); @@ -2782,7 +2782,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) * Register the port whether it's detected or not. This allows * setserial to be used to alter this port's parameters. */ - tty_dev = tty_port_register_device_attr(port, drv->tty_driver, + tty_dev = tty_port_register_device_attr_serdev(port, drv->tty_driver, uport->line, uport->dev, port, uport->tty_groups); if (likely(!IS_ERR(tty_dev))) { device_set_wakeup_capable(tty_dev, 1); @@ -2845,7 +2845,7 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *uport) /* * Remove the devices from the tty layer */ - tty_unregister_device(drv->tty_driver, uport->line); + tty_port_unregister_device(port, drv->tty_driver, uport->line); tty = tty_port_tty_get(port); if (tty) { diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 1d21a9c1d33e..4fb3165384c4 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -34,7 +34,9 @@ static int tty_port_default_receive_buf(struct tty_port *port, if (!disc) return 0; + mutex_lock(&tty->atomic_write_lock); ret = tty_ldisc_receive_buf(disc, p, (char *)f, count); + mutex_unlock(&tty->atomic_write_lock); tty_ldisc_deref(disc); @@ -129,19 +131,85 @@ struct device *tty_port_register_device_attr(struct tty_port *port, struct device *device, void *drvdata, const struct attribute_group **attr_grp) { + tty_port_link_device(port, driver, index); + return tty_register_device_attr(driver, index, device, drvdata, + attr_grp); +} +EXPORT_SYMBOL_GPL(tty_port_register_device_attr); + +/** + * tty_port_register_device_attr_serdev - register tty or serdev device + * @port: tty_port of the device + * @driver: tty_driver for this device + * @index: index of the tty + * @device: parent if exists, otherwise NULL + * @drvdata: driver data for the device + * @attr_grp: attribute group for the device + * + * Register a serdev or tty device depending on if the parent device has any + * defined serdev clients or not. 
+ */ +struct device *tty_port_register_device_attr_serdev(struct tty_port *port, + struct tty_driver *driver, unsigned index, + struct device *device, void *drvdata, + const struct attribute_group **attr_grp) +{ struct device *dev; tty_port_link_device(port, driver, index); dev = serdev_tty_port_register(port, device, driver, index); - if (PTR_ERR(dev) != -ENODEV) + if (PTR_ERR(dev) != -ENODEV) { /* Skip creating cdev if we registered a serdev device */ return dev; + } return tty_register_device_attr(driver, index, device, drvdata, attr_grp); } -EXPORT_SYMBOL_GPL(tty_port_register_device_attr); +EXPORT_SYMBOL_GPL(tty_port_register_device_attr_serdev); + +/** + * tty_port_register_device_serdev - register tty or serdev device + * @port: tty_port of the device + * @driver: tty_driver for this device + * @index: index of the tty + * @device: parent if exists, otherwise NULL + * + * Register a serdev or tty device depending on if the parent device has any + * defined serdev clients or not. + */ +struct device *tty_port_register_device_serdev(struct tty_port *port, + struct tty_driver *driver, unsigned index, + struct device *device) +{ + return tty_port_register_device_attr_serdev(port, driver, index, + device, NULL, NULL); +} +EXPORT_SYMBOL_GPL(tty_port_register_device_serdev); + +/** + * tty_port_unregister_device - deregister a tty or serdev device + * @port: tty_port of the device + * @driver: tty_driver for this device + * @index: index of the tty + * + * If a tty or serdev device is registered with a call to + * tty_port_register_device_serdev() then this function must be called when + * the device is gone. + */ +void tty_port_unregister_device(struct tty_port *port, + struct tty_driver *driver, unsigned index) +{ + int ret; + + ret = serdev_tty_port_unregister(port); + if (ret == 0) + return; + + tty_unregister_device(driver, index); +} +EXPORT_SYMBOL_GPL(tty_port_unregister_device); int tty_port_alloc_xmit_buf(struct tty_port *port) { @@ -189,9 +257,6 @@ static void tty_port_destructor(struct kref *kref) /* check if last port ref was dropped before tty release */ if (WARN_ON(port->itty)) return; - - serdev_tty_port_unregister(port); - if (port->xmit_buf) free_page((unsigned long)port->xmit_buf); tty_port_destroy(port); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3fdde0b283c9..29308a80d66f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1671,8 +1671,12 @@ static long ceph_fallocate(struct file *file, int mode, } size = i_size_read(inode); - if (!(mode & FALLOC_FL_KEEP_SIZE)) + if (!(mode & FALLOC_FL_KEEP_SIZE)) { endoff = offset + length; + ret = inode_newsize_ok(inode, endoff); + if (ret) + goto unlock; + } if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index f02eb7673392..a7048eafa8e6 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1280,7 +1280,6 @@ xfs_bmap_read_extents( xfs_bmbt_rec_t *frp; xfs_fsblock_t nextbno; xfs_extnum_t num_recs; - xfs_extnum_t start; num_recs = xfs_btree_get_numrecs(block); if (unlikely(i + num_recs > room)) { @@ -1303,7 +1302,6 @@ xfs_bmap_read_extents( * Copy records into the extent records. 
*/ frp = XFS_BMBT_REC_ADDR(mp, block, 1); - start = i; for (j = 0; j < num_recs; j++, i++, frp++) { xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); trp->l0 = be64_to_cpu(frp->l0); @@ -2065,8 +2063,10 @@ xfs_bmap_add_extent_delay_real( } temp = xfs_bmap_worst_indlen(bma->ip, temp); temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); - diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + diff = (int)(temp + temp2 - + (startblockval(PREV.br_startblock) - + (bma->cur ? + bma->cur->bc_private.b.allocated : 0))); if (diff > 0) { error = xfs_mod_fdblocks(bma->ip->i_mount, -((int64_t)diff), false); @@ -2123,7 +2123,6 @@ xfs_bmap_add_extent_delay_real( temp = da_new; if (bma->cur) temp += bma->cur->bc_private.b.allocated; - ASSERT(temp <= da_old); if (temp < da_old) xfs_mod_fdblocks(bma->ip->i_mount, (int64_t)(da_old - temp), false); diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 5392674bf893..3a673ba201aa 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4395,7 +4395,7 @@ xfs_btree_visit_blocks( xfs_btree_readahead_ptr(cur, ptr, 1); /* save for the next iteration of the loop */ - lptr = *ptr; + xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); } /* for each buffer in the level */ diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index b177ef33cd4c..82a38d86ebad 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1629,13 +1629,28 @@ xfs_refcount_recover_cow_leftovers( if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) return -EOPNOTSUPP; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + INIT_LIST_HEAD(&debris); + + /* + * In this first part, we use an empty transaction to gather up + * all the leftover CoW extents so that we can subsequently + * delete them. The empty transaction is used to avoid + * a buffer lock deadlock if there happens to be a loop in the + * refcountbt because we're allowed to re-grab a buffer that is + * already attached to our transaction. When we're done + * recording the CoW debris we cancel the (empty) transaction + * and everything goes away cleanly. + */ + error = xfs_trans_alloc_empty(mp, &tp); if (error) return error; - cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); + + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + if (error) + goto out_trans; + cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); /* Find all the leftover CoW staging extents. */ - INIT_LIST_HEAD(&debris); memset(&low, 0, sizeof(low)); memset(&high, 0, sizeof(high)); low.rc.rc_startblock = XFS_REFC_COW_START; @@ -1645,10 +1660,11 @@ xfs_refcount_recover_cow_leftovers( if (error) goto out_cursor; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - xfs_buf_relse(agbp); + xfs_trans_brelse(tp, agbp); + xfs_trans_cancel(tp); /* Now iterate the list to free the leftovers */ - list_for_each_entry(rr, &debris, rr_list) { + list_for_each_entry_safe(rr, n, &debris, rr_list) { /* Set up transaction. 
*/ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); if (error) @@ -1676,8 +1692,16 @@ xfs_refcount_recover_cow_leftovers( error = xfs_trans_commit(tp); if (error) goto out_free; + + list_del(&rr->rr_list); + kmem_free(rr); } + return error; +out_defer: + xfs_defer_cancel(&dfops); +out_trans: + xfs_trans_cancel(tp); out_free: /* Free the leftover list */ list_for_each_entry_safe(rr, n, &debris, rr_list) { @@ -1688,11 +1712,6 @@ out_free: out_cursor: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - xfs_buf_relse(agbp); - goto out_free; - -out_defer: - xfs_defer_cancel(&dfops); - xfs_trans_cancel(tp); - goto out_free; + xfs_trans_brelse(tp, agbp); + goto out_trans; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2b954308a1d6..9e3cc2146d5b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -582,9 +582,13 @@ xfs_getbmap( } break; default: + /* Local format data forks report no extents. */ + if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + bmv->bmv_entries = 0; + return 0; + } if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_format != XFS_DINODE_FMT_BTREE && - ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) + ip->i_d.di_format != XFS_DINODE_FMT_BTREE) return -EINVAL; if (xfs_get_extsz_hint(ip) || @@ -712,7 +716,7 @@ xfs_getbmap( * extents. */ if (map[i].br_startblock == DELAYSTARTBLOCK && - map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) + map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); if (map[i].br_startblock == HOLESTARTBLOCK && diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 35703a801372..5fb5a0958a14 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1043,49 +1043,17 @@ xfs_find_get_desired_pgoff( index = startoff >> PAGE_SHIFT; endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount); - end = endoff >> PAGE_SHIFT; + end = (endoff - 1) >> PAGE_SHIFT; do { int want; unsigned nr_pages; unsigned int i; - want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want); - /* - * No page mapped into given range. If we are searching holes - * and if this is the first time we got into the loop, it means - * that the given offset is landed in a hole, return it. - * - * If we have already stepped through some block buffers to find - * holes but they all contains data. In this case, the last - * offset is already updated and pointed to the end of the last - * mapped page, if it does not reach the endpoint to search, - * that means there should be a hole between them. - */ - if (nr_pages == 0) { - /* Data search found nothing */ - if (type == DATA_OFF) - break; - - ASSERT(type == HOLE_OFF); - if (lastoff == startoff || lastoff < endoff) { - found = true; - *offset = lastoff; - } - break; - } - - /* - * At lease we found one page. If this is the first time we - * step into the loop, and if the first page index offset is - * greater than the given search offset, a hole was found. - */ - if (type == HOLE_OFF && lastoff == startoff && - lastoff < page_offset(pvec.pages[0])) { - found = true; + if (nr_pages == 0) break; - } for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -1098,18 +1066,18 @@ xfs_find_get_desired_pgoff( * file mapping. However, page->index will not change * because we have a reference on the page. * - * Searching done if the page index is out of range. 
- * If the current offset is not reaches the end of - * the specified search range, there should be a hole - * between them. + * If current page offset is beyond where we've ended, + * we've found a hole. */ - if (page->index > end) { - if (type == HOLE_OFF && lastoff < endoff) { - *offset = lastoff; - found = true; - } + if (type == HOLE_OFF && lastoff < endoff && + lastoff < page_offset(pvec.pages[i])) { + found = true; + *offset = lastoff; goto out; } + /* Searching done if the page index is out of range. */ + if (page->index > end) + goto out; lock_page(page); /* @@ -1151,21 +1119,20 @@ xfs_find_get_desired_pgoff( /* * The number of returned pages less than our desired, search - * done. In this case, nothing was found for searching data, - * but we found a hole behind the last offset. + * done. */ - if (nr_pages < want) { - if (type == HOLE_OFF) { - *offset = lastoff; - found = true; - } + if (nr_pages < want) break; - } index = pvec.pages[i - 1]->index + 1; pagevec_release(&pvec); } while (index <= end); + /* No page at lastoff and we are not done - we found a hole. */ + if (type == HOLE_OFF && lastoff < endoff) { + *offset = lastoff; + found = true; + } out: pagevec_release(&pvec); return found; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 3683819887a5..814ed729881d 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -828,6 +828,7 @@ xfs_getfsmap( struct xfs_fsmap dkeys[2]; /* per-dev keys */ struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS]; struct xfs_getfsmap_info info = { NULL }; + bool use_rmap; int i; int error = 0; @@ -837,12 +838,14 @@ xfs_getfsmap( !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1])) return -EINVAL; + use_rmap = capable(CAP_SYS_ADMIN) && + xfs_sb_version_hasrmapbt(&mp->m_sb); head->fmh_entries = 0; /* Set up our device handlers. */ memset(handlers, 0, sizeof(handlers)); handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev); - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (use_rmap) handlers[0].fn = xfs_getfsmap_datadev_rmapbt; else handlers[0].fn = xfs_getfsmap_datadev_bnobt; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index e1daa4f343cd..b9b5566acfe6 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -70,7 +70,6 @@ #include <drm/drm_fourcc.h> #include <drm/drm_global.h> #include <drm/drm_hashtab.h> -#include <drm/drm_mem_util.h> #include <drm/drm_mm.h> #include <drm/drm_os_linux.h> #include <drm/drm_sarea.h> @@ -320,15 +319,6 @@ struct pci_controller; #define DRM_IF_VERSION(maj, min) (maj << 16 | min) -/* Flags and return codes for get_vblank_timestamp() driver function. */ -#define DRM_CALLED_FROM_VBLIRQ 1 -#define DRM_VBLANKTIME_SCANOUTPOS_METHOD (1 << 0) - -/* get_scanout_position() return flags */ -#define DRM_SCANOUTPOS_VALID (1 << 0) -#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1) -#define DRM_SCANOUTPOS_ACCURATE (1 << 2) - /** * DRM device structure. This structure represent a complete card that * may contain multiple heads. diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 788daf756f48..8645dcdef031 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -155,6 +155,53 @@ struct __drm_connnectors_state { }; /** + * struct drm_private_state_funcs - atomic state functions for private objects + * + * These hooks are used by atomic helpers to create, swap and destroy states of + * private objects. The structure itself is used as a vtable to identify the + * associated private object type. 
Each private object type that needs to be + * added to the atomic states is expected to have an implementation of these + * hooks and pass a pointer to its drm_private_state_funcs struct to + * drm_atomic_get_private_obj_state(). + */ +struct drm_private_state_funcs { + /** + * @duplicate_state: + * + * Duplicate the current state of the private object and return it. It + * is an error to call this before obj->state has been initialized. + * + * RETURNS: + * + * Duplicated atomic state or NULL when obj->state is not + * initialized or allocation failed. + */ + void *(*duplicate_state)(struct drm_atomic_state *state, void *obj); + + /** + * @swap_state: + * + * This function swaps the existing state of a private object @obj with + * its newly created state, the pointer to which is passed as + * @obj_state_ptr. + */ + void (*swap_state)(void *obj, void **obj_state_ptr); + + /** + * @destroy_state: + * + * Frees the private object state created with @duplicate_state. + */ + void (*destroy_state)(void *obj_state); +}; + +struct __drm_private_objs_state { + void *obj; + void *obj_state; + const struct drm_private_state_funcs *funcs; +}; + +/** * struct drm_atomic_state - the global state object for atomic updates * @ref: count of all references to this state (will not be freed until zero) * @dev: parent DRM device @@ -164,6 +211,8 @@ struct __drm_connnectors_state { * @crtcs: pointer to array of CRTC pointers * @num_connector: size of the @connectors and @connector_states arrays * @connectors: pointer to array of structures with per-connector data + * @num_private_objs: size of the @private_objs array + * @private_objs: pointer to array of private object pointers * @acquire_ctx: acquire context for this atomic modeset state update */ struct drm_atomic_state { @@ -176,6 +225,8 @@ struct drm_atomic_state { struct __drm_crtcs_state *crtcs; int num_connector; struct __drm_connnectors_state *connectors; + int num_private_objs; + struct __drm_private_objs_state *private_objs; struct drm_modeset_acquire_ctx *acquire_ctx; @@ -268,6 +319,11 @@ int drm_atomic_connector_set_property(struct drm_connector *connector, struct drm_connector_state *state, struct drm_property *property, uint64_t val); +void * __must_check +drm_atomic_get_private_obj_state(struct drm_atomic_state *state, + void *obj, + const struct drm_private_state_funcs *funcs); + /** * drm_atomic_get_existing_crtc_state - get crtc state, if it exists * @state: global atomic state object @@ -753,6 +809,45 @@ void drm_state_dump(struct drm_device *dev, struct drm_printer *p); for_each_if (plane) /** + * __for_each_private_obj - iterate over all private objects + * @__state: &struct drm_atomic_state pointer + * @obj: private object iteration cursor + * @obj_state: private object state iteration cursor + * @__i: int iteration cursor, for macro-internal use + * @__funcs: &struct drm_private_state_funcs iteration cursor + * + * This macro iterates over the array containing private object data in atomic + * state. + */ +#define __for_each_private_obj(__state, obj, obj_state, __i, __funcs) \ + for ((__i) = 0; \ + (__i) < (__state)->num_private_objs && \ + ((obj) = (__state)->private_objs[__i].obj, \ + (__funcs) = (__state)->private_objs[__i].funcs, \ + (obj_state) = (__state)->private_objs[__i].obj_state, \ + 1); \ + (__i)++) \ + +/** + * for_each_private_obj - iterate over a specific type of private object + * @__state: &struct drm_atomic_state pointer + * @obj_funcs: &struct drm_private_state_funcs function table to filter + * private objects + * 
@obj: private object iteration cursor + * @obj_state: private object state iteration cursor + * @__i: int iteration cursor, for macro-internal use + * @__funcs: &struct drm_private_state_funcs iteration cursor + * + * This macro iterates over the private objects state array while filtering the + * objects based on the vfunc table that is passed as @obj_funcs. New macros + * can be created by passing in the vfunc table associated with a specific + * private object. + */ +#define for_each_private_obj(__state, obj_funcs, obj, obj_state, __i, __funcs) \ + __for_each_private_obj(__state, obj, obj_state, __i, __funcs) \ + for_each_if (__funcs == obj_funcs) + +/** * drm_atomic_crtc_needs_modeset - compute combined modeset need * @state: &drm_crtc_state for the CRTC * diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h index 13221cf9b3eb..17606026590b 100644 --- a/include/drm/drm_blend.h +++ b/include/drm/drm_blend.h @@ -25,31 +25,15 @@ #include <linux/list.h> #include <linux/ctype.h> +#include <drm/drm_mode.h> struct drm_device; struct drm_atomic_state; - -/* - * Rotation property bits. DRM_ROTATE_<degrees> rotates the image by the - * specified amount in degrees in counter clockwise direction. DRM_REFLECT_X and - * DRM_REFLECT_Y reflects the image along the specified axis prior to rotation - * - * WARNING: These defines are UABI since they're exposed in the rotation - * property. - */ -#define DRM_ROTATE_0 BIT(0) -#define DRM_ROTATE_90 BIT(1) -#define DRM_ROTATE_180 BIT(2) -#define DRM_ROTATE_270 BIT(3) -#define DRM_ROTATE_MASK (DRM_ROTATE_0 | DRM_ROTATE_90 | \ - DRM_ROTATE_180 | DRM_ROTATE_270) -#define DRM_REFLECT_X BIT(4) -#define DRM_REFLECT_Y BIT(5) -#define DRM_REFLECT_MASK (DRM_REFLECT_X | DRM_REFLECT_Y) +struct drm_plane; static inline bool drm_rotation_90_or_270(unsigned int rotation) { - return rotation & (DRM_ROTATE_90 | DRM_ROTATE_270); + return rotation & (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270); } int drm_plane_create_rotation_property(struct drm_plane *plane, diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index fdd82fcbf168..983054f2e86e 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -59,6 +59,40 @@ struct drm_bridge_funcs { void (*detach)(struct drm_bridge *bridge); /** + * @mode_valid: + * + * This callback is used to check if a specific mode is valid in this + * bridge. This should be implemented if the bridge has some sort of + * restriction in the modes it can display. For example, a given bridge + * may be responsible to set a clock value. If the clock can not + * produce all the values for the available modes then this callback + * can be used to restrict the number of modes to only the ones that + * can be displayed. + * + * This hook is used by the probe helpers to filter the mode list in + * drm_helper_probe_single_connector_modes(), and it is used by the + * atomic helpers to validate modes supplied by userspace in + * drm_atomic_helper_check_modeset(). + * + * This function is optional. + * + * NOTE: + * + * Since this function is both called from the check phase of an atomic + * commit, and the mode validation in the probe paths it is not allowed + * to look at anything else but the passed-in mode, and validate it + * against configuration-invariant hardware constraints. Any further + * limits which depend upon the configuration can only be checked in + * @mode_fixup. 
+ * + * RETURNS: + * + * drm_mode_status Enum + */ + enum drm_mode_status (*mode_valid)(struct drm_bridge *crtc, + const struct drm_display_mode *mode); + + /** * @mode_fixup: * * This callback is used to validate and adjust a mode. The parameter * mode is the display mode that should be fed to the next element in * the display chain, either the final &drm_connector or the next * &drm_bridge. The parameter adjusted_mode is the input mode the bridge * requires. It can be modified by this callback and does not need to - * match mode. + * match mode. See also &drm_crtc_state.adjusted_mode for more details. * * This is the only hook that allows a bridge to reject a modeset. If * this function passes all other callbacks must succeed for this @@ -82,6 +116,12 @@ * NOT touch any persistent state (hardware or software) or data * structures except the passed in @state parameter. * + * Also beware that userspace can request its own custom modes, neither + * core nor helpers filter modes to the list of probe modes reported by + * the GETCONNECTOR IOCTL and stored in &drm_connector.modes. To ensure + * that modes are filtered consistently put any bridge constraints and + * limits checks into @mode_valid. + * * RETURNS: * * True if an acceptable configuration is possible, false if the modeset diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index bce4a532836d..03a59cbce621 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -25,6 +25,8 @@ #include <linux/ctype.h> +struct drm_crtc; + uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision); void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 4eeda120e46d..2fe09c1ddfb8 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -25,6 +25,7 @@ #include <linux/list.h> #include <linux/ctype.h> +#include <linux/hdmi.h> #include <drm/drm_mode_object.h> #include <uapi/drm/drm_mode.h> @@ -326,6 +327,21 @@ struct drm_connector_state { struct drm_atomic_state *state; struct drm_tv_connector_state tv; + + /** + * @picture_aspect_ratio: Connector property to control the + * HDMI infoframe aspect ratio setting. + * + * The %DRM_MODE_PICTURE_ASPECT_\* values must match the + * values for &enum hdmi_picture_aspect + */ + enum hdmi_picture_aspect picture_aspect_ratio; + + /** + * @scaling_mode: Connector property to control the + * upscaling, mostly used for built-in panels. + */ + unsigned int scaling_mode; }; /** @@ -675,6 +691,7 @@ struct drm_cmdline_mode { * @tile_v_loc: vertical location of this tile * @tile_h_size: horizontal size of this tile. * @tile_v_size: vertical size of this tile. + * @scaling_mode_property: Optional atomic property to control the upscaling. + * * Each connector may be connected to one or more CRTCs, or may be clonable by * another connector if they can share a CRTC. 
Each connector also has a specific @@ -754,6 +771,8 @@ struct drm_connector { struct drm_property_blob *edid_blob_ptr; struct drm_object_properties properties; + struct drm_property *scaling_mode_property; + /** * @path_blob_ptr: * @@ -953,6 +972,8 @@ int drm_mode_create_tv_properties(struct drm_device *dev, unsigned int num_modes, const char * const modes[]); int drm_mode_create_scaling_mode_property(struct drm_device *dev); +int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, + u32 scaling_mode_mask); int drm_mode_create_aspect_ratio_property(struct drm_device *dev); int drm_mode_create_suggested_offset_properties(struct drm_device *dev); @@ -1031,7 +1052,7 @@ void drm_connector_list_iter_end(struct drm_connector_list_iter *iter); * * Note that @connector is only valid within the list body, if you want to use * @connector after calling drm_connector_list_iter_end() then you need to grab - * your own reference first using drm_connector_begin(). + * your own reference first using drm_connector_get(). */ #define drm_for_each_connector_iter(connector, iter) \ while ((connector = drm_connector_list_iter_next(iter))) diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index a8176a836e25..b6e3713bd7a9 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -90,14 +90,7 @@ struct drm_plane_helper_funcs; * @plane_mask: bitmask of (1 << drm_plane_index(plane)) of attached planes * @connector_mask: bitmask of (1 << drm_connector_index(connector)) of attached connectors * @encoder_mask: bitmask of (1 << drm_encoder_index(encoder)) of attached encoders - * @adjusted_mode: for use by helpers and drivers to compute adjusted mode timings - * @mode: current mode timings * @mode_blob: &drm_property_blob for @mode - * @degamma_lut: Lookup table for converting framebuffer pixel data - * before apply the conversion matrix - * @ctm: Transformation matrix - * @gamma_lut: Lookup table for converting pixel data after the - * conversion matrix * @state: backpointer to global drm_atomic_state * * Note that the distinction between @enable and @active is rather subtile: @@ -136,17 +129,62 @@ struct drm_crtc_state { u32 connector_mask; u32 encoder_mask; - /* adjusted_mode: for use by helpers and drivers */ + /** + * @adjusted_mode: + * + * Internal display timings which can be used by the driver to handle + * differences between the mode requested by userspace in @mode and what + * is actually programmed into the hardware. It is purely driver + * implementation defined what exactly this adjusted mode means. Usually + * it is used to store the hardware display timings used between the + * CRTC and encoder blocks. + */ struct drm_display_mode adjusted_mode; + /** + * @mode: + * + * Display timings requested by userspace. The driver should try to + * match the refresh rate as close as possible (but note that it's + * undefined what exactly is close enough, e.g. some of the HDMI modes + * only differ in less than 1% of the refresh rate). The active width + * and height as observed by userspace for positioning planes must match + * exactly. + * + * For external connectors where the sink isn't fixed (like with a + * built-in panel), this mode here should match the physical mode on the + * wire to the last details (i.e. including sync polarities and + * everything). 
+ */ struct drm_display_mode mode; /* blob property to expose current mode to atomic userspace */ struct drm_property_blob *mode_blob; - /* blob property to expose color management to userspace */ + /** + * @degamma_lut: + * + * Lookup table for converting framebuffer pixel data before apply the + * color conversion matrix @ctm. See drm_crtc_enable_color_mgmt(). The + * blob (if not NULL) is an array of &struct drm_color_lut. + */ struct drm_property_blob *degamma_lut; + + /** + * @ctm: + * + * Color transformation matrix. See drm_crtc_enable_color_mgmt(). The + * blob (if not NULL) is a &struct drm_color_ctm. + */ struct drm_property_blob *ctm; + + /** + * @gamma_lut: + * + * Lookup table for converting pixel data after the color conversion + * matrix @ctm. See drm_crtc_enable_color_mgmt(). The blob (if not + * NULL) is an array of &struct drm_color_lut. + */ struct drm_property_blob *gamma_lut; /** @@ -313,6 +351,12 @@ struct drm_crtc_funcs { * * This callback is optional. * + * Atomic drivers who want to support gamma tables should implement the + * atomic color management support, enabled by calling + * drm_crtc_enable_color_mgmt(), which then supports the legacy gamma + * interface through the drm_atomic_helper_legacy_gamma_set() + * compatibility implementation. + * * NOTE: * * Drivers that support gamma tables and also fbdev emulation through diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index c0bd0d7651a9..20eb5ca28594 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -179,6 +179,111 @@ #define DP_GUID 0x030 /* 1.2 */ +#define DP_DSC_SUPPORT 0x060 /* DP 1.4 */ +# define DP_DSC_DECOMPRESSION_IS_SUPPORTED (1 << 0) + +#define DP_DSC_REV 0x061 +# define DP_DSC_MAJOR_MASK (0xf << 0) +# define DP_DSC_MINOR_MASK (0xf << 4) +# define DP_DSC_MAJOR_SHIFT 0 +# define DP_DSC_MINOR_SHIFT 4 + +#define DP_DSC_RC_BUF_BLK_SIZE 0x062 +# define DP_DSC_RC_BUF_BLK_SIZE_1 0x0 +# define DP_DSC_RC_BUF_BLK_SIZE_4 0x1 +# define DP_DSC_RC_BUF_BLK_SIZE_16 0x2 +# define DP_DSC_RC_BUF_BLK_SIZE_64 0x3 + +#define DP_DSC_RC_BUF_SIZE 0x063 + +#define DP_DSC_SLICE_CAP_1 0x064 +# define DP_DSC_1_PER_DP_DSC_SINK (1 << 0) +# define DP_DSC_2_PER_DP_DSC_SINK (1 << 1) +# define DP_DSC_4_PER_DP_DSC_SINK (1 << 3) +# define DP_DSC_6_PER_DP_DSC_SINK (1 << 4) +# define DP_DSC_8_PER_DP_DSC_SINK (1 << 5) +# define DP_DSC_10_PER_DP_DSC_SINK (1 << 6) +# define DP_DSC_12_PER_DP_DSC_SINK (1 << 7) + +#define DP_DSC_LINE_BUF_BIT_DEPTH 0x065 +# define DP_DSC_LINE_BUF_BIT_DEPTH_MASK (0xf << 0) +# define DP_DSC_LINE_BUF_BIT_DEPTH_9 0x0 +# define DP_DSC_LINE_BUF_BIT_DEPTH_10 0x1 +# define DP_DSC_LINE_BUF_BIT_DEPTH_11 0x2 +# define DP_DSC_LINE_BUF_BIT_DEPTH_12 0x3 +# define DP_DSC_LINE_BUF_BIT_DEPTH_13 0x4 +# define DP_DSC_LINE_BUF_BIT_DEPTH_14 0x5 +# define DP_DSC_LINE_BUF_BIT_DEPTH_15 0x6 +# define DP_DSC_LINE_BUF_BIT_DEPTH_16 0x7 +# define DP_DSC_LINE_BUF_BIT_DEPTH_8 0x8 + +#define DP_DSC_BLK_PREDICTION_SUPPORT 0x066 +# define DP_DSC_BLK_PREDICTION_IS_SUPPORTED (1 << 0) + +#define DP_DSC_MAX_BITS_PER_PIXEL_LOW 0x067 /* eDP 1.4 */ + +#define DP_DSC_MAX_BITS_PER_PIXEL_HI 0x068 /* eDP 1.4 */ + +#define DP_DSC_DEC_COLOR_FORMAT_CAP 0x069 +# define DP_DSC_RGB (1 << 0) +# define DP_DSC_YCbCr444 (1 << 1) +# define DP_DSC_YCbCr422_Simple (1 << 2) +# define DP_DSC_YCbCr422_Native (1 << 3) +# define DP_DSC_YCbCr420_Native (1 << 4) + +#define DP_DSC_DEC_COLOR_DEPTH_CAP 0x06A +# define DP_DSC_8_BPC (1 << 1) +# define DP_DSC_10_BPC (1 << 2) +# define DP_DSC_12_BPC (1 << 3) + +#define 
DP_DSC_PEAK_THROUGHPUT 0x06B +# define DP_DSC_THROUGHPUT_MODE_0_MASK (0xf << 0) +# define DP_DSC_THROUGHPUT_MODE_0_SHIFT 0 +# define DP_DSC_THROUGHPUT_MODE_0_340 (1 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_400 (2 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_450 (3 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_500 (4 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_550 (5 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_600 (6 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_650 (7 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_700 (8 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_750 (9 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_800 (10 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_850 (11 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_900 (12 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_950 (13 << 0) +# define DP_DSC_THROUGHPUT_MODE_0_1000 (14 << 0) +# define DP_DSC_THROUGHPUT_MODE_1_MASK (0xf << 4) +# define DP_DSC_THROUGHPUT_MODE_1_SHIFT 4 +# define DP_DSC_THROUGHPUT_MODE_1_340 (1 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_400 (2 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_450 (3 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_500 (4 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_550 (5 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_600 (6 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_650 (7 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_700 (8 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_750 (9 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_800 (10 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_850 (11 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_900 (12 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_950 (13 << 4) +# define DP_DSC_THROUGHPUT_MODE_1_1000 (14 << 4) + +#define DP_DSC_MAX_SLICE_WIDTH 0x06C + +#define DP_DSC_SLICE_CAP_2 0x06D +# define DP_DSC_16_PER_DP_DSC_SINK (1 << 0) +# define DP_DSC_20_PER_DP_DSC_SINK (1 << 1) +# define DP_DSC_24_PER_DP_DSC_SINK (1 << 2) + +#define DP_DSC_BITS_PER_PIXEL_INC 0x06F +# define DP_DSC_BITS_PER_PIXEL_1_16 0x0 +# define DP_DSC_BITS_PER_PIXEL_1_8 0x1 +# define DP_DSC_BITS_PER_PIXEL_1_4 0x2 +# define DP_DSC_BITS_PER_PIXEL_1_2 0x3 +# define DP_DSC_BITS_PER_PIXEL_1 0x4 + #define DP_PSR_SUPPORT 0x070 /* XXX 1.2? */ # define DP_PSR_IS_SUPPORTED 1 # define DP_PSR2_IS_SUPPORTED 2 /* eDP 1.4 */ @@ -339,6 +444,8 @@ #define DP_AUX_FRAME_SYNC_VALUE 0x15c /* eDP 1.4 */ # define DP_AUX_FRAME_SYNC_VALID (1 << 0) +#define DP_DSC_ENABLE 0x160 /* DP 1.4 */ + #define DP_PSR_EN_CFG 0x170 /* XXX 1.2? 
*/ # define DP_PSR_ENABLE (1 << 0) # define DP_PSR_MAIN_LINK_ACTIVE (1 << 1) @@ -572,10 +679,12 @@ #define DP_EDP_PWMGEN_BIT_COUNT 0x724 #define DP_EDP_PWMGEN_BIT_COUNT_CAP_MIN 0x725 #define DP_EDP_PWMGEN_BIT_COUNT_CAP_MAX 0x726 +# define DP_EDP_PWMGEN_BIT_COUNT_MASK (0x1f << 0) #define DP_EDP_BACKLIGHT_CONTROL_STATUS 0x727 #define DP_EDP_BACKLIGHT_FREQ_SET 0x728 +# define DP_EDP_BACKLIGHT_FREQ_BASE_KHZ 27000 #define DP_EDP_BACKLIGHT_FREQ_CAP_MIN_MSB 0x72a #define DP_EDP_BACKLIGHT_FREQ_CAP_MIN_MID 0x72b @@ -603,6 +712,9 @@ #define DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0 0x2003 /* 1.2 */ #define DP_DEVICE_SERVICE_IRQ_VECTOR_ESI1 0x2004 /* 1.2 */ +# define DP_RX_GTC_MSTR_REQ_STATUS_CHANGE (1 << 0) +# define DP_LOCK_ACQUISITION_REQUEST (1 << 1) +# define DP_CEC_IRQ (1 << 2) #define DP_LINK_SERVICE_IRQ_VECTOR_ESI0 0x2005 /* 1.2 */ @@ -636,6 +748,62 @@ # define DP_VSC_EXT_CEA_SDP_SUPPORTED (1 << 6) /* DP 1.4 */ # define DP_VSC_EXT_CEA_SDP_CHAINING_SUPPORTED (1 << 7) /* DP 1.4 */ +/* HDMI CEC tunneling over AUX DP 1.3 section 5.3.3.3.1 DPCD 1.4+ */ +#define DP_CEC_TUNNELING_CAPABILITY 0x3000 +# define DP_CEC_TUNNELING_CAPABLE (1 << 0) +# define DP_CEC_SNOOPING_CAPABLE (1 << 1) +# define DP_CEC_MULTIPLE_LA_CAPABLE (1 << 2) + +#define DP_CEC_TUNNELING_CONTROL 0x3001 +# define DP_CEC_TUNNELING_ENABLE (1 << 0) +# define DP_CEC_SNOOPING_ENABLE (1 << 1) + +#define DP_CEC_RX_MESSAGE_INFO 0x3002 +# define DP_CEC_RX_MESSAGE_LEN_MASK (0xf << 0) +# define DP_CEC_RX_MESSAGE_LEN_SHIFT 0 +# define DP_CEC_RX_MESSAGE_HPD_STATE (1 << 4) +# define DP_CEC_RX_MESSAGE_HPD_LOST (1 << 5) +# define DP_CEC_RX_MESSAGE_ACKED (1 << 6) +# define DP_CEC_RX_MESSAGE_ENDED (1 << 7) + +#define DP_CEC_TX_MESSAGE_INFO 0x3003 +# define DP_CEC_TX_MESSAGE_LEN_MASK (0xf << 0) +# define DP_CEC_TX_MESSAGE_LEN_SHIFT 0 +# define DP_CEC_TX_RETRY_COUNT_MASK (0x7 << 4) +# define DP_CEC_TX_RETRY_COUNT_SHIFT 4 +# define DP_CEC_TX_MESSAGE_SEND (1 << 7) + +#define DP_CEC_TUNNELING_IRQ_FLAGS 0x3004 +# define DP_CEC_RX_MESSAGE_INFO_VALID (1 << 0) +# define DP_CEC_RX_MESSAGE_OVERFLOW (1 << 1) +# define DP_CEC_TX_MESSAGE_SENT (1 << 4) +# define DP_CEC_TX_LINE_ERROR (1 << 5) +# define DP_CEC_TX_ADDRESS_NACK_ERROR (1 << 6) +# define DP_CEC_TX_DATA_NACK_ERROR (1 << 7) + +#define DP_CEC_LOGICAL_ADDRESS_MASK 0x300E /* 0x300F word */ +# define DP_CEC_LOGICAL_ADDRESS_0 (1 << 0) +# define DP_CEC_LOGICAL_ADDRESS_1 (1 << 1) +# define DP_CEC_LOGICAL_ADDRESS_2 (1 << 2) +# define DP_CEC_LOGICAL_ADDRESS_3 (1 << 3) +# define DP_CEC_LOGICAL_ADDRESS_4 (1 << 4) +# define DP_CEC_LOGICAL_ADDRESS_5 (1 << 5) +# define DP_CEC_LOGICAL_ADDRESS_6 (1 << 6) +# define DP_CEC_LOGICAL_ADDRESS_7 (1 << 7) +#define DP_CEC_LOGICAL_ADDRESS_MASK_2 0x300F /* 0x300E word */ +# define DP_CEC_LOGICAL_ADDRESS_8 (1 << 0) +# define DP_CEC_LOGICAL_ADDRESS_9 (1 << 1) +# define DP_CEC_LOGICAL_ADDRESS_10 (1 << 2) +# define DP_CEC_LOGICAL_ADDRESS_11 (1 << 3) +# define DP_CEC_LOGICAL_ADDRESS_12 (1 << 4) +# define DP_CEC_LOGICAL_ADDRESS_13 (1 << 5) +# define DP_CEC_LOGICAL_ADDRESS_14 (1 << 6) +# define DP_CEC_LOGICAL_ADDRESS_15 (1 << 7) + +#define DP_CEC_RX_MESSAGE_BUFFER 0x3010 +#define DP_CEC_TX_MESSAGE_BUFFER 0x3020 +#define DP_CEC_MESSAGE_BUFFER_LENGTH 0x10 + /* DP 1.2 Sideband message defines */ /* peer device type - DP 1.2a Table 2-92 */ #define DP_PEER_DEVICE_NONE 0x0 diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index 5b024764666c..177ab6f86855 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -24,6 +24,7 @@ #include 
<linux/types.h> #include <drm/drm_dp_helper.h> +#include <drm/drm_atomic.h> struct drm_dp_mst_branch; @@ -403,6 +404,12 @@ struct drm_dp_payload { int vcpi; }; +struct drm_dp_mst_topology_state { + int avail_slots; + struct drm_atomic_state *state; + struct drm_dp_mst_topology_mgr *mgr; +}; + /** * struct drm_dp_mst_topology_mgr - DisplayPort MST manager * @@ -481,6 +488,16 @@ struct drm_dp_mst_topology_mgr { int pbn_div; /** + * @state: State information for topology manager + */ + struct drm_dp_mst_topology_state *state; + + /** + * @funcs: Atomic helper callbacks + */ + const struct drm_private_state_funcs *funcs; + + /** * @qlock: protects @tx_msg_downq, the &drm_dp_mst_branch.txslost and * &drm_dp_sideband_msg_tx.state once they are queued */ @@ -596,4 +613,13 @@ void drm_dp_mst_dump_topology(struct seq_file *m, void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr); int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr); +struct drm_dp_mst_topology_state *drm_atomic_get_mst_topology_state(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr); +int drm_dp_atomic_find_vcpi_slots(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port, int pbn); +int drm_dp_atomic_release_vcpi_slots(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr, + int slots); + #endif diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 53b98321df9b..e64e33b9dd26 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -104,23 +104,6 @@ struct drm_driver { int (*open) (struct drm_device *, struct drm_file *); /** - * @preclose: - * - * One of the driver callbacks when a new &struct drm_file is closed. - * Useful for tearing down driver-private data structures allocated in - * @open like buffer allocators, execution contexts or similar things. - * - * Since the display/modeset side of DRM can only be owned by exactly - * one &struct drm_file (see &drm_file.is_master and &drm_device.master) - * there should never be a need to tear down any modeset related - * resources in this callback. Doing so would be a driver design bug. - * - * FIXME: It is not really clear why there's both @preclose and - * @postclose. Without a really good reason, use @postclose only. - */ - void (*preclose) (struct drm_device *, struct drm_file *file_priv); - - /** * @postclose: * * One of the driver callbacks when a new &struct drm_file is closed. @@ -131,9 +114,6 @@ struct drm_driver { * one &struct drm_file (see &drm_file.is_master and &drm_device.master) * there should never be a need to tear down any modeset related * resources in this callback. Doing so would be a driver design bug. - * - * FIXME: It is not really clear why there's both @preclose and - * @postclose. Without a really good reason, use @postclose only. */ void (*postclose) (struct drm_device *, struct drm_file *); @@ -150,7 +130,7 @@ struct drm_driver { * state changes, e.g. in conjunction with the :ref:`vga_switcheroo` * infrastructure. * - * This is called after @preclose and @postclose have been called. + * This is called after @postclose hook has been called. * * NOTE: * @@ -261,8 +241,10 @@ struct drm_driver { * DRM device. * pipe: * Id of the crtc to query. - * flags: - * Flags from the caller (DRM_CALLED_FROM_VBLIRQ or 0). + * in_vblank_irq: + * True when called from drm_crtc_handle_vblank(). Some drivers + * need to apply some workarounds for gpu-specific vblank irq quirks + * if flag is set. 
* vpos: * Target location for current vertical scanout position. * hpos: @@ -283,22 +265,19 @@ struct drm_driver { * * Returns: * - * Flags, or'ed together as follows: + * True on success, false if a reliable scanout position counter could + * not be read out. * - * DRM_SCANOUTPOS_VALID: - * Query successful. - * DRM_SCANOUTPOS_INVBL: - * Inside vblank. - * DRM_SCANOUTPOS_ACCURATE: Returned position is accurate. A lack of - * this flag means that returned position may be offset by a - * constant but unknown small number of scanlines wrt. real scanout - * position. + * FIXME: * + * Since this is a helper to implement @get_vblank_timestamp, we should + * move it to &struct drm_crtc_helper_funcs, like all the other + * helper-internal hooks. */ - int (*get_scanout_position) (struct drm_device *dev, unsigned int pipe, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode); + bool (*get_scanout_position) (struct drm_device *dev, unsigned int pipe, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode); /** * @get_vblank_timestamp: @@ -328,22 +307,25 @@ struct drm_driver { * Returns true upper bound on error for timestamp. * vblank_time: * Target location for returned vblank timestamp. - * flags: - * 0 = Defaults, no special treatment needed. - * DRM_CALLED_FROM_VBLIRQ = Function is called from vblank - * irq handler. Some drivers need to apply some workarounds - * for gpu-specific vblank irq quirks if flag is set. + * in_vblank_irq: + * True when called from drm_crtc_handle_vblank(). Some drivers + * need to apply some workarounds for gpu-specific vblank irq quirks + * if flag is set. * * Returns: * - * Zero if timestamping isn't supported in current display mode or a - * negative number on failure. A positive status code on success, - * which describes how the vblank_time timestamp was computed. + * True on success, false on failure, which means the core should + * fallback to a simple timestamp taken in drm_crtc_handle_vblank(). + * + * FIXME: + * + * We should move this hook to &struct drm_crtc_funcs like all the other + * vblank hooks. */ - int (*get_vblank_timestamp) (struct drm_device *dev, unsigned int pipe, + bool (*get_vblank_timestamp) (struct drm_device *dev, unsigned int pipe, int *max_error, struct timeval *vblank_time, - unsigned flags); + bool in_vblank_irq); /* these have to be filled in */ @@ -516,6 +498,7 @@ struct drm_driver { /* List of devices hanging off this driver with stealth attach. 
*/ struct list_head legacy_dev_list; int (*firstopen) (struct drm_device *); + void (*preclose) (struct drm_device *, struct drm_file *file_priv); int (*dma_ioctl) (struct drm_device *dev, void *data, struct drm_file *file_priv); int (*dma_quiescent) (struct drm_device *); int (*context_dtor) (struct drm_device *dev, int context); diff --git a/include/drm/drm_fb_cma_helper.h b/include/drm/drm_fb_cma_helper.h index a5ecc0a58260..199a63f48659 100644 --- a/include/drm/drm_fb_cma_helper.h +++ b/include/drm/drm_fb_cma_helper.h @@ -41,6 +41,10 @@ struct drm_framebuffer *drm_fb_cma_create(struct drm_device *dev, struct drm_gem_cma_object *drm_fb_cma_get_gem_obj(struct drm_framebuffer *fb, unsigned int plane); +dma_addr_t drm_fb_cma_get_gem_addr(struct drm_framebuffer *fb, + struct drm_plane_state *state, + unsigned int plane); + int drm_fb_cma_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state); diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h index f962d33667cf..b42529e0fae0 100644 --- a/include/drm/drm_gem_cma_helper.h +++ b/include/drm/drm_gem_cma_helper.h @@ -26,6 +26,13 @@ to_drm_gem_cma_obj(struct drm_gem_object *gem_obj) return container_of(gem_obj, struct drm_gem_cma_object, base); } +#ifndef CONFIG_MMU +#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS \ + .get_unmapped_area = drm_gem_cma_get_unmapped_area, +#else +#define DRM_GEM_CMA_UNMAPPED_AREA_FOPS +#endif + /** * DEFINE_DRM_GEM_CMA_FOPS() - macro to generate file operations for CMA drivers * @name: name for the generated structure @@ -50,6 +57,7 @@ to_drm_gem_cma_obj(struct drm_gem_object *gem_obj) .read = drm_read,\ .llseek = noop_llseek,\ .mmap = drm_gem_cma_mmap,\ + DRM_GEM_CMA_UNMAPPED_AREA_FOPS \ } /* free GEM object */ @@ -85,15 +93,6 @@ unsigned long drm_gem_cma_get_unmapped_area(struct file *filp, unsigned long len, unsigned long pgoff, unsigned long flags); -#else -static inline unsigned long drm_gem_cma_get_unmapped_area(struct file *filp, - unsigned long addr, - unsigned long len, - unsigned long pgoff, - unsigned long flags) -{ - return -EINVAL; -} #endif #ifdef CONFIG_DEBUG_FS diff --git a/include/drm/drm_irq.h b/include/drm/drm_irq.h index cf0be6594c8c..569ca86d4e1f 100644 --- a/include/drm/drm_irq.h +++ b/include/drm/drm_irq.h @@ -121,6 +121,18 @@ struct drm_vblank_crtc { * drm_calc_timestamping_constants(). */ int linedur_ns; + + /** + * @hwmode: + * + * Cache of the current hardware display mode. Only valid when @enabled + * is set. This is used by helpers like + * drm_calc_vbltimestamp_from_scanoutpos(). We can't just access the + * hardware mode by e.g. looking at &drm_crtc_state.adjusted_mode, + * because that one is really hard to get from interrupt context. + */ + struct drm_display_mode hwmode; + /** * @enabled: Tracks the enabling state of the corresponding &drm_crtc to * avoid double-disabling and hence corrupting saved state. 
Needed by @@ -153,11 +165,10 @@ void drm_crtc_vblank_on(struct drm_crtc *crtc); void drm_vblank_cleanup(struct drm_device *dev); u32 drm_accurate_vblank_count(struct drm_crtc *crtc); -int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, - unsigned int pipe, int *max_error, - struct timeval *vblank_time, - unsigned flags, - const struct drm_display_mode *mode); +bool drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, + unsigned int pipe, int *max_error, + struct timeval *vblank_time, + bool in_vblank_irq); void drm_calc_timestamping_constants(struct drm_crtc *crtc, const struct drm_display_mode *mode); diff --git a/include/drm/drm_mem_util.h b/include/drm/drm_mem_util.h deleted file mode 100644 index d0f6cf2e5324..000000000000 --- a/include/drm/drm_mem_util.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright © 2008 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jesse Barnes <jbarnes@virtuousgeek.org> - * - */ -#ifndef _DRM_MEM_UTIL_H_ -#define _DRM_MEM_UTIL_H_ - -#include <linux/vmalloc.h> - -static __inline__ void *drm_calloc_large(size_t nmemb, size_t size) -{ - if (size != 0 && nmemb > SIZE_MAX / size) - return NULL; - - if (size * nmemb <= PAGE_SIZE) - return kcalloc(nmemb, size, GFP_KERNEL); - - return vzalloc(size * nmemb); -} - -/* Modeled after cairo's malloc_ab, it's like calloc but without the zeroing. 
*/ -static __inline__ void *drm_malloc_ab(size_t nmemb, size_t size) -{ - if (size != 0 && nmemb > SIZE_MAX / size) - return NULL; - - if (size * nmemb <= PAGE_SIZE) - return kmalloc(nmemb * size, GFP_KERNEL); - - return vmalloc(size * nmemb); -} - -static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size, gfp_t gfp) -{ - if (size != 0 && nmemb > SIZE_MAX / size) - return NULL; - - if (size * nmemb <= PAGE_SIZE) - return kmalloc(nmemb * size, gfp); - - if (gfp & __GFP_RECLAIMABLE) { - void *ptr = kmalloc(nmemb * size, - gfp | __GFP_NOWARN | __GFP_NORETRY); - if (ptr) - return ptr; - } - - return __vmalloc(size * nmemb, gfp, PAGE_KERNEL); -} - -static __inline void drm_free_large(void *ptr) -{ - kvfree(ptr); -} - -#endif diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index c01c328f6cc8..85984b208218 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -106,6 +106,40 @@ struct drm_crtc_helper_funcs { void (*commit)(struct drm_crtc *crtc); /** + * @mode_valid: + * + * This callback is used to check if a specific mode is valid in this + * crtc. This should be implemented if the crtc has some sort of + * restriction in the modes it can display. For example, a given crtc + * may be responsible to set a clock value. If the clock can not + * produce all the values for the available modes then this callback + * can be used to restrict the number of modes to only the ones that + * can be displayed. + * + * This hook is used by the probe helpers to filter the mode list in + * drm_helper_probe_single_connector_modes(), and it is used by the + * atomic helpers to validate modes supplied by userspace in + * drm_atomic_helper_check_modeset(). + * + * This function is optional. + * + * NOTE: + * + * Since this function is both called from the check phase of an atomic + * commit, and the mode validation in the probe paths it is not allowed + * to look at anything else but the passed-in mode, and validate it + * against configuration-invariant hardward constraints. Any further + * limits which depend upon the configuration can only be checked in + * @mode_fixup or @atomic_check. + * + * RETURNS: + * + * drm_mode_status Enum + */ + enum drm_mode_status (*mode_valid)(struct drm_crtc *crtc, + const struct drm_display_mode *mode); + + /** * @mode_fixup: * * This callback is used to validate a mode. The parameter mode is the @@ -113,7 +147,8 @@ struct drm_crtc_helper_funcs { * encoders need to be fed with. Note that this is the inverse semantics * of the meaning for the &drm_encoder and &drm_bridge_funcs.mode_fixup * vfunc. If the CRTC cannot support the requested conversion from mode - * to adjusted_mode it should reject the modeset. + * to adjusted_mode it should reject the modeset. See also + * &drm_crtc_state.adjusted_mode for more details. * * This function is used by both legacy CRTC helpers and atomic helpers. * With atomic helpers it is optional. @@ -130,22 +165,17 @@ struct drm_crtc_helper_funcs { * allowed. * * Atomic drivers which need to inspect and adjust more state should - * instead use the @atomic_check callback. - * - * Also beware that neither core nor helpers filter modes before - * passing them to the driver: While the list of modes that is - * advertised to userspace is filtered using the - * &drm_connector.mode_valid callback, neither the core nor the helpers - * do any filtering on modes passed in from userspace when setting a - * mode. 
It is therefore possible for userspace to pass in a mode that - * was previously filtered out using &drm_connector.mode_valid or add a - * custom mode that wasn't probed from EDID or similar to begin with. - * Even though this is an advanced feature and rarely used nowadays, - * some users rely on being able to specify modes manually so drivers - * must be prepared to deal with it. Specifically this means that all - * drivers need not only validate modes in &drm_connector.mode_valid but - * also in this or in the &drm_encoder_helper_funcs.mode_fixup callback - * to make sure invalid modes passed in from userspace are rejected. + * instead use the @atomic_check callback, but note that they're not + * perfectly equivalent: @mode_valid is called from + * drm_atomic_helper_check_modeset(), but @atomic_check is called from + * drm_atomic_helper_check_planes(), because originally it was meant for + * plane update checks only. + * + * Also beware that userspace can request its own custom modes, neither + * core nor helpers filter modes to the list of probe modes reported by + * the GETCONNECTOR IOCTL and stored in &drm_connector.modes. To ensure + * that modes are filtered consistently put any CRTC constraints and + * limits checks into @mode_valid. * * RETURNS: * @@ -341,6 +371,12 @@ struct drm_crtc_helper_funcs { * state objects passed-in or assembled in the overall &drm_atomic_state * update tracking structure. * + * Also beware that userspace can request its own custom modes, neither + * core nor helpers filter modes to the list of probe modes reported by + * the GETCONNECTOR IOCTL and stored in &drm_connector.modes. To ensure + * that modes are filtered consistently put any CRTC constraints and + * limits checks into @mode_valid. + * * RETURNS: * * 0 on success, -EINVAL if the state or the transition can't be @@ -457,13 +493,48 @@ struct drm_encoder_helper_funcs { void (*dpms)(struct drm_encoder *encoder, int mode); /** + * @mode_valid: + * + * This callback is used to check if a specific mode is valid in this + * encoder. This should be implemented if the encoder has some sort + * of restriction in the modes it can display. For example, a given + * encoder may be responsible to set a clock value. If the clock can + * not produce all the values for the available modes then this callback + * can be used to restrict the number of modes to only the ones that + * can be displayed. + * + * This hook is used by the probe helpers to filter the mode list in + * drm_helper_probe_single_connector_modes(), and it is used by the + * atomic helpers to validate modes supplied by userspace in + * drm_atomic_helper_check_modeset(). + * + * This function is optional. + * + * NOTE: + * + * Since this function is both called from the check phase of an atomic + * commit, and the mode validation in the probe paths it is not allowed + * to look at anything else but the passed-in mode, and validate it + * against configuration-invariant hardward constraints. Any further + * limits which depend upon the configuration can only be checked in + * @mode_fixup or @atomic_check. + * + * RETURNS: + * + * drm_mode_status Enum + */ + enum drm_mode_status (*mode_valid)(struct drm_encoder *crtc, + const struct drm_display_mode *mode); + + /** * @mode_fixup: * * This callback is used to validate and adjust a mode. The parameter * mode is the display mode that should be fed to the next element in * the display chain, either the final &drm_connector or a &drm_bridge. 
* The parameter adjusted_mode is the input mode the encoder requires. It - * can be modified by this callback and does not need to match mode. + * can be modified by this callback and does not need to match mode. See + * also &drm_crtc_state.adjusted_mode for more details. * * This function is used by both legacy CRTC helpers and atomic helpers. * This hook is optional. @@ -480,23 +551,15 @@ struct drm_encoder_helper_funcs { * allowed. * * Atomic drivers which need to inspect and adjust more state should - * instead use the @atomic_check callback. - - * Also beware that neither core nor helpers filter modes before - * passing them to the driver: While the list of modes that is - * advertised to userspace is filtered using the connector's - * &drm_connector_helper_funcs.mode_valid callback, neither the core nor - * the helpers do any filtering on modes passed in from userspace when - * setting a mode. It is therefore possible for userspace to pass in a - * mode that was previously filtered out using - * &drm_connector_helper_funcs.mode_valid or add a custom mode that - * wasn't probed from EDID or similar to begin with. Even though this - * is an advanced feature and rarely used nowadays, some users rely on - * being able to specify modes manually so drivers must be prepared to - * deal with it. Specifically this means that all drivers need not only - * validate modes in &drm_connector.mode_valid but also in this or in - * the &drm_crtc_helper_funcs.mode_fixup callback to make sure - * invalid modes passed in from userspace are rejected. + * instead use the @atomic_check callback. If @atomic_check is used, + * this hook isn't called since @atomic_check allows a strict superset + * of the functionality of @mode_fixup. + * + * Also beware that userspace can request its own custom modes; neither + * core nor helpers filter modes to the list of probe modes reported by + * the GETCONNECTOR IOCTL and stored in &drm_connector.modes. To ensure + * that modes are filtered consistently, put any encoder constraints and + * limits checks into @mode_valid. * * RETURNS: * @@ -677,6 +740,11 @@ struct drm_encoder_helper_funcs { * update the CRTC to match what the encoder needs for the requested * connector. * + * Since this provides a strict superset of the functionality of + * @mode_fixup (the requested and adjusted modes are both available + * through the passed-in &struct drm_crtc_state), @mode_fixup is not + * called when @atomic_check is implemented. + * * This function is used by the atomic helpers, but it is optional. * * NOTE: @@ -686,6 +754,12 @@ struct drm_encoder_helper_funcs { * state objects passed-in or assembled in the overall &drm_atomic_state * update tracking structure. * + * Also beware that userspace can request its own custom modes; neither + * core nor helpers filter modes to the list of probe modes reported by + * the GETCONNECTOR IOCTL and stored in &drm_connector.modes. To ensure + * that modes are filtered consistently, put any encoder constraints and + * limits checks into @mode_valid. + * * RETURNS: * * 0 on success, -EINVAL if the state or the transition can't be @@ -795,13 +869,20 @@ struct drm_connector_helper_funcs { * (which is usually derived from the EDID data block from the sink). * See e.g. drm_helper_probe_single_connector_modes(). * + * This function is optional. + * * NOTE: * * This only filters the mode list supplied to userspace in the - * GETCONNECOTR IOCTL. Userspace is free to create modes of its own and - * ask the kernel to use them.
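Because @atomic_check supersedes @mode_fixup when both could apply, configuration-dependent adjustments belong there. A sketch for the same hypothetical encoder, clamping the adjusted mode's clock (both the requested and the adjusted mode are reachable through crtc_state):

static int foo_encoder_atomic_check(struct drm_encoder *encoder,
				    struct drm_crtc_state *crtc_state,
				    struct drm_connector_state *conn_state)
{
	struct drm_display_mode *adj = &crtc_state->adjusted_mode;

	/* No separate ->mode_fixup() is needed; adjust the mode here. */
	if (adj->clock > 300000)
		adj->clock = 300000;

	return 0;
}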
It this case the atomic helpers or legacy - * CRTC helpers will not call this function. Drivers therefore must - * still fully validate any mode passed in in a modeset request. + * GETCONNECTOR IOCTL, unlike &drm_encoder_helper_funcs.mode_valid, + * &drm_crtc_helper_funcs.mode_valid and &drm_bridge_funcs.mode_valid, + * which are also called by the atomic helpers from + * drm_atomic_helper_check_modeset(). This allows userspace to override + * and ignore sink constraints (like the pixel clock limits in the + * screen's EDID), which is useful for e.g. testing, or working around + * a broken EDID. Any source hardware constraints (which always need to + * be enforced) should therefore be checked in one of the above + * callbacks, and not this one here. * * To avoid races with concurrent connector state updates, the helper * libraries always call this with the &drm_mode_config.connection_mutex diff --git a/include/drm/drm_panel.h b/include/drm/drm_panel.h index 1b364b0100f4..14ac240a1f64 100644 --- a/include/drm/drm_panel.h +++ b/include/drm/drm_panel.h @@ -24,8 +24,10 @@ #ifndef __DRM_PANEL_H__ #define __DRM_PANEL_H__ +#include <linux/errno.h> #include <linux/list.h> +struct device_node; struct drm_connector; struct drm_device; struct drm_panel; diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h index 0b2a235c4be0..59ccab402e85 100644 --- a/include/drm/drm_prime.h +++ b/include/drm/drm_prime.h @@ -50,6 +50,8 @@ struct drm_prime_file_private { struct rb_root handles; }; +struct device; + struct dma_buf_export_info; struct dma_buf; @@ -65,6 +67,11 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev, int *prime_fd); struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); + +struct drm_gem_object *drm_gem_prime_import_dev(struct drm_device *dev, + struct dma_buf *dma_buf, + struct device *attach_dev); + int drm_gem_prime_fd_to_handle(struct drm_device *dev, struct drm_file *file_priv, int prime_fd, uint32_t *handle); struct dma_buf *drm_gem_dmabuf_export(struct drm_device *dev, diff --git a/include/drm/intel_lpe_audio.h b/include/drm/intel_lpe_audio.h index e9892b4c3af1..b6121c8fe539 100644 --- a/include/drm/intel_lpe_audio.h +++ b/include/drm/intel_lpe_audio.h @@ -31,20 +31,20 @@ struct platform_device; #define HDMI_MAX_ELD_BYTES 128 -struct intel_hdmi_lpe_audio_eld { - int port_id; - int pipe_id; - unsigned char eld_data[HDMI_MAX_ELD_BYTES]; +struct intel_hdmi_lpe_audio_port_pdata { + u8 eld[HDMI_MAX_ELD_BYTES]; + int port; + int pipe; + int ls_clock; + bool dp_output; }; struct intel_hdmi_lpe_audio_pdata { - bool notify_pending; - int tmds_clock_speed; - bool hdmi_connected; - bool dp_output; - int link_rate; - struct intel_hdmi_lpe_audio_eld eld; - void (*notify_audio_lpe)(struct platform_device *pdev); + struct intel_hdmi_lpe_audio_port_pdata port[3]; /* for ports B,C,D */ + int num_ports; + int num_pipes; + + void (*notify_audio_lpe)(struct platform_device *pdev, int port); /* port: 0==B,1==C,2==D */ spinlock_t lpe_audio_slock; }; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 6bbd34d25a8d..990d529f823c 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -30,10 +30,6 @@ #ifndef _TTM_BO_DRIVER_H_ #define _TTM_BO_DRIVER_H_ -#include <ttm/ttm_bo_api.h> -#include <ttm/ttm_memory.h> -#include <ttm/ttm_module.h> -#include <ttm/ttm_placement.h> #include <drm/drm_mm.h> #include <drm/drm_global.h> #include <drm/drm_vma_manager.h> @@ -42,6 +38,11 @@ #include
<linux/spinlock.h> #include <linux/reservation.h> +#include "ttm_bo_api.h" +#include "ttm_memory.h" +#include "ttm_module.h" +#include "ttm_placement.h" + #define TTM_MAX_BO_PRIORITY 4U struct ttm_backend_func { diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index 47f35b8e6d09..b0fdd1980034 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -31,9 +31,10 @@ #ifndef _TTM_EXECBUF_UTIL_H_ #define _TTM_EXECBUF_UTIL_H_ -#include <ttm/ttm_bo_api.h> #include <linux/list.h> +#include "ttm_bo_api.h" + /** * struct ttm_validate_buffer * diff --git a/include/drm/ttm/ttm_lock.h b/include/drm/ttm/ttm_lock.h index 2902beb5f689..0c3af9836863 100644 --- a/include/drm/ttm/ttm_lock.h +++ b/include/drm/ttm/ttm_lock.h @@ -49,10 +49,11 @@ #ifndef _TTM_LOCK_H_ #define _TTM_LOCK_H_ -#include <ttm/ttm_object.h> #include <linux/wait.h> #include <linux/atomic.h> +#include "ttm_object.h" + /** * struct ttm_lock * diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h index 1487011fe057..a98bfeb4239e 100644 --- a/include/drm/ttm/ttm_object.h +++ b/include/drm/ttm/ttm_object.h @@ -42,7 +42,8 @@ #include <linux/kref.h> #include <linux/rcupdate.h> #include <linux/dma-buf.h> -#include <ttm/ttm_memory.h> + +#include "ttm_memory.h" /** * enum ttm_ref_type diff --git a/include/linux/amba/clcd-regs.h b/include/linux/amba/clcd-regs.h new file mode 100644 index 000000000000..516a6fda83c5 --- /dev/null +++ b/include/linux/amba/clcd-regs.h @@ -0,0 +1,86 @@ +/* + * David A Rusling + * + * Copyright (C) 2001 ARM Limited + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#ifndef AMBA_CLCD_REGS_H +#define AMBA_CLCD_REGS_H + +/* + * CLCD Controller Internal Register addresses + */ +#define CLCD_TIM0 0x00000000 +#define CLCD_TIM1 0x00000004 +#define CLCD_TIM2 0x00000008 +#define CLCD_TIM3 0x0000000c +#define CLCD_UBAS 0x00000010 +#define CLCD_LBAS 0x00000014 + +#define CLCD_PL110_IENB 0x00000018 +#define CLCD_PL110_CNTL 0x0000001c +#define CLCD_PL110_STAT 0x00000020 +#define CLCD_PL110_INTR 0x00000024 +#define CLCD_PL110_UCUR 0x00000028 +#define CLCD_PL110_LCUR 0x0000002C + +#define CLCD_PL111_CNTL 0x00000018 +#define CLCD_PL111_IENB 0x0000001c +#define CLCD_PL111_RIS 0x00000020 +#define CLCD_PL111_MIS 0x00000024 +#define CLCD_PL111_ICR 0x00000028 +#define CLCD_PL111_UCUR 0x0000002c +#define CLCD_PL111_LCUR 0x00000030 + +#define CLCD_PALL 0x00000200 +#define CLCD_PALETTE 0x00000200 + +#define TIM2_PCD_LO_MASK GENMASK(4, 0) +#define TIM2_PCD_LO_BITS 5 +#define TIM2_CLKSEL (1 << 5) +#define TIM2_IVS (1 << 11) +#define TIM2_IHS (1 << 12) +#define TIM2_IPC (1 << 13) +#define TIM2_IOE (1 << 14) +#define TIM2_BCD (1 << 26) +#define TIM2_PCD_HI_MASK GENMASK(31, 27) +#define TIM2_PCD_HI_BITS 5 +#define TIM2_PCD_HI_SHIFT 27 + +#define CNTL_LCDEN (1 << 0) +#define CNTL_LCDBPP1 (0 << 1) +#define CNTL_LCDBPP2 (1 << 1) +#define CNTL_LCDBPP4 (2 << 1) +#define CNTL_LCDBPP8 (3 << 1) +#define CNTL_LCDBPP16 (4 << 1) +#define CNTL_LCDBPP16_565 (6 << 1) +#define CNTL_LCDBPP16_444 (7 << 1) +#define CNTL_LCDBPP24 (5 << 1) +#define CNTL_LCDBW (1 << 4) +#define CNTL_LCDTFT (1 << 5) +#define CNTL_LCDMONO8 (1 << 6) +#define CNTL_LCDDUAL (1 << 7) +#define CNTL_BGR (1 << 8) +#define CNTL_BEBO (1 << 9) +#define CNTL_BEPO (1 << 10) +#define CNTL_LCDPWR (1 << 11) +#define CNTL_LCDVCOMP(x) ((x) << 12) +#define CNTL_LDMAFIFOTIME (1 << 15) 
+#define CNTL_WATERMARK (1 << 16) + +/* ST Microelectronics variant bits */ +#define CNTL_ST_1XBPP_444 0x0 +#define CNTL_ST_1XBPP_5551 (1 << 17) +#define CNTL_ST_1XBPP_565 (1 << 18) +#define CNTL_ST_CDWID_12 0x0 +#define CNTL_ST_CDWID_16 (1 << 19) +#define CNTL_ST_CDWID_18 (1 << 20) +#define CNTL_ST_CDWID_24 ((1 << 19)|(1 << 20)) +#define CNTL_ST_CEAEN (1 << 21) +#define CNTL_ST_LCDBPP24_PACKED (6 << 1) + +#endif /* AMBA_CLCD_REGS_H */ diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h index 1035879b322c..d0c3be77c18e 100644 --- a/include/linux/amba/clcd.h +++ b/include/linux/amba/clcd.h @@ -10,73 +10,7 @@ * for more details. */ #include <linux/fb.h> - -/* - * CLCD Controller Internal Register addresses - */ -#define CLCD_TIM0 0x00000000 -#define CLCD_TIM1 0x00000004 -#define CLCD_TIM2 0x00000008 -#define CLCD_TIM3 0x0000000c -#define CLCD_UBAS 0x00000010 -#define CLCD_LBAS 0x00000014 - -#define CLCD_PL110_IENB 0x00000018 -#define CLCD_PL110_CNTL 0x0000001c -#define CLCD_PL110_STAT 0x00000020 -#define CLCD_PL110_INTR 0x00000024 -#define CLCD_PL110_UCUR 0x00000028 -#define CLCD_PL110_LCUR 0x0000002C - -#define CLCD_PL111_CNTL 0x00000018 -#define CLCD_PL111_IENB 0x0000001c -#define CLCD_PL111_RIS 0x00000020 -#define CLCD_PL111_MIS 0x00000024 -#define CLCD_PL111_ICR 0x00000028 -#define CLCD_PL111_UCUR 0x0000002c -#define CLCD_PL111_LCUR 0x00000030 - -#define CLCD_PALL 0x00000200 -#define CLCD_PALETTE 0x00000200 - -#define TIM2_CLKSEL (1 << 5) -#define TIM2_IVS (1 << 11) -#define TIM2_IHS (1 << 12) -#define TIM2_IPC (1 << 13) -#define TIM2_IOE (1 << 14) -#define TIM2_BCD (1 << 26) - -#define CNTL_LCDEN (1 << 0) -#define CNTL_LCDBPP1 (0 << 1) -#define CNTL_LCDBPP2 (1 << 1) -#define CNTL_LCDBPP4 (2 << 1) -#define CNTL_LCDBPP8 (3 << 1) -#define CNTL_LCDBPP16 (4 << 1) -#define CNTL_LCDBPP16_565 (6 << 1) -#define CNTL_LCDBPP16_444 (7 << 1) -#define CNTL_LCDBPP24 (5 << 1) -#define CNTL_LCDBW (1 << 4) -#define CNTL_LCDTFT (1 << 5) -#define CNTL_LCDMONO8 (1 << 6) -#define CNTL_LCDDUAL (1 << 7) -#define CNTL_BGR (1 << 8) -#define CNTL_BEBO (1 << 9) -#define CNTL_BEPO (1 << 10) -#define CNTL_LCDPWR (1 << 11) -#define CNTL_LCDVCOMP(x) ((x) << 12) -#define CNTL_LDMAFIFOTIME (1 << 15) -#define CNTL_WATERMARK (1 << 16) - -/* ST Microelectronics variant bits */ -#define CNTL_ST_1XBPP_444 0x0 -#define CNTL_ST_1XBPP_5551 (1 << 17) -#define CNTL_ST_1XBPP_565 (1 << 18) -#define CNTL_ST_CDWID_12 0x0 -#define CNTL_ST_CDWID_16 (1 << 19) -#define CNTL_ST_CDWID_18 (1 << 20) -#define CNTL_ST_CDWID_24 ((1 << 19)|(1 << 20)) -#define CNTL_ST_CEAEN (1 << 21) -#define CNTL_ST_LCDBPP24_PACKED (6 << 1) +#include <linux/amba/clcd-regs.h> enum { /* individual formats */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c47aa248c640..fcd641032f8d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -238,7 +238,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); -void blk_mq_abort_requeue_list(struct request_queue *q); void blk_mq_complete_request(struct request *rq); bool blk_mq_queue_stopped(struct request_queue *q); diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index aa2e19182d99..51c5bd64bd00 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -3,6 +3,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/string.h> + 
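For reference on the ceph_debug.h hunk that follows: dout() switches from the ceph-local ceph_file_part() helper to the generic kbasename() from <linux/string.h>. A small sketch of the behavior being relied on (demo function name is hypothetical):

#include <linux/printk.h>
#include <linux/string.h>

static void demo_kbasename(void)
{
	/* Prints "messenger.c": kbasename() points just past the last
	 * '/', which is what ceph_file_part() computed by hand. */
	pr_info("%s\n", kbasename("net/ceph/messenger.c"));
}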
#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG /* @@ -12,12 +14,10 @@ */ # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) -extern const char *ceph_file_part(const char *s, int len); # define dout(fmt, ...) \ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ - ceph_file_part(__FILE__, sizeof(__FILE__)), \ - __LINE__, ##__VA_ARGS__) + kbasename(__FILE__), __LINE__, ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) do { \ diff --git a/include/linux/filter.h b/include/linux/filter.h index 56197f82af45..62d948f80730 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -272,6 +272,16 @@ struct bpf_prog_aux; .off = OFF, \ .imm = IMM }) +/* Unconditional jumps, goto pc + off16 */ + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8d5fcd6284ce..283dc2f5364d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -614,14 +614,16 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, netdev_features_t features) { - if (skb_vlan_tagged_multi(skb)) - features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + if (skb_vlan_tagged_multi(skb)) { + /* In the case of multi-tagged packets, use a direct mask + * instead of using netdev_intersect_features(), to make + * sure that only devices supporting NETIF_F_HW_CSUM will + * have checksum offloading support. + */ + features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + } return features; } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index dd9a263ed368..a940ec6a046c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -787,8 +787,14 @@ enum { }; enum { - CQE_RSS_HTYPE_IP = 0x3 << 6, - CQE_RSS_HTYPE_L4 = 0x3 << 2, + CQE_RSS_HTYPE_IP = 0x3 << 2, + /* cqe->rss_hash_type[3:2] - IP destination selected for hash + * (00 = none, 01 = IPv4, 10 = IPv6, 11 = Reserved) + */ + CQE_RSS_HTYPE_L4 = 0x3 << 6, + /* cqe->rss_hash_type[7:6] - L4 destination selected for hash + * (00 = none, 01 = TCP,
10 = UDP, 11 = IPSEC.SPI) + */ }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bcdf739ee41a..93273d9ea4d1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -787,7 +787,12 @@ enum { typedef void (*mlx5_cmd_cbk_t)(int status, void *context); +enum { + MLX5_CMD_ENT_STATE_PENDING_COMP, +}; + struct mlx5_cmd_work_ent { + unsigned long state; struct mlx5_cmd_msg *in; struct mlx5_cmd_msg *out; void *uout; @@ -976,7 +981,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index be378cf47fcc..b3044c2c62cb 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -294,7 +294,7 @@ int xt_match_to_user(const struct xt_entry_match *m, int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u); int xt_data_to_user(void __user *dst, const void *src, - int usersize, int size); + int usersize, int size, int aligned_size); void *xt_copy_counters_from_user(const void __user *user, unsigned int len, struct xt_counters_info *info, bool compat); diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index a30efb437e6d..e0cbf17af780 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, /* True if the target is not a standard target */ #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) +static inline bool ebt_invalid_target(int target) +{ + return (target < -NUM_STANDARD_TARGETS || target >= 0); +} + #endif diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index dc8224ae28d5..e0d1946270f3 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -64,6 +64,7 @@ extern struct platform_device *of_platform_device_create(struct device_node *np, const char *bus_id, struct device *parent); +extern int of_platform_device_destroy(struct device *dev, void *data); extern int of_platform_bus_probe(struct device_node *root, const struct of_device_id *matches, struct device *parent); diff --git a/include/linux/pci.h b/include/linux/pci.h index 33c2b0b77429..8039f9f0ca05 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -183,6 +183,11 @@ enum pci_dev_flags { PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9), /* Do not use FLR even if device advertises PCI_AF_CAP */ PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), + /* + * Resume before calling the driver's system suspend hooks, disabling + * the direct_complete optimization.
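A sketch of how a driver might use the quirk flag defined just below; the foo_pci_probe() function and device are hypothetical:

static int foo_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	/* Opt out of direct_complete: the PM core will resume this
	 * device before our system suspend callbacks run. */
	pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME;

	return 0;
}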
+ */ + PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11), }; enum pci_irq_reroute_variant { @@ -1342,9 +1347,9 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, const struct irq_affinity *aff_desc) { - if (min_vecs > 1) - return -EINVAL; - return 1; + if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq) + return 1; + return -ENOSPC; } static inline void pci_free_irq_vectors(struct pci_dev *dev) diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 422bc2e4cb6a..ef3eb8bbfee4 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -54,7 +54,8 @@ extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, - struct task_struct *new_parent); + struct task_struct *new_parent, + const struct cred *ptracer_cred); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 @@ -206,7 +207,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) if (unlikely(ptrace) && current->ptrace) { child->ptrace = current->ptrace; - __ptrace_link(child, current->parent); + __ptrace_link(child, current->parent, current->ptracer_cred); if (child->ptrace & PT_SEIZED) task_set_jobctl_pending(child, JOBCTL_TRAP_STOP); @@ -215,6 +216,8 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) set_tsk_thread_flag(child, TIF_SIGPENDING); } + else + child->ptracer_cred = NULL; } /** diff --git a/include/linux/serdev.h b/include/linux/serdev.h index cda76c6506ca..e69402d4a8ae 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -195,6 +195,7 @@ int serdev_device_open(struct serdev_device *); void serdev_device_close(struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); void serdev_device_set_flow_control(struct serdev_device *, bool); +int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); void serdev_device_wait_until_sent(struct serdev_device *, long); int serdev_device_get_tiocm(struct serdev_device *); int serdev_device_set_tiocm(struct serdev_device *, int, int); @@ -236,6 +237,12 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev return 0; } static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} +static inline int serdev_device_write_buf(struct serdev_device *serdev, + const unsigned char *buf, + size_t count) +{ + return -ENODEV; +} static inline void serdev_device_wait_until_sent(struct serdev_device *sdev, long timeout) {} static inline int serdev_device_get_tiocm(struct serdev_device *serdev) { @@ -301,7 +308,7 @@ struct tty_driver; struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, struct tty_driver *drv, int idx); -void serdev_tty_port_unregister(struct tty_port *port); +int serdev_tty_port_unregister(struct tty_port *port); #else static inline struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, @@ -309,14 +316,10 @@ static inline struct device *serdev_tty_port_register(struct tty_port *port, { return ERR_PTR(-ENODEV); } -static inline void serdev_tty_port_unregister(struct tty_port *port) {} -#endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */ - -static inline int 
serdev_device_write_buf(struct serdev_device *serdev, - const unsigned char *data, - size_t count) +static inline int serdev_tty_port_unregister(struct tty_port *port) { - return serdev_device_write(serdev, data, count, 0); + return -ENODEV; } +#endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */ #endif /*_LINUX_SERDEV_H */ diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index 3e3ab84fc4cd..5726107963b2 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -14,7 +14,6 @@ #define _LINUX_SYNC_FILE_H #include <linux/types.h> -#include <linux/kref.h> #include <linux/ktime.h> #include <linux/list.h> #include <linux/spinlock.h> @@ -24,8 +23,6 @@ /** * struct sync_file - sync file to export to the userspace * @file: file representing this fence - * @kref: reference count on fence. - * @name: name of sync_file. Useful for debugging * @sync_file_list: membership in global file list * @wq: wait queue for fence signaling * @fence: fence with the fences in the sync_file @@ -33,8 +30,14 @@ */ struct sync_file { struct file *file; - struct kref kref; - char name[32]; + /** + * @user_name: + * + * Name of the sync file provided by userspace, for merged fences. + * Otherwise generated through driver callbacks (in which case the + * entire array is 0). + */ + char user_name[32]; #ifdef CONFIG_DEBUG_FS struct list_head sync_file_list; #endif @@ -49,5 +52,6 @@ struct sync_file { struct sync_file *sync_file_create(struct dma_fence *fence); struct dma_fence *sync_file_get_fence(int fd); +char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len); #endif /* _LINUX_SYNC_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index d07cd2105a6c..eccb4ec30a8a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -558,6 +558,15 @@ extern struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); +extern struct device *tty_port_register_device_serdev(struct tty_port *port, + struct tty_driver *driver, unsigned index, + struct device *device); +extern struct device *tty_port_register_device_attr_serdev(struct tty_port *port, + struct tty_driver *driver, unsigned index, + struct device *device, void *drvdata, + const struct attribute_group **attr_grp); +extern void tty_port_unregister_device(struct tty_port *port, + struct tty_driver *driver, unsigned index); extern int tty_port_alloc_xmit_buf(struct tty_port *port); extern void tty_port_free_xmit_buf(struct tty_port *port); extern void tty_port_destroy(struct tty_port *port); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 7dffa5624ea6..97116379db5f 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -206,6 +206,7 @@ struct cdc_state { }; extern int usbnet_generic_cdc_bind(struct usbnet *, struct usb_interface *); +extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf); extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_status(struct usbnet *, struct urb *); diff --git a/include/net/dst.h b/include/net/dst.h index 049af33da3b6..cfc043784166 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -107,10 +107,16 @@ struct dst_entry { }; }; +struct dst_metrics { + u32 metrics[RTAX_MAX]; + atomic_t refcnt; +}; +extern const struct dst_metrics dst_default_metrics; + u32 
*dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); -extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL +#define DST_METRICS_REFCOUNTED 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 6692c5758b33..f7f6aa789c61 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -114,11 +114,11 @@ struct fib_info { __be32 fib_prefsrc; u32 fib_tb_id; u32 fib_priority; - u32 *fib_metrics; -#define fib_mtu fib_metrics[RTAX_MTU-1] -#define fib_window fib_metrics[RTAX_WINDOW-1] -#define fib_rtt fib_metrics[RTAX_RTT-1] -#define fib_advmss fib_metrics[RTAX_ADVMSS-1] + struct dst_metrics *fib_metrics; +#define fib_mtu fib_metrics->metrics[RTAX_MTU-1] +#define fib_window fib_metrics->metrics[RTAX_WINDOW-1] +#define fib_rtt fib_metrics->metrics[RTAX_RTT-1] +#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index e04fa7691e5d..c519bb5b5bb8 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -9,6 +9,7 @@ #ifndef _NF_CONNTRACK_HELPER_H #define _NF_CONNTRACK_HELPER_H +#include <linux/refcount.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_expect.h> @@ -26,6 +27,7 @@ struct nf_conntrack_helper { struct hlist_node hnode; /* Internal use. */ char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */ + refcount_t refcnt; struct module *me; /* pointer to self */ const struct nf_conntrack_expect_policy *expect_policy; @@ -79,6 +81,8 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); +void nf_conntrack_helper_put(struct nf_conntrack_helper *helper); + void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 028faec8fc27..8a8bab8d7b15 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -176,7 +176,7 @@ struct nft_data_desc { int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla); -void nft_data_uninit(const struct nft_data *data, enum nft_data_types type); +void nft_data_release(const struct nft_data *data, enum nft_data_types type); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len); diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index f31fb6331a53..3248beaf16b0 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -3,6 +3,7 @@ #include <linux/types.h> #include <net/act_api.h> +#include <linux/tc_act/tc_csum.h> struct tcf_csum { struct tc_action common; @@ -11,4 +12,18 @@ struct tcf_csum { }; #define to_tcf_csum(a) ((struct tcf_csum *)a) +static inline bool is_tcf_csum(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_CSUM) + return true; +#endif + return false; +} + +static inline u32 tcf_csum_update_flags(const struct tc_action *a) +{ + return 
to_tcf_csum(a)->update_flags; +} + #endif /* __NET_TC_CSUM_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6793a30c66b1..7e7e2b0d2915 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -979,10 +979,6 @@ struct xfrm_dst { struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; -#ifdef CONFIG_XFRM_SUB_POLICY - struct flowi *origin; - struct xfrm_selector *partner; -#endif u32 xfrm_genid; u32 policy_genid; u32 route_mtu_cached; @@ -998,12 +994,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) dst_release(xdst->route); if (likely(xdst->u.dst.xfrm)) xfrm_state_put(xdst->u.dst.xfrm); -#ifdef CONFIG_XFRM_SUB_POLICY - kfree(xdst->origin); - xdst->origin = NULL; - kfree(xdst->partner); - xdst->partner = NULL; -#endif } #endif diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 8c67fc03d53d..403339f98a92 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -75,7 +75,7 @@ extern "C" { * (define not exposed to user space). */ #define DRM_MODE_FLAG_3D_MASK (0x1f<<14) -#define DRM_MODE_FLAG_3D_NONE (0<<14) +#define DRM_MODE_FLAG_3D_NONE (0<<14) #define DRM_MODE_FLAG_3D_FRAME_PACKING (1<<14) #define DRM_MODE_FLAG_3D_FIELD_ALTERNATIVE (2<<14) #define DRM_MODE_FLAG_3D_LINE_ALTERNATIVE (3<<14) @@ -127,6 +127,53 @@ extern "C" { #define DRM_MODE_LINK_STATUS_GOOD 0 #define DRM_MODE_LINK_STATUS_BAD 1 +/* + * DRM_MODE_ROTATE_<degrees> + * + * Signals that a drm plane has been rotated <degrees> degrees in the + * counter-clockwise direction. + * + * This define is provided as a convenience; looking up the property id + * using the name->prop id lookup is the preferred method. + */ +#define DRM_MODE_ROTATE_0 (1<<0) +#define DRM_MODE_ROTATE_90 (1<<1) +#define DRM_MODE_ROTATE_180 (1<<2) +#define DRM_MODE_ROTATE_270 (1<<3) + +/* + * DRM_MODE_ROTATE_MASK + * + * Bitmask used to look for drm plane rotations. + */ +#define DRM_MODE_ROTATE_MASK (\ + DRM_MODE_ROTATE_0 | \ + DRM_MODE_ROTATE_90 | \ + DRM_MODE_ROTATE_180 | \ + DRM_MODE_ROTATE_270) + +/* + * DRM_MODE_REFLECT_<axis> + * + * Signals that the contents of a drm plane are reflected in the <axis> axis, + * in the same way as mirroring. + * + * This define is provided as a convenience; looking up the property id + * using the name->prop id lookup is the preferred method. + */ +#define DRM_MODE_REFLECT_X (1<<4) +#define DRM_MODE_REFLECT_Y (1<<5) + +/* + * DRM_MODE_REFLECT_MASK + * + * Bitmask used to look for drm plane reflections. + */ +#define DRM_MODE_REFLECT_MASK (\ + DRM_MODE_REFLECT_X | \ + DRM_MODE_REFLECT_Y) + + struct drm_mode_modeinfo { __u32 clock; __u16 hdisplay; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 3554495bef13..f24a80d2d42e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -412,6 +412,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_FENCE 44 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture + * user-specified buffers for post-mortem debugging of GPU hangs. See + * EXEC_OBJECT_CAPTURE.
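From userspace, the parameter defined just below pairs with EXEC_OBJECT_CAPTURE roughly as in this sketch (fd and batch_handle are assumed to exist; error handling omitted):

int has_capture = 0;
struct drm_i915_getparam gp = {
	.param = I915_PARAM_HAS_EXEC_CAPTURE,
	.value = &has_capture,
};
struct drm_i915_gem_exec_object2 obj = { .handle = batch_handle };

if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && has_capture)
	obj.flags |= EXEC_OBJECT_CAPTURE;	/* record on GPU hang */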
+ */ +#define I915_PARAM_HAS_EXEC_CAPTURE 45 + typedef struct drm_i915_getparam { __s32 param; /* @@ -666,6 +672,8 @@ struct drm_i915_gem_relocation_entry { #define I915_GEM_DOMAIN_VERTEX 0x00000020 /** GTT domain - aperture and scanout */ #define I915_GEM_DOMAIN_GTT 0x00000040 +/** WC domain - uncached access */ +#define I915_GEM_DOMAIN_WC 0x00000080 /** @} */ struct drm_i915_gem_exec_object { @@ -773,8 +781,15 @@ struct drm_i915_gem_exec_object2 { * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. */ #define EXEC_OBJECT_ASYNC (1<<6) +/* Request that the contents of this execobject be copied into the error + * state upon a GPU hang involving this batch for post-mortem debugging. + * These buffers are recorded in no particular order as "user" in + * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see + * if the kernel supports this flag. + */ +#define EXEC_OBJECT_CAPTURE (1<<7) /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1) __u64 flags; union { diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 5e00b2333c26..172dc8ee0e3b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -86,6 +86,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array->map.key_size = attr->key_size; array->map.value_size = attr->value_size; array->map.max_entries = attr->max_entries; + array->map.map_flags = attr->map_flags; array->elem_size = elem_size; if (!percpu) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 39cfafd895b8..b09185f0f17d 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) trie->map.key_size = attr->key_size; trie->map.value_size = attr->value_size; trie->map.max_entries = attr->max_entries; + trie->map.map_flags = attr->map_flags; trie->data_size = attr->key_size - offsetof(struct bpf_lpm_trie_key, data); trie->max_prefixlen = trie->data_size * 8; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 4dfd6f2ec2f9..31147d730abf 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) smap->map.key_size = attr->key_size; smap->map.value_size = value_size; smap->map.max_entries = attr->max_entries; + smap->map.map_flags = attr->map_flags; smap->n_buckets = n_buckets; smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1eddb713b815..339c8a1371de 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -463,19 +463,22 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; +static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno) +{ + BUG_ON(regno >= MAX_BPF_REG); + + memset(&regs[regno], 0, sizeof(regs[regno])); + regs[regno].type = NOT_INIT; + regs[regno].min_value = BPF_REGISTER_MIN_RANGE; + regs[regno].max_value = BPF_REGISTER_MAX_RANGE; +} + static void init_reg_state(struct bpf_reg_state *regs) { int i; - for (i = 0; i < MAX_BPF_REG; i++) { - regs[i].type = NOT_INIT; - regs[i].imm = 0; - regs[i].min_value = BPF_REGISTER_MIN_RANGE; - regs[i].max_value = BPF_REGISTER_MAX_RANGE; - regs[i].min_align = 0; - regs[i].aux_off = 0; - regs[i].aux_off_align = 0; - } + for (i = 0; i < MAX_BPF_REG; i++) + mark_reg_not_init(regs, i); /*
frame pointer */ regs[BPF_REG_FP].type = FRAME_PTR; @@ -808,11 +811,15 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, reg_off += reg->aux_off; } - /* skb->data is NET_IP_ALIGN-ed, but for strict alignment checking - * we force this to 2 which is universally what architectures use - * when they don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + /* For platforms that do not have a Kconfig enabling + * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of + * NET_IP_ALIGN is universally set to '2'. And on platforms + * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get + * to this code only in strict mode where we want to emulate + * the NET_IP_ALIGN==2 checking. Therefore use an + * unconditional IP align value of '2'. */ - ip_align = strict ? 2 : NET_IP_ALIGN; + ip_align = 2; if ((ip_align + reg_off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", ip_align, reg_off, off, size); @@ -839,9 +846,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, { bool strict = env->strict_alignment; - if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) - strict = true; - switch (reg->type) { case PTR_TO_PACKET: return check_pkt_ptr_alignment(reg, off, size, strict); @@ -1345,7 +1349,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; struct bpf_reg_state *regs = state->regs; - struct bpf_reg_state *reg; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1412,11 +1415,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) } /* reset caller saved regs */ - for (i = 0; i < CALLER_SAVED_REGS; i++) { - reg = regs + caller_saved[i]; - reg->type = NOT_INIT; - reg->imm = 0; - } + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(regs, caller_saved[i]); /* update return register */ if (fn->ret_type == RET_INTEGER) { @@ -2444,7 +2444,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { struct bpf_reg_state *regs = env->cur_state.regs; u8 mode = BPF_MODE(insn->code); - struct bpf_reg_state *reg; int i, err; if (!may_access_skb(env->prog->type)) { @@ -2477,11 +2476,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) } /* reset caller saved regs to unreadable */ - for (i = 0; i < CALLER_SAVED_REGS; i++) { - reg = regs + caller_saved[i]; - reg->type = NOT_INIT; - reg->imm = 0; - } + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(regs, caller_saved[i]); /* mark destination R0 register as readable, since it contains * the value fetched from the packet @@ -2692,7 +2688,8 @@ err_free: /* the following conditions reduce the number of explored insns * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet */ -static bool compare_ptrs_to_packet(struct bpf_reg_state *old, +static bool compare_ptrs_to_packet(struct bpf_verifier_env *env, + struct bpf_reg_state *old, struct bpf_reg_state *cur) { if (old->id != cur->id) @@ -2735,7 +2732,7 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old, * 'if (R4 > data_end)' and all further insn were already good with r=20, * so they will be good with r=30 and we can prune the search. 
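The strict mode referenced in this hunk is opted into at program load time via BPF_F_STRICT_ALIGNMENT; a userspace sketch (insns, insn_cnt and a ptr_to_u64() helper are assumed to exist):

union bpf_attr attr = {
	.prog_type  = BPF_PROG_TYPE_SOCKET_FILTER,
	.insns      = ptr_to_u64(insns),
	.insn_cnt   = insn_cnt,
	.license    = ptr_to_u64("GPL"),
	/* Emulate NET_IP_ALIGN == 2 checking even on arches with
	 * efficient unaligned access; this also disables the
	 * packet-pointer pruning shortcut guarded in this hunk. */
	.prog_flags = BPF_F_STRICT_ALIGNMENT,
};

int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));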
*/ - if (old->off <= cur->off && + if (!env->strict_alignment && old->off <= cur->off && old->off >= old->range && cur->off >= cur->range) return true; @@ -2806,7 +2803,7 @@ static bool states_equal(struct bpf_verifier_env *env, continue; if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && - compare_ptrs_to_packet(rold, rcur)) + compare_ptrs_to_packet(env, rold, rcur)) continue; return false; @@ -3584,10 +3581,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) } else { log_level = 0; } - if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT) + + env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; - else - env->strict_alignment = false; ret = replace_map_fd_with_map_ptr(env); if (ret < 0) @@ -3693,7 +3690,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, mutex_lock(&bpf_verifier_lock); log_level = 0; + env->strict_alignment = false; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + env->strict_alignment = true; env->explored_states = kcalloc(env->prog->len, sizeof(struct bpf_verifier_state_list *), diff --git a/kernel/fork.c b/kernel/fork.c index aa1076c5e4a9..e53770d2bf95 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1577,6 +1577,18 @@ static __latent_entropy struct task_struct *copy_process( if (!p) goto fork_out; + /* + * This _must_ happen before we call free_task(), i.e. before we jump + * to any of the bad_fork_* labels. This is to avoid freeing + * p->set_child_tid which is (ab)used as a kthread's data pointer for + * kernel threads (PF_KTHREAD). + */ + p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; + /* + * Clear TID on mm_release()? + */ + p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; + ftrace_graph_init_task(p); rt_mutex_init_task(p); @@ -1743,11 +1755,6 @@ static __latent_entropy struct task_struct *copy_process( } } - p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; - /* - * Clear TID on mm_release()? - */ - p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; #ifdef CONFIG_BLOCK p->plug = NULL; #endif diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 2d2d3a568e4e..adfe3b4cfe05 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -122,7 +122,7 @@ static void *alloc_insn_page(void) return module_alloc(PAGE_SIZE); } -static void free_insn_page(void *page) +void __weak free_insn_page(void *page) { module_memfree(page); } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index b95509416909..28cd09e635ed 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1785,12 +1785,14 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, int ret; raw_spin_lock_irq(&lock->wait_lock); - - set_current_state(TASK_INTERRUPTIBLE); - /* sleep on the mutex */ + set_current_state(TASK_INTERRUPTIBLE); ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); - + /* + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. + */ + fixup_rt_mutex_waiters(lock); raw_spin_unlock_irq(&lock->wait_lock); return ret; @@ -1822,15 +1824,25 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, raw_spin_lock_irq(&lock->wait_lock); /* + * Do an unconditional try-lock, this deals with the lock stealing + * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter() + * sets a NULL owner. 
+ * + * We're not interested in the return value, because the subsequent + * test on rt_mutex_owner() will infer that. If the trylock succeeded, + * we will own the lock and it will have removed the waiter. If we + * failed the trylock, we're still not owner and we need to remove + * ourselves. + */ + try_to_take_rt_mutex(lock, current, waiter); + /* * Unless we're the owner; we're still enqueued on the wait_list. * So check if we became owner, if not, take us off the wait_list. */ if (rt_mutex_owner(lock) != current) { remove_waiter(lock, waiter); - fixup_rt_mutex_waiters(lock); cleanup = true; } - /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 3b1e0f3ad07f..fa46606f3356 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1425,7 +1425,7 @@ static unsigned int nr_meta_pages; * Numbers of normal and highmem page frames allocated for hibernation image * before suspending devices. */ -unsigned int alloc_normal, alloc_highmem; +static unsigned int alloc_normal, alloc_highmem; /* * Memory bitmap used for marking saveable pages (during hibernation) or * hibernation image pages (during restore) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 266ddcc1d8bb..60f356d91060 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, } +void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, + const struct cred *ptracer_cred) +{ + BUG_ON(!list_empty(&child->ptrace_entry)); + list_add(&child->ptrace_entry, &new_parent->ptraced); + child->parent = new_parent; + child->ptracer_cred = get_cred(ptracer_cred); +} + /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. * * Must be called with the tasklist lock write-held. 
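With the new third parameter, callers that already know the tracer's credentials pass them through explicitly; a sketch of the calling convention, mirroring what ptrace_init_task() does in the ptrace.h hunk above (child is hypothetical):

write_lock_irq(&tasklist_lock);
if (current->ptrace)
	__ptrace_link(child, current->parent, current->ptracer_cred);
write_unlock_irq(&tasklist_lock);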
*/ -void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) +static void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { - BUG_ON(!list_empty(&child->ptrace_entry)); - list_add(&child->ptrace_entry, &new_parent->ptraced); - child->parent = new_parent; rcu_read_lock(); - child->ptracer_cred = get_cred(__task_cred(new_parent)); + __ptrace_link(child, new_parent, __task_cred(new_parent)); rcu_read_unlock(); } @@ -386,7 +392,7 @@ static int ptrace_attach(struct task_struct *task, long request, flags |= PT_SEIZED; task->ptrace = flags; - __ptrace_link(task, current); + ptrace_link(task, current); /* SEIZE doesn't trap tracee on attach */ if (!seize) @@ -459,7 +465,7 @@ static int ptrace_traceme(void) */ if (!ret && !(current->real_parent->flags & PF_EXITING)) { current->ptrace = PT_PTRACED; - __ptrace_link(current, current->real_parent); + ptrace_link(current, current->real_parent); } } write_unlock_irq(&tasklist_lock); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 76877a62b5fa..622eed1b7658 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -245,11 +245,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, sugov_update_commit(sg_policy, time, next_f); } -static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu) +static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - u64 last_freq_update_time = sg_policy->last_freq_update_time; unsigned long util = 0, max = 1; unsigned int j; @@ -265,7 +264,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu) * enough, don't take the CPU into account as it probably is * idle now (and clear iowait_boost for it). */ - delta_ns = last_freq_update_time - j_sg_cpu->last_update; + delta_ns = time - j_sg_cpu->last_update; if (delta_ns > TICK_NSEC) { j_sg_cpu->iowait_boost = 0; continue; @@ -309,7 +308,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, if (flags & SCHED_CPUFREQ_RT_DL) next_f = sg_policy->policy->cpuinfo.max_freq; else - next_f = sugov_next_freq_shared(sg_cpu); + next_f = sugov_next_freq_shared(sg_cpu, time); sugov_update_commit(sg_policy, time, next_f); } diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 1370f067fb51..d2a1e6dd0291 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -825,8 +825,10 @@ static void check_thread_timers(struct task_struct *tsk, * At the hard limit, we just die. * No need to calculate anything else now. */ - pr_info("CPU Watchdog Timeout (hard): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); + if (print_fatal_signals) { + pr_info("CPU Watchdog Timeout (hard): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } @@ -838,8 +840,10 @@ static void check_thread_timers(struct task_struct *tsk, soft += USEC_PER_SEC; sig->rlim[RLIMIT_RTTIME].rlim_cur = soft; } - pr_info("RT Watchdog Timeout (soft): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); + if (print_fatal_signals) { + pr_info("RT Watchdog Timeout (soft): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); } } @@ -936,8 +940,10 @@ static void check_process_timers(struct task_struct *tsk, * At the hard limit, we just die. * No need to calculate anything else now. 
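These watchdog messages are now gated on the print_fatal_signals sysctl (0 by default), so a system that wants them back can enable it at runtime, e.g.:

	echo 1 > /proc/sys/kernel/print-fatal-signals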
*/ - pr_info("RT Watchdog Timeout (hard): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); + if (print_fatal_signals) { + pr_info("RT Watchdog Timeout (hard): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } @@ -945,8 +951,10 @@ static void check_process_timers(struct task_struct *tsk, /* * At the soft limit, send a SIGXCPU every second. */ - pr_info("CPU Watchdog Timeout (soft): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); + if (print_fatal_signals) { + pr_info("CPU Watchdog Timeout (soft): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); if (soft < hard) { soft++; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 74fdfe9ed3db..9e5841dc14b5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5063,7 +5063,7 @@ ftrace_graph_release(struct inode *inode, struct file *file) } out: - kfree(fgd->new_hash); + free_ftrace_hash(fgd->new_hash); kfree(fgd); return ret; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 889bc31785be..be88cbaadde3 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4504,6 +4504,44 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "JMP_JSGE_K: Signed jump: value walk 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -3), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 6), + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 4), + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 2), + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 1), + BPF_EXIT_INSN(), /* bad exit */ + BPF_ALU32_IMM(BPF_MOV, R0, 1), /* good exit */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGE_K: Signed jump: value walk 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -3), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 4), + BPF_ALU64_IMM(BPF_ADD, R1, 2), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 2), + BPF_ALU64_IMM(BPF_ADD, R1, 2), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 1), + BPF_EXIT_INSN(), /* bad exit */ + BPF_ALU32_IMM(BPF_MOV, R0, 1), /* good exit */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JGT | BPF_K */ { "JMP_JGT_K: if (3 > 2) return 1", diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 08341d2aa9c9..0db8102995a5 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -179,6 +179,7 @@ static void br_stp_start(struct net_bridge *br) br_debug(br, "using kernel STP\n"); /* To start timers on any ports left in blocking */ + mod_timer(&br->hello_timer, jiffies + br->hello_time); br_port_state_selection(br); } diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index c98b3e5c140a..60b6fe277a8b 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg) if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - if (br->stp_enabled != BR_USER_STP) + if (br->stp_enabled == BR_KERNEL_STP) mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); } diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 5929309beaa1..db85230e49c3 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -68,6 +68,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par) if (e->ethproto != htons(ETH_P_ARP) || e->invflags & EBT_IPROTO) return -EINVAL; + if (ebt_invalid_target(info->target)) + return -EINVAL; + return 0; } diff --git 
a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 9ec0c9f908fa..9c6e619f452b 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1373,7 +1373,8 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name, strlcpy(name, _name, sizeof(name)); if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) || put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) || - xt_data_to_user(um + entrysize, data, usersize, datasize)) + xt_data_to_user(um + entrysize, data, usersize, datasize, + XT_ALIGN(datasize))) return -EFAULT; return 0; @@ -1658,7 +1659,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, if (match->compat_to_user(cm->data, m->data)) return -EFAULT; } else { - if (xt_data_to_user(cm->data, m->data, match->usersize, msize)) + if (xt_data_to_user(cm->data, m->data, match->usersize, msize, + COMPAT_XT_ALIGN(msize))) return -EFAULT; } @@ -1687,7 +1689,8 @@ static int compat_target_to_user(struct ebt_entry_target *t, if (target->compat_to_user(cm->data, t->data)) return -EFAULT; } else { - if (xt_data_to_user(cm->data, t->data, target->usersize, tsize)) + if (xt_data_to_user(cm->data, t->data, target->usersize, tsize, + COMPAT_XT_ALIGN(tsize))) return -EFAULT; } diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 2034fb926670..8757fb87dab8 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -151,7 +151,7 @@ static int process_one_ticket(struct ceph_auth_client *ac, struct timespec validity; void *tp, *tpend; void **ptp; - struct ceph_crypto_key new_session_key; + struct ceph_crypto_key new_session_key = { 0 }; struct ceph_buffer *new_ticket_blob; unsigned long new_expires, new_renew_after; u64 new_secret_id; @@ -215,6 +215,9 @@ static int process_one_ticket(struct ceph_auth_client *ac, dout(" ticket blob is %d bytes\n", dlen); ceph_decode_need(ptp, tpend, 1 + sizeof(u64), bad); blob_struct_v = ceph_decode_8(ptp); + if (blob_struct_v != 1) + goto bad; + new_secret_id = ceph_decode_64(ptp); ret = ceph_decode_buffer(&new_ticket_blob, ptp, tpend); if (ret) @@ -234,13 +237,13 @@ static int process_one_ticket(struct ceph_auth_client *ac, type, ceph_entity_type_name(type), th->secret_id, (int)th->ticket_blob->vec.iov_len); xi->have_keys |= th->service; - -out: - return ret; + return 0; bad: ret = -EINVAL; - goto out; +out: + ceph_crypto_key_destroy(&new_session_key); + return ret; } static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 4fd02831beed..47e94b560ba0 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -56,19 +56,6 @@ static const struct kernel_param_ops param_ops_supported_features = { module_param_cb(supported_features, &param_ops_supported_features, NULL, S_IRUGO); -/* - * find filename portion of a path (/foo/bar/baz -> baz) - */ -const char *ceph_file_part(const char *s, int len) -{ - const char *e = s + len; - - while (e != s && *(e-1) != '/') - e--; - return e; -} -EXPORT_SYMBOL(ceph_file_part); - const char *ceph_msg_type_name(int type) { switch (type) { diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5766a6c896c4..588a91930051 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1174,8 +1174,8 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, * Returns true if the result moves the cursor on to the next piece * of the data item.
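For the ebtables hunks above, the extra aligned_size argument lets xt_data_to_user() clear the XT_ALIGN()/COMPAT_XT_ALIGN() padding tail rather than copy it, so no kernel heap bytes reach userspace. A conceptual sketch of the copy pattern (not the exact implementation):

static int data_to_user_sketch(void __user *dst, const void *src,
			       int usersize, int aligned_size)
{
	/* Copy only what the extension declared ... */
	if (copy_to_user(dst, src, usersize))
		return -EFAULT;
	/* ... and zero the alignment padding explicitly. */
	if (usersize < aligned_size &&
	    clear_user(dst + usersize, aligned_size - usersize))
		return -EFAULT;
	return 0;
}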
*/ -static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, - size_t bytes) +static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) { bool new_piece; @@ -1207,8 +1207,6 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, new_piece = true; } cursor->need_crc = new_piece; - - return new_piece; } static size_t sizeof_footer(struct ceph_connection *con) @@ -1577,7 +1575,6 @@ static int write_partial_message_data(struct ceph_connection *con) size_t page_offset; size_t length; bool last_piece; - bool need_crc; int ret; page = ceph_msg_data_next(cursor, &page_offset, &length, @@ -1592,7 +1589,7 @@ static int write_partial_message_data(struct ceph_connection *con) } if (do_datacrc && cursor->need_crc) crc = ceph_crc32c_page(crc, page, page_offset, length); - need_crc = ceph_msg_data_advance(cursor, (size_t)ret); + ceph_msg_data_advance(cursor, (size_t)ret); } dout("%s %p msg %p done\n", __func__, con, msg); @@ -2231,10 +2228,18 @@ static void process_ack(struct ceph_connection *con) struct ceph_msg *m; u64 ack = le64_to_cpu(con->in_temp_ack); u64 seq; + bool reconnect = (con->in_tag == CEPH_MSGR_TAG_SEQ); + struct list_head *list = reconnect ? &con->out_queue : &con->out_sent; - while (!list_empty(&con->out_sent)) { - m = list_first_entry(&con->out_sent, struct ceph_msg, - list_head); + /* + * In the reconnect case, con_fault() has requeued messages + * in out_sent. We should cleanup old messages according to + * the reconnect seq. + */ + while (!list_empty(list)) { + m = list_first_entry(list, struct ceph_msg, list_head); + if (reconnect && m->needs_out_seq) + break; seq = le64_to_cpu(m->hdr.seq); if (seq > ack) break; @@ -2243,6 +2248,7 @@ static void process_ack(struct ceph_connection *con) m->ack_stamp = jiffies; ceph_msg_remove(m); } + prepare_read_tag(con); } @@ -2299,7 +2305,7 @@ static int read_partial_msg_data(struct ceph_connection *con) if (do_datacrc) crc = ceph_crc32c_page(crc, page, page_offset, ret); - (void) ceph_msg_data_advance(cursor, (size_t)ret); + ceph_msg_data_advance(cursor, (size_t)ret); } if (do_datacrc) con->in_data_crc = crc; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 29a0ef351c5e..250f11f78609 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -43,15 +43,13 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) int i, err = -EINVAL; struct ceph_fsid fsid; u32 epoch, num_mon; - u16 version; u32 len; ceph_decode_32_safe(&p, end, len, bad); ceph_decode_need(&p, end, len, bad); dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p)); - - ceph_decode_16_safe(&p, end, version, bad); + p += sizeof(u16); /* skip version */ ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad); ceph_decode_copy(&p, &fsid, sizeof(fsid)); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index ffe9e904d4d1..55e3a477f92d 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -317,6 +317,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) u32 yes; struct crush_rule *r; + err = -EINVAL; ceph_decode_32_safe(p, end, yes, bad); if (!yes) { dout("crush_decode NO rule %d off %x %p to %p\n", diff --git a/net/core/dst.c b/net/core/dst.c index 960e503b5a52..6192f11beec9 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard_out); -const u32 dst_default_metrics[RTAX_MAX + 1] = { +const struct dst_metrics dst_default_metrics = { /* This initializer is 
needed to force linker to place this variable * into const section. Otherwise it might end into bss section. * We really want to avoid false sharing on this variable, and catch * any writes on it. */ - [RTAX_MAX] = 0xdeadbeef, + .refcnt = ATOMIC_INIT(1), }; void dst_init(struct dst_entry *dst, struct dst_ops *ops, @@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, if (dev) dev_hold(dev); dst->ops = ops; - dst_init_metrics(dst, dst_default_metrics, true); + dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; dst->path = dst; dst->from = NULL; @@ -314,25 +314,30 @@ EXPORT_SYMBOL(dst_release); u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) { - u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC); if (p) { - u32 *old_p = __DST_METRICS_PTR(old); + struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old); unsigned long prev, new; - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + atomic_set(&p->refcnt, 1); + memcpy(p->metrics, old_p->metrics, sizeof(p->metrics)); new = (unsigned long) p; prev = cmpxchg(&dst->_metrics, old, new); if (prev != old) { kfree(p); - p = __DST_METRICS_PTR(prev); + p = (struct dst_metrics *)__DST_METRICS_PTR(prev); if (prev & DST_METRICS_READ_ONLY) p = NULL; + } else if (prev & DST_METRICS_REFCOUNTED) { + if (atomic_dec_and_test(&old_p->refcnt)) + kfree(old_p); } } - return p; + BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0); + return (u32 *)p; } EXPORT_SYMBOL(dst_cow_metrics_generic); @@ -341,7 +346,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) { unsigned long prev, new; - new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY; + new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY; prev = cmpxchg(&dst->_metrics, old, new); if (prev == old) kfree(__DST_METRICS_PTR(old)); diff --git a/net/core/filter.c b/net/core/filter.c index a253a6197e6b..a6bb95fa87b2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2281,6 +2281,7 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_skb_change_head || func == bpf_skb_change_tail || func == bpf_skb_pull_data || + func == bpf_clone_redirect || func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || func == bpf_xdp_adjust_head) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1934efd4a9d4..26bbfababff2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -315,6 +315,25 @@ out_undo: goto out; } +static int __net_init net_defaults_init_net(struct net *net) +{ + net->core.sysctl_somaxconn = SOMAXCONN; + return 0; +} + +static struct pernet_operations net_defaults_ops = { + .init = net_defaults_init_net, +}; + +static __init int net_defaults_init(void) +{ + if (register_pernet_subsys(&net_defaults_ops)) + panic("Cannot initialize net default settings"); + + return 0; +} + +core_initcall(net_defaults_init); #ifdef CONFIG_NET_NS static struct ucounts *inc_net_namespaces(struct user_namespace *ns) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 49a279a7cc15..9e2c0a7cb325 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3231,8 +3231,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int err = 0; int fidx = 0; - if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, NULL) == 0) { + err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, + IFLA_MAX, ifla_policy, NULL); + if (err < 
0) { + return -EINVAL; + } else if (err == 0) { if (tb[IFLA_MASTER]) br_idx = nla_get_u32(tb[IFLA_MASTER]); } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index ea23254b2457..b7cd9aafe99e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -479,8 +479,6 @@ static __net_init int sysctl_core_net_init(struct net *net) { struct ctl_table *tbl; - net->core.sysctl_somaxconn = SOMAXCONN; - tbl = netns_core_table; if (!net_eq(net, &init_net)) { tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index d54345a06f72..e9f3386a528b 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -641,6 +641,32 @@ void arp_xmit(struct sk_buff *skb) } EXPORT_SYMBOL(arp_xmit); +static bool arp_is_garp(struct net *net, struct net_device *dev, + int *addr_type, __be16 ar_op, + __be32 sip, __be32 tip, + unsigned char *sha, unsigned char *tha) +{ + bool is_garp = tip == sip; + + /* Gratuitous ARP _replies_ also require target hwaddr to be + * the same as source. + */ + if (is_garp && ar_op == htons(ARPOP_REPLY)) + is_garp = + /* IPv4 over IEEE 1394 doesn't provide target + * hardware address field in its ARP payload. + */ + tha && + !memcmp(tha, sha, dev->addr_len); + + if (is_garp) { + *addr_type = inet_addr_type_dev_table(net, dev, sip); + if (*addr_type != RTN_UNICAST) + is_garp = false; + } + return is_garp; +} + /* * Process an arp request. */ @@ -837,29 +863,25 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); - if (IN_DEV_ARP_ACCEPT(in_dev)) { - unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip); + addr_type = -1; + if (n || IN_DEV_ARP_ACCEPT(in_dev)) { + is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op, + sip, tip, sha, tha); + } + if (IN_DEV_ARP_ACCEPT(in_dev)) { /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) */ - is_garp = tip == sip && addr_type == RTN_UNICAST; - - /* Unsolicited ARP _replies_ also require target hwaddr to be - * the same as source. - */ - if (is_garp && arp->ar_op == htons(ARPOP_REPLY)) - is_garp = - /* IPv4 over IEEE 1394 doesn't provide target - * hardware address field in its ARP payload. 
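The arp_is_garp() helper introduced above isolates the gratuitous-ARP test: an announcement targets its own sender IP (tip == sip), a gratuitous _reply_ must additionally carry a target hardware address equal to the sender's (when the link type provides one at all), and the sender IP must resolve to a unicast address on the device. A minimal userspace restatement of the first two conditions; struct garp_probe, looks_like_garp() and the fixed 6-byte address length are assumptions of this sketch, not kernel API:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define DEMO_ETH_ALEN 6		/* assumed Ethernet address length */
#define DEMO_ARPOP_REPLY 2

struct garp_probe {
	uint32_t sip, tip;		/* sender/target IPv4 addresses */
	uint16_t op;			/* ARP opcode */
	const uint8_t *sha, *tha;	/* hw addrs; tha may be NULL (IEEE 1394) */
};

static bool looks_like_garp(const struct garp_probe *p)
{
	bool garp = p->tip == p->sip;	/* announces its own address */

	/* Gratuitous replies must also have target hwaddr == source;
	 * IPv4 over IEEE 1394 has no target hwaddr field, hence NULL. */
	if (garp && p->op == DEMO_ARPOP_REPLY)
		garp = p->tha && !memcmp(p->tha, p->sha, DEMO_ETH_ALEN);
	return garp;
}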
- */ - tha && - !memcmp(tha, sha, dev->addr_len); - if (!n && - ((arp->ar_op == htons(ARPOP_REPLY) && - addr_type == RTN_UNICAST) || is_garp)) + (is_garp || + (arp->ar_op == htons(ARPOP_REPLY) && + (addr_type == RTN_UNICAST || + (addr_type < 0 && + /* postpone calculation to as late as possible */ + inet_addr_type_dev_table(net, dev, sip) == + RTN_UNICAST))))) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 65cc02bd82bc..93322f895eab 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -248,6 +248,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * u8 *tail; u8 *vaddr; int nfrags; + int esph_offset; struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; @@ -313,11 +314,13 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * } cow: + esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb); + nfrags = skb_cow_data(skb, tailen, &trailer); if (nfrags < 0) goto out; tail = skb_tail_pointer(trailer); - esp->esph = ip_esp_hdr(skb); + esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset); skip_cow: esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index da449ddb8cc1..ad9ad4aab5da 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -203,6 +203,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); + struct dst_metrics *m; change_nexthops(fi) { if (nexthop_nh->nh_dev) @@ -213,8 +214,9 @@ static void free_fib_info_rcu(struct rcu_head *head) rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); - if (fi->fib_metrics != (u32 *) dst_default_metrics) - kfree(fi->fib_metrics); + m = fi->fib_metrics; + if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) + kfree(m); kfree(fi); } @@ -971,11 +973,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; - fi->fib_metrics[type - 1] = val; + fi->fib_metrics->metrics[type - 1] = val; } if (ecn_ca) - fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; return 0; } @@ -1033,11 +1035,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; fib_info_cnt++; if (cfg->fc_mx) { - fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); if (!fi->fib_metrics) goto failure; + atomic_set(&fi->fib_metrics->refcnt, 1); } else - fi->fib_metrics = (u32 *) dst_default_metrics; + fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; @@ -1238,7 +1241,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, if (fi->fib_priority && nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) goto nla_put_failure; - if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) + if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) goto nla_put_failure; if (fi->fib_prefsrc && diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 655d9eebe43e..6883b3d4ba8f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1385,8 +1385,12 @@ static void rt_add_uncached_list(struct rtable *rt) static void ipv4_dst_destroy(struct 
dst_entry *dst) { + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rtable *rt = (struct rtable *) dst; + if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt)) + kfree(p); + if (!list_empty(&rt->rt_uncached)) { struct uncached_list *ul = rt->rt_uncached_list; @@ -1438,7 +1442,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->rt_gateway = nh->nh_gw; rt->rt_uses_gateway = 1; } - dst_init_metrics(&rt->dst, fi->fib_metrics, true); + dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); + if (fi->fib_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + atomic_inc(&fi->fib_metrics->refcnt); + } #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1e4c76d2b827..59792d283ff8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1084,9 +1084,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, { struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); + struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) + if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && + uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; if (tp->fastopen_req) return -EALREADY; /* Another Fast Open is in progress */ @@ -1108,7 +1111,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, } } flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; - err = __inet_stream_connect(sk->sk_socket, msg->msg_name, + err = __inet_stream_connect(sk->sk_socket, uaddr, msg->msg_namelen, flags, 1); /* fastopen_req could already be freed in __inet_stream_connect * if the connection times out or gets rst @@ -2320,6 +2323,10 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); + /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 + * issue in __tcp_select_window() + */ + icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 8d128ba79b66..0c5b4caa1949 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -537,11 +537,10 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - dsfield = ipv4_get_dsfield(iph); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; + dsfield = ipv4_get_dsfield(iph); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; else @@ -598,9 +597,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + dsfield = ipv6_get_dsfield(ipv6h); + else + dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d4a31becbd25..bf8a58a1c32d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1466,6 +1466,11 @@ alloc_new_skb: */ alloclen += sizeof(struct 
frag_hdr); + copy = datalen - transhdrlen - fraggap; + if (copy < 0) { + err = -EINVAL; + goto error; + } if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len, @@ -1515,13 +1520,9 @@ alloc_new_skb: data += fraggap; pskb_trim_unique(skb_prev, maxfraglen); } - copy = datalen - transhdrlen - fraggap; - - if (copy < 0) { - err = -EINVAL; - kfree_skb(skb); - goto error; - } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { + if (copy > 0 && + getfrag(from, data + transhdrlen, offset, + copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6eb2ae507500..7ae6c503f1ca 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1196,7 +1196,7 @@ route_lookup: skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_flow_hdr(ipv6h, dsfield, ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); ipv6h->hop_limit = hop_limit; ipv6h->nexthdr = proto; @@ -1231,8 +1231,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (tproto != IPPROTO_IPIP && tproto != 0) return -1; - dsfield = ipv4_get_dsfield(iph); - if (t->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -1246,6 +1244,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPIP; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; + dsfield = ip6_tclass(key->label); } else { if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; @@ -1254,8 +1253,9 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPIP; if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; + dsfield = ipv4_get_dsfield(iph); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; else @@ -1267,6 +1267,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; + dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); + skb_set_inner_ipproto(skb, IPPROTO_IPIP); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, @@ -1300,8 +1302,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) ip6_tnl_addr_conflict(t, ipv6h)) return -1; - dsfield = ipv6_get_dsfield(ipv6h); - if (t->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -1315,6 +1315,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPV6; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; + dsfield = ip6_tclass(key->label); } else { offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ @@ -1337,7 +1338,9 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPV6; if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK); + dsfield = ipv6_get_dsfield(ipv6h); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) @@ -1351,6 +1354,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if 
(iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; + dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); + skb_set_inner_ipproto(skb, IPPROTO_IPV6); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, diff --git a/net/key/af_key.c b/net/key/af_key.c index c1950bb14735..512dc43d0ce6 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3285,7 +3285,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + - sec_ctx->sadb_x_sec_len) { + sec_ctx->sadb_x_sec_len*8) { *dir = -EINVAL; goto out; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8364fe5b59e4..c38d16f22d2a 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -311,6 +311,8 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) int rc = -EINVAL; dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); + + lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; @@ -382,6 +384,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) out_put: llc_sap_put(sap); out: + release_sock(sk); return rc; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 35f4c7d7a500..1f75280ba26c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2492,7 +2492,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(hdr->addr1)) { mpp_addr = hdr->addr3; proxied_addr = mesh_hdr->eaddr1; - } else if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6) { + } else if ((mesh_hdr->flags & MESH_FLAGS_AE) == + MESH_FLAGS_AE_A5_A6) { /* has_a4 already checked in ieee80211_rx_mesh_check */ mpp_addr = hdr->addr4; proxied_addr = mesh_hdr->eaddr2; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d2d7bdf1d510..ad99c1ceea6f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -849,10 +849,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, { unsigned int verdict = NF_DROP; - if (IP_VS_FWD_METHOD(cp) != 0) { - pr_err("shouldn't reach here, because the box is on the " - "half connection in the tun/dr module.\n"); - } + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -886,6 +884,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); + +ignore_cp: verdict = NF_ACCEPT; out: @@ -1385,8 +1385,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in */ cp = pp->conn_out_get(ipvs, af, skb, &iph); - if (likely(cp)) + if (likely(cp)) { + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; return handle_response(af, skb, pd, cp, &iph, hooknum); + } /* Check for real-server-started requests */ if (atomic_read(&ipvs->conn_out_counter)) { @@ -1444,9 +1447,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in } } } + +out: IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; + +ignore_cp: + __ip_vs_conn_put(cp); + goto out; } /* diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3a60efa7799b..7f6100ca63be 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -174,6 +174,10 @@ 
nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) #endif if (h != NULL && !try_module_get(h->me)) h = NULL; + if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) { + module_put(h->me); + h = NULL; + } rcu_read_unlock(); @@ -181,6 +185,13 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); +void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) +{ + refcount_dec(&helper->refcnt); + module_put(helper->me); +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); + struct nf_conn_help * nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp) @@ -417,6 +428,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) } } } + refcount_set(&me->refcnt, 1); hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; out: diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index dcf561b5c97a..9799a50bc604 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,6 +45,8 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> #include <net/netfilter/nf_conntrack_labels.h> +#include <net/netfilter/nf_conntrack_seqadj.h> +#include <net/netfilter/nf_conntrack_synproxy.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_l4proto.h> @@ -1007,9 +1009,8 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { static int ctnetlink_parse_tuple(const struct nlattr * const cda[], - struct nf_conntrack_tuple *tuple, - enum ctattr_type type, u_int8_t l3num, - struct nf_conntrack_zone *zone) + struct nf_conntrack_tuple *tuple, u32 type, + u_int8_t l3num, struct nf_conntrack_zone *zone) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; @@ -1828,6 +1829,8 @@ ctnetlink_create_conntrack(struct net *net, nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); nf_ct_labels_ext_add(ct); + nfct_seqadj_ext_add(ct); + nfct_synproxy_ext_add(ct); /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; @@ -2447,7 +2450,7 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = { static int ctnetlink_exp_dump_tuple(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, - enum ctattr_expect type) + u32 type) { struct nlattr *nest_parms; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index b48d6b5aae8a..ef0be325a0c6 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -409,6 +409,10 @@ nf_nat_setup_info(struct nf_conn *ct, { struct nf_conntrack_tuple curr_tuple, new_tuple; + /* Can't setup nat info for confirmed ct. 
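The conntrack-helper hunks above pair two references on lookup: the owning module via try_module_get(), then the helper object via refcount_inc_not_zero(), so a helper that is being unregistered (refcount already zero) can no longer be handed out; nf_conntrack_helper_put() drops both in reverse order. A minimal sketch of that get/put discipline, using the real module and refcount_t primitives but a hypothetical demo_helper object:

#include <linux/module.h>
#include <linux/refcount.h>

struct demo_helper {
	struct module *me;	/* owning module */
	refcount_t refcnt;	/* object lifetime, set to 1 on register */
};

static struct demo_helper *demo_helper_get(struct demo_helper *h)
{
	if (!try_module_get(h->me))		/* pin the module first */
		return NULL;
	if (!refcount_inc_not_zero(&h->refcnt)) {
		module_put(h->me);		/* object already dying */
		return NULL;
	}
	return h;
}

static void demo_helper_put(struct demo_helper *h)
{
	refcount_dec(&h->refcnt);		/* reverse order of the get */
	module_put(h->me);
}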
*/ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); BUG_ON(nf_nat_initialized(ct, maniptype)); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 559225029740..da314be0c048 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3367,35 +3367,50 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, return nf_tables_fill_setelem(args->skb, set, elem); } +struct nft_set_dump_ctx { + const struct nft_set *set; + struct nft_ctx ctx; +}; + static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { + struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); - u8 genmask = nft_genmask_cur(net); + struct nft_af_info *afi; + struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; - struct nft_ctx ctx; - struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1]; + bool set_found = false; struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *nest; u32 portid, seq; - int event, err; + int event; - err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla, - NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy, - NULL); - if (err < 0) - return err; + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { + if (afi != dump_ctx->ctx.afi) + continue; - err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh, - (void *)nla, genmask); - if (err < 0) - return err; + list_for_each_entry_rcu(table, &afi->tables, list) { + if (table != dump_ctx->ctx.table) + continue; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], - genmask); - if (IS_ERR(set)) - return PTR_ERR(set); + list_for_each_entry_rcu(set, &table->sets, list) { + if (set == dump_ctx->set) { + set_found = true; + break; + } + } + break; + } + break; + } + + if (!set_found) { + rcu_read_unlock(); + return -ENOENT; + } event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM); portid = NETLINK_CB(cb->skb).portid; @@ -3407,11 +3422,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx.afi->family; + nfmsg->nfgen_family = afi->family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(ctx.net->nft.base_seq & 0xffff); + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name)) + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) goto nla_put_failure; @@ -3422,12 +3437,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.cb = cb; args.skb = skb; - args.iter.genmask = nft_genmask_cur(ctx.net); + args.iter.genmask = nft_genmask_cur(net); args.iter.skip = cb->args[0]; args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; - set->ops->walk(&ctx, set, &args.iter); + set->ops->walk(&dump_ctx->ctx, set, &args.iter); + rcu_read_unlock(); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); @@ -3441,9 +3457,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; nla_put_failure: + rcu_read_unlock(); return -ENOSPC; } +static int nf_tables_dump_set_done(struct netlink_callback *cb) +{ + kfree(cb->data); + return 0; +} + static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct 
nlattr * const nla[]) @@ -3465,7 +3488,18 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_set, + .done = nf_tables_dump_set_done, }; + struct nft_set_dump_ctx *dump_ctx; + + dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); + if (!dump_ctx) + return -ENOMEM; + + dump_ctx->set = set; + dump_ctx->ctx = ctx; + + c.data = dump_ctx; return netlink_dump_start(nlsk, skb, nlh, &c); } return -EOPNOTSUPP; @@ -3593,9 +3627,9 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); - nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); + nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) - nft_data_uninit(nft_set_ext_data(ext), set->dtype); + nft_data_release(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) @@ -3604,6 +3638,18 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); +/* Only called from commit path, nft_set_elem_deactivate() already deals with + * the refcounting from the preparation phase. + */ +static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem) +{ + struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); + kfree(elem); +} + static int nft_setelem_parse_flags(const struct nft_set *set, const struct nlattr *attr, u32 *flags) { @@ -3815,9 +3861,9 @@ err4: kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_uninit(&data, d2.type); + nft_data_release(&data, d2.type); err2: - nft_data_uninit(&elem.key.val, d1.type); + nft_data_release(&elem.key.val, d1.type); err1: return err; } @@ -3862,6 +3908,53 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, return err; } +/** + * nft_data_hold - hold a nft_data item + * + * @data: struct nft_data to hold + * @type: type of data + * + * Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded, + * NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and + * NFT_GOTO verdicts. This function must be called on active data objects + * from the second phase of the commit protocol.
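Put differently, nft_data_hold() mirrors nft_data_release() below for verdict data: NFT_JUMP and NFT_GOTO verdicts pin their destination chain through the chain's plain-integer use counter, which is serialized by the nfnl mutex and therefore needs no atomics. A toy restatement, with demo_* names invented for the sketch and verdict codes taken from the uapi header:

#include <linux/netfilter/nf_tables.h>	/* NFT_JUMP, NFT_GOTO */

struct demo_chain { unsigned int use; };

struct demo_verdict {
	int code;			/* NFT_JUMP, NFT_GOTO, ... */
	struct demo_chain *chain;	/* valid for jump/goto only */
};

static void demo_verdict_hold(const struct demo_verdict *v)
{
	switch (v->code) {
	case NFT_JUMP:
	case NFT_GOTO:
		v->chain->use++;	/* keep the target chain alive */
		break;
	}
}

static void demo_verdict_release(const struct demo_verdict *v)
{
	switch (v->code) {
	case NFT_JUMP:
	case NFT_GOTO:
		v->chain->use--;	/* drop the reference taken in hold */
		break;
	}
}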
+ */ +static void nft_data_hold(const struct nft_data *data, enum nft_data_types type) +{ + if (type == NFT_DATA_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + data->verdict.chain->use++; + break; + } + } +} + +static void nft_set_elem_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) +{ + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_hold(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + (*nft_set_ext_obj(ext))->use++; +} + +static void nft_set_elem_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) +{ + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_release(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + (*nft_set_ext_obj(ext))->use--; +} + static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { @@ -3927,6 +4020,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, kfree(elem.priv); elem.priv = priv; + nft_set_elem_deactivate(ctx->net, set, &elem); + nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -3936,7 +4031,7 @@ err4: err3: kfree(elem.priv); err2: - nft_data_uninit(&elem.key.val, desc.type); + nft_data_release(&elem.key.val, desc.type); err1: return err; } @@ -4743,8 +4838,8 @@ static void nf_tables_commit_release(struct nft_trans *trans) nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: - nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv, true); + nf_tables_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); break; case NFT_MSG_DELOBJ: nft_obj_destroy(nft_trans_obj(trans)); @@ -4979,6 +5074,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; + nft_set_elem_activate(net, te->set, &te->elem); te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; @@ -5464,7 +5560,7 @@ int nft_data_init(const struct nft_ctx *ctx, EXPORT_SYMBOL_GPL(nft_data_init); /** - * nft_data_uninit - release a nft_data item + * nft_data_release - release a nft_data item * * @data: struct nft_data to release * @type: type of data @@ -5472,7 +5568,7 @@ EXPORT_SYMBOL_GPL(nft_data_init); * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded, * all others need to be released by calling this function. 
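The DELSETELEM changes above are an instance of the two-phase commit protocol these transactions follow: nft_set_elem_deactivate() drops the element's chain/object references in the preparation phase, nf_tables_set_elem_destroy() frees the element on commit without dropping them again, and nft_set_elem_activate() restores them if the transaction aborts. A self-contained toy model of that ordering; every name here is invented for illustration:

#include <stdio.h>

struct demo_elem { int refs; };

static void demo_deactivate(struct demo_elem *e) { e->refs--; }	/* prepare */
static void demo_activate(struct demo_elem *e)   { e->refs++; }	/* abort undoes prepare */

static void demo_commit(struct demo_elem *e)
{
	/* refs were already dropped in the preparation phase */
	printf("freeing element, refs=%d\n", e->refs);
}

int main(void)
{
	struct demo_elem e = { .refs = 1 };
	int aborted = 0;	/* outcome of the transaction */

	demo_deactivate(&e);		/* phase 1: prepare the delete */
	if (aborted)
		demo_activate(&e);	/* roll back: references restored */
	else
		demo_commit(&e);	/* phase 2: destroy, no re-release */
	return 0;
}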
*/ -void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) +void nft_data_release(const struct nft_data *data, enum nft_data_types type) { if (type < NFT_DATA_VERDICT) return; @@ -5483,7 +5579,7 @@ void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) WARN_ON(1); } } -EXPORT_SYMBOL_GPL(nft_data_uninit); +EXPORT_SYMBOL_GPL(nft_data_release); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 950bf6eadc65..be678a323598 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -686,6 +686,7 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } + ret = -ENOENT; list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { cur = &nlcth->helper; j++; @@ -699,16 +700,20 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple.dst.protonum != cur->tuple.dst.protonum)) continue; - found = true; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); + if (refcount_dec_if_one(&cur->refcnt)) { + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); - list_del(&nlcth->list); - kfree(nlcth); + list_del(&nlcth->list); + kfree(nlcth); + } else { + ret = -EBUSY; + } } /* Make sure we return success if we flush and there are no helpers */ - return (found || j == 0) ? 0 : -ENOENT; + return (found || j == 0) ? 0 : ret; } static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 877d9acd91ef..fff8073e2a56 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -83,17 +83,26 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_MASK]); if (err < 0) return err; - if (d1.len != priv->len) - return -EINVAL; + if (d1.len != priv->len) { + err = -EINVAL; + goto err1; + } err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2, tb[NFTA_BITWISE_XOR]); if (err < 0) - return err; - if (d2.len != priv->len) - return -EINVAL; + goto err1; + if (d2.len != priv->len) { + err = -EINVAL; + goto err2; + } return 0; +err2: + nft_data_release(&priv->xor, d2.type); +err1: + nft_data_release(&priv->mask, d1.type); + return err; } static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2b96effeadc1..c2945eb3397c 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -201,10 +201,18 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) if (err < 0) return ERR_PTR(err); + if (desc.type != NFT_DATA_VALUE) { + err = -EINVAL; + goto err1; + } + if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ) return &nft_cmp_fast_ops; - else - return &nft_cmp_ops; + + return &nft_cmp_ops; +err1: + nft_data_release(&data, desc.type); + return ERR_PTR(-EINVAL); } struct nft_expr_type nft_cmp_type __read_mostly = { diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a34ceb38fc55..1678e9e75e8e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -826,9 +826,9 @@ static void nft_ct_helper_obj_destroy(struct nft_object *obj) struct nft_ct_helper_obj *priv = nft_obj_data(obj); if (priv->helper4) - module_put(priv->helper4->me); + nf_conntrack_helper_put(priv->helper4); if (priv->helper6) - module_put(priv->helper6->me); + 
nf_conntrack_helper_put(priv->helper6); } static void nft_ct_helper_obj_eval(struct nft_object *obj, diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 728baf88295a..4717d7796927 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -65,7 +65,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx, return 0; err1: - nft_data_uninit(&priv->data, desc.type); + nft_data_release(&priv->data, desc.type); return err; } @@ -73,7 +73,8 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); - return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); + + return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); } static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 9edc74eedc10..cedb96c3619f 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -102,9 +102,9 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr priv->len = desc_from.len; return 0; err2: - nft_data_uninit(&priv->data_to, desc_to.type); + nft_data_release(&priv->data_to, desc_to.type); err1: - nft_data_uninit(&priv->data_from, desc_from.type); + nft_data_release(&priv->data_from, desc_from.type); return err; } diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 8ec086b6b56b..3d3a6df4ce70 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -222,7 +222,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_elem elem; int err; - err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); + err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC); iter->err = err; if (err) return; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8876b7da6884..1770c1d9b37f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -283,28 +283,30 @@ static int xt_obj_to_user(u16 __user *psize, u16 size, &U->u.user.revision, K->u.kernel.TYPE->revision) int xt_data_to_user(void __user *dst, const void *src, - int usersize, int size) + int usersize, int size, int aligned_size) { usersize = usersize ? : size; if (copy_to_user(dst, src, usersize)) return -EFAULT; - if (usersize != size && clear_user(dst + usersize, size - usersize)) + if (usersize != aligned_size && + clear_user(dst + usersize, aligned_size - usersize)) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(xt_data_to_user); -#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ +#define XT_DATA_TO_USER(U, K, TYPE) \ xt_data_to_user(U->data, K->data, \ K->u.kernel.TYPE->usersize, \ - C_SIZE ? 
: K->u.kernel.TYPE->TYPE##size) + K->u.kernel.TYPE->TYPE##size, \ + XT_ALIGN(K->u.kernel.TYPE->TYPE##size)) int xt_match_to_user(const struct xt_entry_match *m, struct xt_entry_match __user *u) { return XT_OBJ_TO_USER(u, m, match, 0) || - XT_DATA_TO_USER(u, m, match, 0); + XT_DATA_TO_USER(u, m, match); } EXPORT_SYMBOL_GPL(xt_match_to_user); @@ -312,7 +314,7 @@ int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u) { return XT_OBJ_TO_USER(u, t, target, 0) || - XT_DATA_TO_USER(u, t, target, 0); + XT_DATA_TO_USER(u, t, target); } EXPORT_SYMBOL_GPL(xt_target_to_user); @@ -611,6 +613,12 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); +#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ + xt_data_to_user(U->data, K->data, \ + K->u.kernel.TYPE->usersize, \ + C_SIZE, \ + COMPAT_XT_ALIGN(C_SIZE)) + int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size) { @@ -626,7 +634,7 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, if (match->compat_to_user((void __user *)cm->data, m->data)) return -EFAULT; } else { - if (XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm))) + if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm))) return -EFAULT; } @@ -972,7 +980,7 @@ int xt_compat_target_to_user(const struct xt_entry_target *t, if (target->compat_to_user((void __user *)ct->data, t->data)) return -EFAULT; } else { - if (XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct))) + if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct))) return -EFAULT; } diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index bb7ad82dcd56..623ef37de886 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -96,7 +96,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); if (help == NULL) { - module_put(helper->me); + nf_conntrack_helper_put(helper); return -ENOMEM; } @@ -263,7 +263,7 @@ out: err4: help = nfct_help(ct); if (help) - module_put(help->helper->me); + nf_conntrack_helper_put(help->helper); err3: nf_ct_tmpl_free(ct); err2: @@ -346,7 +346,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, if (ct) { help = nfct_help(ct); if (help) - module_put(help->helper->me); + nf_conntrack_helper_put(help->helper); nf_ct_netns_put(par->net, par->family); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index bf602e33c40a..08679ebb3068 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1123,7 +1123,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); if (!help) { - module_put(helper->me); + nf_conntrack_helper_put(helper); return -ENOMEM; } @@ -1584,7 +1584,7 @@ void ovs_ct_free_action(const struct nlattr *a) static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) { if (ct_info->helper) - module_put(ct_info->helper->me); + nf_conntrack_helper_put(ct_info->helper); if (ct_info->ct) nf_ct_tmpl_free(ct_info->ct); } diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index dee469fed967..51859b8edd7e 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -203,7 +203,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) head; rcu_assign_pointer(tp->root, new); - call_rcu(&head->rcu, mall_destroy_rcu); return 0; err_replace_hw_filter: diff --git 
a/net/sctp/associola.c b/net/sctp/associola.c index a9708da28eb5..95238284c422 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1176,7 +1176,9 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->ctsn_ack_point = asoc->next_tsn - 1; asoc->adv_peer_ack_point = asoc->ctsn_ack_point; - if (!asoc->stream) { + + if (sctp_state(asoc, COOKIE_WAIT)) { + sctp_stream_free(asoc->stream); asoc->stream = new->stream; new->stream = NULL; } diff --git a/net/sctp/input.c b/net/sctp/input.c index 0e06a278d2a9..ba9ad32fc447 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -473,15 +473,14 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, struct sctp_association **app, struct sctp_transport **tpp) { + struct sctp_init_chunk *chunkhdr, _chunkhdr; union sctp_addr saddr; union sctp_addr daddr; struct sctp_af *af; struct sock *sk = NULL; struct sctp_association *asoc; struct sctp_transport *transport = NULL; - struct sctp_init_chunk *chunkhdr; __u32 vtag = ntohl(sctphdr->vtag); - int len = skb->len - ((void *)sctphdr - (void *)skb->data); *app = NULL; *tpp = NULL; @@ -516,13 +515,16 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, * discard the packet. */ if (vtag == 0) { - chunkhdr = (void *)sctphdr + sizeof(struct sctphdr); - if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) + sizeof(__be32) || + /* chunk header + first 4 octets of init header */ + chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) + + sizeof(struct sctphdr), + sizeof(struct sctp_chunkhdr) + + sizeof(__be32), &_chunkhdr); + if (!chunkhdr || chunkhdr->chunk_hdr.type != SCTP_CID_INIT || - ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) { + ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) goto out; - } + } else if (vtag != asoc->c.peer_vtag) { goto out; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8a08f13469c4..92e332e17391 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2454,16 +2454,11 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * stream sequence number shall be set to 0. */ - /* Allocate storage for the negotiated streams if it is not a temporary - * association. - */ - if (!asoc->temp) { - if (sctp_stream_init(asoc, gfp)) - goto clean_up; + if (sctp_stream_init(asoc, gfp)) + goto clean_up; - if (sctp_assoc_set_id(asoc, gfp)) - goto clean_up; - } + if (!asoc->temp && sctp_assoc_set_id(asoc, gfp)) + goto clean_up; /* ADDIP Section 4.1 ASCONF Chunk Procedures * diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 4f5e6cfc7f60..f863b5573e42 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2088,6 +2088,9 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, } } + /* Set temp so that it won't be added into hashtable */ + new_asoc->temp = 1; + /* Compare the tie_tag in cookie with the verification tag of * current association.
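The sctp_err_lookup() hunk above swaps open-coded skb->len arithmetic for skb_header_pointer(), which copies the requested bytes into a caller-supplied buffer when the skb data is paged and returns NULL when the packet is too short, so truncated ICMP payloads can no longer be read past the end. A minimal sketch of the same pattern under the same layout assumption (INIT chunk directly after the SCTP header); demo_peek_init_tag() is a made-up name:

#include <linux/skbuff.h>
#include <linux/sctp.h>

static bool demo_peek_init_tag(struct sk_buff *skb, __be32 *tag)
{
	struct {
		struct sctp_chunkhdr ch;
		__be32 init_tag;	/* first 4 octets of the INIT header */
	} _hdr, *hdr;

	/* Copies into _hdr if the bytes are non-linear; NULL if truncated. */
	hdr = skb_header_pointer(skb, skb_transport_offset(skb) +
				 sizeof(struct sctphdr), sizeof(_hdr), &_hdr);
	if (!hdr || hdr->ch.type != SCTP_CID_INIT)
		return false;

	*tag = hdr->init_tag;
	return true;
}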
*/ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 6f7f6757ceef..dfc8c51e4d74 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1540,8 +1540,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, long timeout; int err; struct vsock_transport_send_notify_data send_data; - - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); sk = sock->sk; vsk = vsock_sk(sk); @@ -1584,11 +1583,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (err < 0) goto out; - while (total_written < len) { ssize_t written; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); while (vsock_stream_has_space(vsk) == 0 && sk->sk_err == 0 && !(sk->sk_shutdown & SEND_SHUTDOWN) && @@ -1597,33 +1595,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Don't wait for non-blocking sockets. */ if (timeout == 0) { err = -EAGAIN; - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } err = transport->notify_send_pre_block(vsk, &send_data); if (err < 0) { - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } release_sock(sk); - timeout = schedule_timeout(timeout); + timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); lock_sock(sk); if (signal_pending(current)) { err = sock_intr_errno(timeout); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } else if (timeout == 0) { err = -EAGAIN; - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } - - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); /* These checks occur both as part of and after the loop * conditional since we need to check before and after diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 14d5f0c8c45f..9f0901f3e42b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -322,9 +322,9 @@ cfg80211_find_sched_scan_req(struct cfg80211_registered_device *rdev, u64 reqid) { struct cfg80211_sched_scan_request *pos; - ASSERT_RTNL(); + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); - list_for_each_entry(pos, &rdev->sched_scan_req_list, list) { + list_for_each_entry_rcu(pos, &rdev->sched_scan_req_list, list) { if (pos->reqid == reqid) return pos; } @@ -398,13 +398,13 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid) trace_cfg80211_sched_scan_results(wiphy, reqid); /* ignore if we're not scanning */ - rtnl_lock(); + rcu_read_lock(); request = cfg80211_find_sched_scan_req(rdev, reqid); if (request) { request->report_results = true; queue_work(cfg80211_wq, &rdev->sched_scan_res_wk); } - rtnl_unlock(); + rcu_read_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_results); diff --git a/net/wireless/util.c b/net/wireless/util.c index 7198373e2920..4992f1025c9d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -454,6 +454,8 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, if (iftype == NL80211_IFTYPE_MESH_POINT) skb_copy_bits(skb, hdrlen, &mesh_flags, 1); + mesh_flags &= MESH_FLAGS_AE; + switch (hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { case cpu_to_le16(IEEE80211_FCTL_TODS): @@ -469,9 +471,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, iftype != NL80211_IFTYPE_STATION)) return -1; if (iftype == 
NL80211_IFTYPE_MESH_POINT) { - if (mesh_flags & MESH_FLAGS_AE_A4) + if (mesh_flags == MESH_FLAGS_AE_A4) return -1; - if (mesh_flags & MESH_FLAGS_AE_A5_A6) { + if (mesh_flags == MESH_FLAGS_AE_A5_A6) { skb_copy_bits(skb, hdrlen + offsetof(struct ieee80211s_hdr, eaddr1), tmp.h_dest, 2 * ETH_ALEN); @@ -487,9 +489,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, ether_addr_equal(tmp.h_source, addr))) return -1; if (iftype == NL80211_IFTYPE_MESH_POINT) { - if (mesh_flags & MESH_FLAGS_AE_A5_A6) + if (mesh_flags == MESH_FLAGS_AE_A5_A6) return -1; - if (mesh_flags & MESH_FLAGS_AE_A4) + if (mesh_flags == MESH_FLAGS_AE_A4) skb_copy_bits(skb, hdrlen + offsetof(struct ieee80211s_hdr, eaddr1), tmp.h_source, ETH_ALEN); diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 8ec8a3fcf8d4..574e6f32f94f 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -170,7 +170,7 @@ static int xfrm_dev_feat_change(struct net_device *dev) static int xfrm_dev_down(struct net_device *dev) { - if (dev->hw_features & NETIF_F_HW_ESP) + if (dev->features & NETIF_F_HW_ESP) xfrm_dev_state_flush(dev_net(dev), dev, true); xfrm_garbage_collect(dev_net(dev)); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b00a1d5a7f52..ed4e52d95172 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1797,43 +1797,6 @@ free_dst: goto out; } -#ifdef CONFIG_XFRM_SUB_POLICY -static int xfrm_dst_alloc_copy(void **target, const void *src, int size) -{ - if (!*target) { - *target = kmalloc(size, GFP_ATOMIC); - if (!*target) - return -ENOMEM; - } - - memcpy(*target, src, size); - return 0; -} -#endif - -static int xfrm_dst_update_parent(struct dst_entry *dst, - const struct xfrm_selector *sel) -{ -#ifdef CONFIG_XFRM_SUB_POLICY - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - return xfrm_dst_alloc_copy((void **)&(xdst->partner), - sel, sizeof(*sel)); -#else - return 0; -#endif -} - -static int xfrm_dst_update_origin(struct dst_entry *dst, - const struct flowi *fl) -{ -#ifdef CONFIG_XFRM_SUB_POLICY - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl)); -#else - return 0; -#endif -} - static int xfrm_expand_policies(const struct flowi *fl, u16 family, struct xfrm_policy **pols, int *num_pols, int *num_xfrms) @@ -1905,16 +1868,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, xdst = (struct xfrm_dst *)dst; xdst->num_xfrms = err; - if (num_pols > 1) - err = xfrm_dst_update_parent(dst, &pols[1]->selector); - else - err = xfrm_dst_update_origin(dst, fl); - if (unlikely(err)) { - dst_free(dst); - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - return ERR_PTR(err); - } - xdst->num_pols = num_pols; memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index fc3c5aa38754..2e291bc5f1fc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1383,6 +1383,8 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig) x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; x->km.seq = orig->km.seq; + x->replay = orig->replay; + x->preplay = orig->preplay; return x; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 58df440013c5..918e45268915 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2328,6 +2328,7 @@ static const struct snd_pci_quirk 
alc882_fixup_tbl[] = { SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), + SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), @@ -2342,6 +2343,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = { {.id = ALC883_FIXUP_ACER_EAPD, .name = "acer-aspire"}, {.id = ALC882_FIXUP_INV_DMIC, .name = "inv-dmic"}, {.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"}, + {.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"}, {} }; @@ -6014,6 +6016,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"}, {.id = ALC292_FIXUP_TPT440, .name = "tpt440"}, {.id = ALC292_FIXUP_TPT460, .name = "tpt460"}, + {.id = ALC233_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"}, {} }; #define ALC225_STANDARD_PINS \ @@ -6465,8 +6468,11 @@ static int patch_alc269(struct hda_codec *codec) break; case 0x10ec0225: case 0x10ec0295: + spec->codec_variant = ALC269_TYPE_ALC225; + break; case 0x10ec0299: spec->codec_variant = ALC269_TYPE_ALC225; + spec->gen.mixer_nid = 0; /* no loopback on ALC299 */ break; case 0x10ec0234: case 0x10ec0274: @@ -7338,6 +7344,7 @@ static const struct hda_model_fixup alc662_fixup_models[] = { {.id = ALC662_FIXUP_ASUS_MODE8, .name = "asus-mode8"}, {.id = ALC662_FIXUP_INV_DMIC, .name = "inv-dmic"}, {.id = ALC668_FIXUP_DELL_MIC_NO_PRESENCE, .name = "dell-headset-multi"}, + {.id = ALC662_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"}, {} }; diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index faa3d38bac0b..6cefdf6c0b75 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1559,6 +1559,8 @@ static const struct snd_pci_quirk stac9200_fixup_tbl[] = { "Dell Inspiron 1501", STAC_9200_DELL_M26), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f6, "unknown Dell", STAC_9200_DELL_M26), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0201, + "Dell Latitude D430", STAC_9200_DELL_M22), /* Panasonic */ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-74", STAC_9200_PANASONIC), /* Gateway machines needs EAPD to be set on resume */ diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 01eff6ce6401..d7b0b0a3a2db 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1364,7 +1364,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, /* Amanero Combo384 USB interface with native DSD support */ case USB_ID(0x16d0, 0x071a): if (fp->altsetting == 2) { - switch (chip->dev->descriptor.bcdDevice) { + switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) { case 0x199: return SNDRV_PCM_FMTBIT_DSD_U32_LE; case 0x19b: diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index b11d3920b9a5..c19efc9708d7 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -42,6 +42,11 @@ #include <drm/intel_lpe_audio.h> #include "intel_hdmi_audio.h" +#define for_each_pipe(card_ctx, pipe) \ + for ((pipe) = 0; (pipe) < (card_ctx)->num_pipes; (pipe)++) +#define for_each_port(card_ctx, port) \ + for ((port) = 0; (port) < (card_ctx)->num_ports; (port)++) + /*standard module options for ALSA. 
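The Amanero quirk fix above is an endianness issue: multi-byte USB descriptor fields such as bcdDevice are little-endian on the wire, so comparing the raw __le16 value only happens to work on little-endian CPUs. A sketch of the corrected pattern; demo_is_amanero_dsd() is an invented wrapper around the same revision values used in the hunk:

#include <linux/usb.h>

static bool demo_is_amanero_dsd(struct usb_device *dev)
{
	/* always byte-swap descriptor fields before comparing */
	switch (le16_to_cpu(dev->descriptor.bcdDevice)) {
	case 0x199:	/* firmware revisions known to do native DSD */
	case 0x19b:
		return true;
	default:
		return false;
	}
}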
This module supports only one card*/ static int hdmi_card_index = SNDRV_DEFAULT_IDX1; static char *hdmi_card_id = SNDRV_DEFAULT_STR1; @@ -189,15 +194,30 @@ static void had_substream_put(struct snd_intelhad *intelhaddata) spin_unlock_irqrestore(&intelhaddata->had_spinlock, flags); } +static u32 had_config_offset(int pipe) +{ + switch (pipe) { + default: + case 0: + return AUDIO_HDMI_CONFIG_A; + case 1: + return AUDIO_HDMI_CONFIG_B; + case 2: + return AUDIO_HDMI_CONFIG_C; + } +} + /* Register access functions */ -static u32 had_read_register_raw(struct snd_intelhad *ctx, u32 reg) +static u32 had_read_register_raw(struct snd_intelhad_card *card_ctx, + int pipe, u32 reg) { - return ioread32(ctx->mmio_start + ctx->had_config_offset + reg); + return ioread32(card_ctx->mmio_start + had_config_offset(pipe) + reg); } -static void had_write_register_raw(struct snd_intelhad *ctx, u32 reg, u32 val) +static void had_write_register_raw(struct snd_intelhad_card *card_ctx, + int pipe, u32 reg, u32 val) { - iowrite32(val, ctx->mmio_start + ctx->had_config_offset + reg); + iowrite32(val, card_ctx->mmio_start + had_config_offset(pipe) + reg); } static void had_read_register(struct snd_intelhad *ctx, u32 reg, u32 *val) @@ -205,13 +225,13 @@ static void had_read_register(struct snd_intelhad *ctx, u32 reg, u32 *val) if (!ctx->connected) *val = 0; else - *val = had_read_register_raw(ctx, reg); + *val = had_read_register_raw(ctx->card_ctx, ctx->pipe, reg); } static void had_write_register(struct snd_intelhad *ctx, u32 reg, u32 val) { if (ctx->connected) - had_write_register_raw(ctx, reg, val); + had_write_register_raw(ctx->card_ctx, ctx->pipe, reg, val); } /* @@ -1358,6 +1378,9 @@ static void had_process_hot_plug(struct snd_intelhad *intelhaddata) return; } + /* Disable Audio */ + had_enable_audio(intelhaddata, false); + intelhaddata->connected = true; dev_dbg(intelhaddata->dev, "%s @ %d:DEBUG PLUG/UNPLUG : HAD_DRV_CONNECTED\n", @@ -1519,22 +1542,32 @@ static const struct snd_kcontrol_new had_controls[] = { */ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id) { - struct snd_intelhad *ctx = dev_id; - u32 audio_stat; + struct snd_intelhad_card *card_ctx = dev_id; + u32 audio_stat[3] = {}; + int pipe, port; + + for_each_pipe(card_ctx, pipe) { + /* use raw register access to ack IRQs even while disconnected */ + audio_stat[pipe] = had_read_register_raw(card_ctx, pipe, + AUD_HDMI_STATUS) & + (HDMI_AUDIO_UNDERRUN | HDMI_AUDIO_BUFFER_DONE); + + if (audio_stat[pipe]) + had_write_register_raw(card_ctx, pipe, + AUD_HDMI_STATUS, audio_stat[pipe]); + } - /* use raw register access to ack IRQs even while disconnected */ - audio_stat = had_read_register_raw(ctx, AUD_HDMI_STATUS); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + int pipe = ctx->pipe; - if (audio_stat & HDMI_AUDIO_UNDERRUN) { - had_write_register_raw(ctx, AUD_HDMI_STATUS, - HDMI_AUDIO_UNDERRUN); - had_process_buffer_underrun(ctx); - } + if (pipe < 0) + continue; - if (audio_stat & HDMI_AUDIO_BUFFER_DONE) { - had_write_register_raw(ctx, AUD_HDMI_STATUS, - HDMI_AUDIO_BUFFER_DONE); - had_process_buffer_done(ctx); + if (audio_stat[pipe] & HDMI_AUDIO_BUFFER_DONE) + had_process_buffer_done(ctx); + if (audio_stat[pipe] & HDMI_AUDIO_UNDERRUN) + had_process_buffer_underrun(ctx); } return IRQ_HANDLED; @@ -1543,9 +1576,10 @@ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id) /* * monitor plug/unplug notification from i915; just kick off the work */ -static void notify_audio_lpe(struct 
platform_device *pdev) +static void notify_audio_lpe(struct platform_device *pdev, int port) { - struct snd_intelhad *ctx = platform_get_drvdata(pdev); + struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev); + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; schedule_work(&ctx->hdmi_audio_wq); } @@ -1556,47 +1590,51 @@ static void had_audio_wq(struct work_struct *work) struct snd_intelhad *ctx = container_of(work, struct snd_intelhad, hdmi_audio_wq); struct intel_hdmi_lpe_audio_pdata *pdata = ctx->dev->platform_data; + struct intel_hdmi_lpe_audio_port_pdata *ppdata = &pdata->port[ctx->port]; pm_runtime_get_sync(ctx->dev); mutex_lock(&ctx->mutex); - if (!pdata->hdmi_connected) { - dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG\n", - __func__); + if (ppdata->pipe < 0) { + dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG : port = %d\n", + __func__, ctx->port); + memset(ctx->eld, 0, sizeof(ctx->eld)); /* clear the old ELD */ + + ctx->dp_output = false; + ctx->tmds_clock_speed = 0; + ctx->link_rate = 0; + + /* Shut down the stream */ had_process_hot_unplug(ctx); - } else { - struct intel_hdmi_lpe_audio_eld *eld = &pdata->eld; + ctx->pipe = -1; + } else { dev_dbg(ctx->dev, "%s: HAD_NOTIFY_ELD : port = %d, tmds = %d\n", - __func__, eld->port_id, pdata->tmds_clock_speed); - - switch (eld->pipe_id) { - case 0: - ctx->had_config_offset = AUDIO_HDMI_CONFIG_A; - break; - case 1: - ctx->had_config_offset = AUDIO_HDMI_CONFIG_B; - break; - case 2: - ctx->had_config_offset = AUDIO_HDMI_CONFIG_C; - break; - default: - dev_dbg(ctx->dev, "Invalid pipe %d\n", - eld->pipe_id); - break; - } + __func__, ctx->port, ppdata->ls_clock); - memcpy(ctx->eld, eld->eld_data, sizeof(ctx->eld)); + memcpy(ctx->eld, ppdata->eld, sizeof(ctx->eld)); - ctx->dp_output = pdata->dp_output; - ctx->tmds_clock_speed = pdata->tmds_clock_speed; - ctx->link_rate = pdata->link_rate; + ctx->dp_output = ppdata->dp_output; + if (ctx->dp_output) { + ctx->tmds_clock_speed = 0; + ctx->link_rate = ppdata->ls_clock; + } else { + ctx->tmds_clock_speed = ppdata->ls_clock; + ctx->link_rate = 0; + } + /* + * Shut down the stream before we change + * the pipe assignment for this pcm device + */ had_process_hot_plug(ctx); - /* Process mode change if stream is active */ + ctx->pipe = ppdata->pipe; + + /* Restart the stream if necessary */ had_process_mode_change(ctx); } + mutex_unlock(&ctx->mutex); pm_runtime_mark_last_busy(ctx->dev); pm_runtime_put_autosuspend(ctx->dev); @@ -1605,11 +1643,17 @@ static void had_audio_wq(struct work_struct *work) /* * Jack interface */ -static int had_create_jack(struct snd_intelhad *ctx) +static int had_create_jack(struct snd_intelhad *ctx, + struct snd_pcm *pcm) { + char hdmi_str[32]; int err; - err = snd_jack_new(ctx->card, "HDMI/DP", SND_JACK_AVOUT, &ctx->jack, + snprintf(hdmi_str, sizeof(hdmi_str), + "HDMI/DP,pcm=%d", pcm->device); + + err = snd_jack_new(ctx->card_ctx->card, hdmi_str, + SND_JACK_AVOUT, &ctx->jack, true, false); if (err < 0) return err; @@ -1623,13 +1667,18 @@ static int had_create_jack(struct snd_intelhad *ctx) static int hdmi_lpe_audio_runtime_suspend(struct device *dev) { - struct snd_intelhad *ctx = dev_get_drvdata(dev); - struct snd_pcm_substream *substream; + struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev); + int port; - substream = had_substream_get(ctx); - if (substream) { - snd_pcm_suspend(substream); - had_substream_put(ctx); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + struct snd_pcm_substream *substream; + + 
substream = had_substream_get(ctx); + if (substream) { + snd_pcm_suspend(substream); + had_substream_put(ctx); + } } return 0; @@ -1637,12 +1686,12 @@ static int hdmi_lpe_audio_runtime_suspend(struct device *dev) static int __maybe_unused hdmi_lpe_audio_suspend(struct device *dev) { - struct snd_intelhad *ctx = dev_get_drvdata(dev); + struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev); int err; err = hdmi_lpe_audio_runtime_suspend(dev); if (!err) - snd_power_change_state(ctx->card, SNDRV_CTL_POWER_D3hot); + snd_power_change_state(card_ctx->card, SNDRV_CTL_POWER_D3hot); return err; } @@ -1654,24 +1703,34 @@ static int hdmi_lpe_audio_runtime_resume(struct device *dev) static int __maybe_unused hdmi_lpe_audio_resume(struct device *dev) { - struct snd_intelhad *ctx = dev_get_drvdata(dev); + struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev); hdmi_lpe_audio_runtime_resume(dev); - snd_power_change_state(ctx->card, SNDRV_CTL_POWER_D0); + snd_power_change_state(card_ctx->card, SNDRV_CTL_POWER_D0); return 0; } /* release resources */ static void hdmi_lpe_audio_free(struct snd_card *card) { - struct snd_intelhad *ctx = card->private_data; + struct snd_intelhad_card *card_ctx = card->private_data; + struct intel_hdmi_lpe_audio_pdata *pdata = card_ctx->dev->platform_data; + int port; + + spin_lock_irq(&pdata->lpe_audio_slock); + pdata->notify_audio_lpe = NULL; + spin_unlock_irq(&pdata->lpe_audio_slock); - cancel_work_sync(&ctx->hdmi_audio_wq); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; - if (ctx->mmio_start) - iounmap(ctx->mmio_start); - if (ctx->irq >= 0) - free_irq(ctx->irq, ctx); + cancel_work_sync(&ctx->hdmi_audio_wq); + } + + if (card_ctx->mmio_start) + iounmap(card_ctx->mmio_start); + if (card_ctx->irq >= 0) + free_irq(card_ctx->irq, card_ctx); } /* @@ -1683,12 +1742,12 @@ static void hdmi_lpe_audio_free(struct snd_card *card) static int hdmi_lpe_audio_probe(struct platform_device *pdev) { struct snd_card *card; - struct snd_intelhad *ctx; + struct snd_intelhad_card *card_ctx; struct snd_pcm *pcm; struct intel_hdmi_lpe_audio_pdata *pdata; int irq; struct resource *res_mmio; - int i, ret; + int port, ret; pdata = pdev->dev.platform_data; if (!pdata) { @@ -1711,39 +1770,30 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) /* create a card instance with ALSA framework */ ret = snd_card_new(&pdev->dev, hdmi_card_index, hdmi_card_id, - THIS_MODULE, sizeof(*ctx), &card); + THIS_MODULE, sizeof(*card_ctx), &card); if (ret) return ret; - ctx = card->private_data; - spin_lock_init(&ctx->had_spinlock); - mutex_init(&ctx->mutex); - ctx->connected = false; - ctx->dev = &pdev->dev; - ctx->card = card; - ctx->aes_bits = SNDRV_PCM_DEFAULT_CON_SPDIF; + card_ctx = card->private_data; + card_ctx->dev = &pdev->dev; + card_ctx->card = card; strcpy(card->driver, INTEL_HAD); strcpy(card->shortname, "Intel HDMI/DP LPE Audio"); strcpy(card->longname, "Intel HDMI/DP LPE Audio"); - ctx->irq = -1; - ctx->tmds_clock_speed = DIS_SAMPLE_RATE_148_5; - INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq); + card_ctx->irq = -1; card->private_free = hdmi_lpe_audio_free; - /* assume pipe A as default */ - ctx->had_config_offset = AUDIO_HDMI_CONFIG_A; - - platform_set_drvdata(pdev, ctx); + platform_set_drvdata(pdev, card_ctx); dev_dbg(&pdev->dev, "%s: mmio_start = 0x%x, mmio_end = 0x%x\n", __func__, (unsigned int)res_mmio->start, (unsigned int)res_mmio->end); - ctx->mmio_start = ioremap_nocache(res_mmio->start, - (size_t)(resource_size(res_mmio))); - if 
(!ctx->mmio_start) { + card_ctx->mmio_start = ioremap_nocache(res_mmio->start, + (size_t)(resource_size(res_mmio))); + if (!card_ctx->mmio_start) { dev_err(&pdev->dev, "Could not get ioremap\n"); ret = -EACCES; goto err; } @@ -1751,54 +1801,79 @@ /* setup interrupt handler */ ret = request_irq(irq, display_pipe_interrupt_handler, 0, - pdev->name, ctx); + pdev->name, card_ctx); if (ret < 0) { dev_err(&pdev->dev, "request_irq failed\n"); goto err; } - ctx->irq = irq; - - ret = snd_pcm_new(card, INTEL_HAD, PCM_INDEX, MAX_PB_STREAMS, - MAX_CAP_STREAMS, &pcm); - if (ret) - goto err; - - /* setup private data which can be retrieved when required */ - pcm->private_data = ctx; - pcm->info_flags = 0; - strncpy(pcm->name, card->shortname, strlen(card->shortname)); - /* setup the ops for playabck */ - snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops); + card_ctx->irq = irq; /* only 32bit addressable */ dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); - /* allocate dma pages; - * try to allocate 600k buffer as default which is large enough - */ - snd_pcm_lib_preallocate_pages_for_all(pcm, - SNDRV_DMA_TYPE_DEV, NULL, - HAD_DEFAULT_BUFFER, HAD_MAX_BUFFER); + init_channel_allocations(); - /* create controls */ - for (i = 0; i < ARRAY_SIZE(had_controls); i++) { - ret = snd_ctl_add(card, snd_ctl_new1(&had_controls[i], ctx)); - if (ret < 0) + card_ctx->num_pipes = pdata->num_pipes; + card_ctx->num_ports = pdata->num_ports; + + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + int i; + + ctx->card_ctx = card_ctx; + ctx->dev = card_ctx->dev; + ctx->port = port; + ctx->pipe = -1; + + INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq); + + ret = snd_pcm_new(card, INTEL_HAD, port, MAX_PB_STREAMS, + MAX_CAP_STREAMS, &pcm); + if (ret) goto err; - } - init_channel_allocations(); + /* setup private data which can be retrieved when required */ + pcm->private_data = ctx; + pcm->info_flags = 0; + strncpy(pcm->name, card->shortname, strlen(card->shortname)); + /* setup the ops for playback */ + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops); - /* Register channel map controls */ - ret = had_register_chmap_ctls(ctx, pcm); - if (ret < 0) - goto err; + /* allocate dma pages; + * try to allocate 600k buffer as default which is large enough + */ + snd_pcm_lib_preallocate_pages_for_all(pcm, + SNDRV_DMA_TYPE_DEV, NULL, + HAD_DEFAULT_BUFFER, HAD_MAX_BUFFER); + + /* create controls */ + for (i = 0; i < ARRAY_SIZE(had_controls); i++) { + struct snd_kcontrol *kctl; + + kctl = snd_ctl_new1(&had_controls[i], ctx); + if (!kctl) { + ret = -ENOMEM; + goto err; + } - ret = had_create_jack(ctx); - if (ret < 0) - goto err; + kctl->id.device = pcm->device; + + ret = snd_ctl_add(card, kctl); + if (ret < 0) + goto err; + } + + /* Register channel map controls */ + ret = had_register_chmap_ctls(ctx, pcm); + if (ret < 0) + goto err; + + ret = had_create_jack(ctx, pcm); + if (ret < 0) + goto err; + } ret = snd_card_register(card); if (ret) @@ -1806,7 +1881,6 @@ spin_lock_irq(&pdata->lpe_audio_slock); pdata->notify_audio_lpe = notify_audio_lpe; - pdata->notify_pending = false; spin_unlock_irq(&pdata->lpe_audio_slock); pm_runtime_use_autosuspend(&pdev->dev); @@ -1814,7 +1888,11 @@ pm_runtime_set_active(&pdev->dev); dev_dbg(&pdev->dev, "%s: handle pending 
notification\n", __func__); - schedule_work(&ctx->hdmi_audio_wq); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + + schedule_work(&ctx->hdmi_audio_wq); + } return 0; @@ -1830,9 +1908,9 @@ err: */ static int hdmi_lpe_audio_remove(struct platform_device *pdev) { - struct snd_intelhad *ctx = platform_get_drvdata(pdev); + struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev); - snd_card_free(ctx->card); + snd_card_free(card_ctx->card); return 0; } diff --git a/sound/x86/intel_hdmi_audio.h b/sound/x86/intel_hdmi_audio.h index 2d3e389f76b3..0d91bb5dbab7 100644 --- a/sound/x86/intel_hdmi_audio.h +++ b/sound/x86/intel_hdmi_audio.h @@ -32,7 +32,6 @@ #include "intel_hdmi_lpe_audio.h" -#define PCM_INDEX 0 #define MAX_PB_STREAMS 1 #define MAX_CAP_STREAMS 0 #define BYTES_PER_WORD 0x4 @@ -101,7 +100,7 @@ struct pcm_stream_info { * @chmap: holds channel map info */ struct snd_intelhad { - struct snd_card *card; + struct snd_intelhad_card *card_ctx; bool connected; struct pcm_stream_info stream_info; unsigned char eld[HDMI_MAX_ELD_BYTES]; @@ -112,6 +111,8 @@ struct snd_intelhad { struct snd_pcm_chmap *chmap; int tmds_clock_speed; int link_rate; + int port; /* fixed */ + int pipe; /* can change dynamically */ /* ring buffer (BD) position index */ unsigned int bd_head; @@ -123,9 +124,6 @@ struct snd_intelhad { unsigned int period_bytes; /* PCM period size in bytes */ /* internal stuff */ - int irq; - void __iomem *mmio_start; - unsigned int had_config_offset; union aud_cfg aud_config; /* AUD_CONFIG reg value cache */ struct work_struct hdmi_audio_wq; struct mutex mutex; /* for protecting chmap and eld */ @@ -133,4 +131,16 @@ struct snd_intelhad { struct snd_jack *jack; }; +struct snd_intelhad_card { + struct snd_card *card; + struct device *dev; + + /* internal stuff */ + int irq; + void __iomem *mmio_start; + int num_pipes; + int num_ports; + struct snd_intelhad pcm_ctx[3]; /* one for each port */ +}; + #endif /* _INTEL_HDMI_AUDIO_ */ diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 6ebd3e6a1fd1..5e3c673fa3f4 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -27,6 +27,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -114,6 +116,8 @@ struct kvm_debug_exit_arch { }; struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { @@ -192,13 +196,17 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 -#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_ITS_SAVE_TABLES 1 +#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 +#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index c2860358ae3e..70eea2ecc663 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ 
b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -39,6 +39,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -143,6 +145,8 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW (1 << 17) struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { @@ -212,13 +216,17 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 -#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_ITS_SAVE_TABLES 1 +#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 +#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 4edbe4bb0e8b..07fbeb927834 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -29,6 +29,9 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_GUEST_DEBUG +/* Not always available, but if it is, this is the correct offset. */ +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + struct kvm_regs { __u64 pc; __u64 cr; diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 7f4fd65e9208..3dd2a1d308dd 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -26,6 +26,8 @@ #define KVM_DEV_FLIC_ADAPTER_REGISTER 6 #define KVM_DEV_FLIC_ADAPTER_MODIFY 7 #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 +#define KVM_DEV_FLIC_AISM 9 +#define KVM_DEV_FLIC_AIRQ_INJECT 10 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. 
@@ -41,7 +43,14 @@ struct kvm_s390_io_adapter { __u8 isc; __u8 maskable; __u8 swap; - __u8 pad; + __u8 flags; +}; + +#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01 + +struct kvm_s390_ais_req { + __u8 isc; + __u16 mode; }; #define KVM_S390_IO_ADAPTER_MASK 1 @@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine { #define KVM_S390_VM_CPU_FEAT_CMMA 10 #define KVM_S390_VM_CPU_FEAT_PFMFI 11 #define KVM_S390_VM_CPU_FEAT_SIGPIF 12 +#define KVM_S390_VM_CPU_FEAT_KSS 13 struct kvm_s390_vm_cpu_feat { __u64 feat[16]; }; @@ -198,6 +208,10 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) +#define KVM_SYNC_GSCB (1UL << 9) +/* length and alignment of the sdnx as a power of two */ +#define SDNXC 8 +#define SDNXL (1UL << SDNXC) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -218,8 +232,16 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 padding1[52]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ + __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + union { + __u8 sdnx[SDNXL]; /* state description annex */ + struct { + __u64 reserved1[2]; + __u64 gscb[4]; + }; + }; }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 0fe00446f9ca..2701e5f8145b 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -202,6 +202,8 @@ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 85599ad4d024..5dff775af7cd 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -36,6 +36,12 @@ # define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31)) #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */ +#ifdef CONFIG_X86_5LEVEL +# define DISABLE_LA57 0 +#else +# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -55,7 +61,7 @@ #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) +#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57) #define DISABLED_MASK17 0 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index fac9a5c0abe9..d91ba04dd007 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -53,6 +53,12 @@ # define NEED_MOVBE 0 #endif +#ifdef CONFIG_X86_5LEVEL +# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) +#else +# define NEED_LA57 0 +#endif + #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -98,7 
+104,7 @@ #define REQUIRED_MASK13 0 #define REQUIRED_MASK14 0 #define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 0 +#define REQUIRED_MASK16 (NEED_LA57) #define REQUIRED_MASK17 0 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 739c0c594022..c2824d02ba37 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -9,6 +9,9 @@ #include <linux/types.h> #include <linux/ioctl.h> +#define KVM_PIO_PAGE_OFFSET 1 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 + #define DE_VECTOR 0 #define DB_VECTOR 1 #define BP_VECTOR 3 diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index 14458658e988..690a2dcf4078 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -76,7 +76,11 @@ #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_RDRAND 57 #define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_VMFUNC 59 +#define EXIT_REASON_ENCLS 60 +#define EXIT_REASON_RDSEED 61 #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 @@ -90,6 +94,7 @@ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ { EXIT_REASON_CPUID, "CPUID" }, \ { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVD, "INVD" }, \ { EXIT_REASON_INVLPG, "INVLPG" }, \ { EXIT_REASON_RDPMC, "RDPMC" }, \ { EXIT_REASON_RDTSC, "RDTSC" }, \ @@ -108,6 +113,8 @@ { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ { EXIT_REASON_MSR_READ, "MSR_READ" }, \ { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ + { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \ { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \ { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ @@ -115,20 +122,24 @@ { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ - { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ - { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ + { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ + { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ + { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ { EXIT_REASON_INVEPT, "INVEPT" }, \ + { EXIT_REASON_RDTSCP, "RDTSCP" }, \ { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \ + { EXIT_REASON_INVVPID, "INVVPID" }, \ { EXIT_REASON_WBINVD, "WBINVD" }, \ + { EXIT_REASON_XSETBV, "XSETBV" }, \ { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ - { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ - { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ - { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \ - { EXIT_REASON_INVD, "INVD" }, \ - { EXIT_REASON_INVVPID, "INVVPID" }, \ + { EXIT_REASON_RDRAND, "RDRAND" }, \ { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_VMFUNC, "VMFUNC" }, \ + { EXIT_REASON_ENCLS, "ENCLS" }, \ + { EXIT_REASON_RDSEED, "RDSEED" }, \ + { EXIT_REASON_PML_FULL, "PML_FULL" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ { EXIT_REASON_XRSTORS, "XRSTORS" } diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 390d7c9685fd..4ce25d43e8e3 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -208,6 +208,16 @@ .off = OFF, \ .imm = IMM }) +/* Unconditional jumps, goto pc + off16 */ + +#define 
BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index d538897b8e08..17b10304c393 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -48,17 +48,13 @@ * tv_sec holds the number of seconds before (negative) or after (positive) * 00:00:00 1st January 1970 UTC. * - * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is - * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time. - * - * Note that if both tv_sec and tv_nsec are non-zero, then the two values must - * either be both positive or both negative. + * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time. * * __reserved is held in case we need a yet finer resolution. */ struct statx_timestamp { __s64 tv_sec; - __s32 tv_nsec; + __u32 tv_nsec; __s32 __reserved; }; diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index cb0eda3925e6..3517e204a2b3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -311,6 +311,10 @@ include::itrace.txt[] Set the maximum number of program blocks to print with brstackasm for each sample. +--inline:: + If a callgraph address belongs to an inlined function, the inline stack + will be printed. Each entry has function name and file/line. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index d05aec491cff..4761b0d7fcb5 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2494,6 +2494,8 @@ int cmd_script(int argc, const char **argv) "Enable kernel symbol demangling"), OPT_STRING(0, "time", &script.time_str, "str", "Time span of interest (start,stop)"), + OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, + "Show inline function"), OPT_END() }; const char * const script_subcommands[] = { "record", "report", NULL }; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 59addd52d9cd..ddb2c6fbdf91 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -210,6 +210,8 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, return 0; ret = b->callchain->max_depth - a->callchain->max_depth; + if (callchain_param.order == ORDER_CALLER) + ret = -ret; } return ret; } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 81fc29ac798f..b4204b43ed58 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -621,14 +621,19 @@ enum match_result { static enum match_result match_chain_srcline(struct callchain_cursor_node *node, struct callchain_list *cnode) { - char *left = get_srcline(cnode->ms.map->dso, + char *left = NULL; + char *right = NULL; + enum match_result ret = MATCH_EQ; + int cmp; + + if (cnode->ms.map) + left = get_srcline(cnode->ms.map->dso, map__rip_2objdump(cnode->ms.map, cnode->ip), cnode->ms.sym, true, false); - char *right = get_srcline(node->map->dso, + if (node->map) + right = get_srcline(node->map->dso, map__rip_2objdump(node->map, node->ip), node->sym, true, false); - enum match_result ret = MATCH_EQ; - int cmp; if (left && right) cmp = strcmp(left, right); diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index e415aee6a245..583f3a602506 100644 --- 
a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -7,6 +7,7 @@ #include "map.h" #include "strlist.h" #include "symbol.h" +#include "srcline.h" static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) { @@ -168,6 +169,38 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!print_oneline) printed += fprintf(fp, "\n"); + if (symbol_conf.inline_name && node->map) { + struct inline_node *inode; + + addr = map__rip_2objdump(node->map, node->ip); + inode = dso__parse_addr_inlines(node->map->dso, addr); + + if (inode) { + struct inline_list *ilist; + + list_for_each_entry(ilist, &inode->val, list) { + if (print_arrow) + printed += fprintf(fp, " <-"); + + /* IP is same, just skip it */ + if (print_ip) + printed += fprintf(fp, "%c%16s", + s, ""); + if (print_sym) + printed += fprintf(fp, " %s", + ilist->funcname); + if (print_srcline) + printed += fprintf(fp, "\n %s:%d", + ilist->filename, + ilist->line_nr); + if (!print_oneline) + printed += fprintf(fp, "\n"); + } + + inline_node__delete(inode); + } + } + if (symbol_conf.bt_stop_list && node->sym && strlist__has_entry(symbol_conf.bt_stop_list, diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index df051a52393c..ebc88a74e67b 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -56,7 +56,10 @@ static int inline_list__append(char *filename, char *funcname, int line_nr, } } - list_add_tail(&ilist->list, &node->val); + if (callchain_param.order == ORDER_CALLEE) + list_add_tail(&ilist->list, &node->val); + else + list_add(&ilist->list, &node->val); return 0; } @@ -200,12 +203,14 @@ static void addr2line_cleanup(struct a2l_data *a2l) #define MAX_INLINE_NEST 1024 -static void inline_list__reverse(struct inline_node *node) +static int inline_list__append_dso_a2l(struct dso *dso, + struct inline_node *node) { - struct inline_list *ilist, *n; + struct a2l_data *a2l = dso->a2l; + char *funcname = a2l->funcname ? strdup(a2l->funcname) : NULL; + char *filename = a2l->filename ? strdup(a2l->filename) : NULL; - list_for_each_entry_safe_reverse(ilist, n, &node->val, list) - list_move_tail(&ilist->list, &node->val); + return inline_list__append(filename, funcname, a2l->line, node, dso); } static int addr2line(const char *dso_name, u64 addr, @@ -230,36 +235,36 @@ static int addr2line(const char *dso_name, u64 addr, bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l); - if (a2l->found && unwind_inlines) { + if (!a2l->found) + return 0; + + if (unwind_inlines) { int cnt = 0; + if (node && inline_list__append_dso_a2l(dso, node)) + return 0; + while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, &a2l->funcname, &a2l->line) && cnt++ < MAX_INLINE_NEST) { if (node != NULL) { - if (inline_list__append(strdup(a2l->filename), - strdup(a2l->funcname), - a2l->line, node, - dso) != 0) + if (inline_list__append_dso_a2l(dso, node)) return 0; + // found at least one inline frame + ret = 1; } } + } - if ((node != NULL) && - (callchain_param.order != ORDER_CALLEE)) { - inline_list__reverse(node); - } + if (file) { + *file = a2l->filename ? strdup(a2l->filename) : NULL; + ret = *file ? 
1 : 0; } - if (a2l->found && a2l->filename) { - *file = strdup(a2l->filename); + if (line) *line = a2l->line; - if (*file) - ret = 1; - } - return ret; } @@ -278,8 +283,6 @@ void dso__free_a2l(struct dso *dso) static struct inline_node *addr2inlines(const char *dso_name, u64 addr, struct dso *dso) { - char *file = NULL; - unsigned int line = 0; struct inline_node *node; node = zalloc(sizeof(*node)); @@ -291,7 +294,7 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, INIT_LIST_HEAD(&node->val); node->addr = addr; - if (!addr2line(dso_name, addr, &file, &line, dso, TRUE, node)) + if (!addr2line(dso_name, addr, NULL, NULL, dso, TRUE, node)) goto out_free_inline_node; if (list_empty(&node->val)) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index f90e11a555b2..943a06291587 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -168,12 +168,16 @@ frame_callback(Dwfl_Frame *state, void *arg) { struct unwind_info *ui = arg; Dwarf_Addr pc; + bool isactivation; - if (!dwfl_frame_pc(state, &pc, NULL)) { + if (!dwfl_frame_pc(state, &pc, &isactivation)) { pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } + if (!isactivation) + --pc; + return entry(pc, ui) || !(--ui->max_stack) ? DWARF_CB_ABORT : DWARF_CB_OK; } diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index f8455bed6e65..672c2ada9357 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -692,6 +692,17 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, while (!ret && (unw_step(&c) > 0) && i < max_stack) { unw_get_reg(&c, UNW_REG_IP, &ips[i]); + + /* + * Decrement the IP for any non-activation frames. + * This is required to properly find the srcline + * for caller frames. + * See also the documentation for dwfl_frame_pc(), + * which this code tries to replicate. 
+ */ + if (unw_is_signal_frame(&c) <= 0) + --ips[i]; + ++i; } diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore new file mode 100644 index 000000000000..cba3d994995c --- /dev/null +++ b/tools/power/acpi/.gitignore @@ -0,0 +1,4 @@ +acpidbg +acpidump +ec +include diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3773562056da..cabb19b1e371 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -49,6 +49,7 @@ #define MAX_NR_MAPS 4 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) +#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) struct bpf_test { const char *descr; @@ -2615,6 +2616,30 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "direct packet access: test17 (pruning, alignment)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14), + BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_JMP_A(-6), + }, + .errstr = "misaligned packet access off 2+15+-4 size 4", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + }, + { "helper access to packet: test1, valid packet_ptr range", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -3341,6 +3366,70 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS }, { + "alu ops on ptr_to_map_value_or_null, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "alu ops on ptr_to_map_value_or_null, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "alu ops on ptr_to_map_value_or_null, 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1), + 
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { "invalid memory access with multiple map_lookup_elem calls", .insns = { BPF_MOV64_IMM(BPF_REG_1, 10), @@ -4937,7 +5026,149 @@ static struct bpf_test tests[] = { .fixup_map_in_map = { 3 }, .errstr = "R1 type=map_value_or_null expected=map_ptr", .result = REJECT, - } + }, + { + "ld_abs: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r7", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "ld_ind: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 1), + BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r7", + .insns = { + 
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -5059,9 +5290,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, do_test_fixup(test, prog, map_fds); - fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, "GPL", 0, bpf_vlog, - sizeof(bpf_vlog)); + fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + "GPL", 0, bpf_vlog, sizeof(bpf_vlog)); expected_ret = unpriv && test->result_unpriv != UNDEF ? test->result_unpriv : test->result; diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc new file mode 100644 index 000000000000..f4d1ff785d67 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -0,0 +1,21 @@ +#!/bin/sh +# description: Register/unregister many kprobe events + +# ftrace fentry skip size depends on the machine architecture. +# Currently HAVE_KPROBES_ON_FTRACE is defined on x86 and powerpc +case `uname -m` in + x86_64|i[3456]86) OFFS=5;; + ppc*) OFFS=4;; + *) OFFS=0;; +esac + +echo "Setup up to 256 kprobes" +grep t /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \ +head -n 256 | while read i; do echo p ${i}+${OFFS} ; done > kprobe_events ||: + +echo 1 > events/kprobes/enable +echo 0 > events/kprobes/enable +echo > kprobe_events +echo "Waiting for unoptimizing & freeing" +sleep 5 +echo "Done" diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c index d9c49f41515e..e79ccd6aada1 100644 --- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c +++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c @@ -42,12 +42,12 @@ int test_body(void) printf("Check DSCR TM context switch: "); fflush(stdout); for (;;) { - rv = 1; asm __volatile__ ( /* set a known value into the DSCR */ "ld 3, %[dscr1];" "mtspr %[sprn_dscr], 3;" + "li %[rv], 1;" /* start and suspend a transaction */ "tbegin.;" "beq 1f;" |
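
The sound/usb/quirks.c one-liner above wraps the Amanero Combo384's bcdDevice field in le16_to_cpu() before the switch: USB descriptor fields are little-endian on the wire, so comparing the raw value against constants such as 0x199 only matched on little-endian hosts. A standalone sketch of the conversion, using a hypothetical userspace stand-in for the kernel's le16_to_cpu():

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the kernel's le16_to_cpu(): assemble a host-order
     * value from two little-endian bytes, independent of host byte
     * order. */
    static uint16_t le16_to_host(const uint8_t b[2])
    {
            return (uint16_t)b[0] | ((uint16_t)b[1] << 8);
    }

    int main(void)
    {
            uint8_t wire[2] = { 0x99, 0x01 }; /* bcdDevice 0x0199 as stored in the descriptor */

            printf("bcdDevice = 0x%04x\n", le16_to_host(wire)); /* 0x0199 on any host */
            return 0;
    }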
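
The intel_hdmi_audio.c rework above replaces the single global stream context with one PCM context per port: the port binding is fixed at probe time, the pipe feeding that port is reassigned on every hot-plug event (pipe < 0 while disconnected), and the interrupt handler first acks status per pipe, even for disconnected pipes, then dispatches to whichever port currently owns each pipe. A compressed sketch of that dispatch shape, with simplified stand-in types and stubbed hardware accessors (not the driver code itself):

    #include <stdint.h>

    #define MAX_PIPES 3
    #define MAX_PORTS 3

    /* Simplified stand-ins for snd_intelhad / snd_intelhad_card. */
    struct pcm_ctx  { int port; int pipe; }; /* pipe < 0: disconnected */
    struct card_ctx {
            int num_pipes, num_ports;
            struct pcm_ctx pcm[MAX_PORTS];
    };

    /* Stubs standing in for the real MMIO ack and stream handling. */
    static uint32_t ack_pipe_status(struct card_ctx *c, int pipe)
    {
            (void)c; (void)pipe;
            return 0;
    }

    static void handle_stream_event(struct pcm_ctx *ctx, uint32_t stat)
    {
            (void)ctx; (void)stat;
    }

    static void irq_dispatch(struct card_ctx *card)
    {
            uint32_t stat[MAX_PIPES] = { 0 };
            int pipe, port;

            /* Phase 1: ack per *pipe*, so interrupt status keeps getting
             * cleared even while no port is connected to that pipe. */
            for (pipe = 0; pipe < card->num_pipes; pipe++)
                    stat[pipe] = ack_pipe_status(card, pipe);

            /* Phase 2: route events to the port contexts by their
             * currently assigned pipe. */
            for (port = 0; port < card->num_ports; port++) {
                    struct pcm_ctx *ctx = &card->pcm[port];

                    if (ctx->pipe >= 0 && stat[ctx->pipe])
                            handle_stream_event(ctx, stat[ctx->pipe]);
            }
    }

    int main(void)
    {
            struct card_ctx card = { .num_pipes = 3, .num_ports = 3,
                                     .pcm = { { 0, -1 }, { 1, 0 }, { 2, -1 } } };

            irq_dispatch(&card);
            return 0;
    }

Splitting the ack phase from the dispatch phase is what lets the handler keep clearing interrupt status while a monitor is unplugged.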
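
The tools/include/uapi/linux/stat.h hunk above changes statx_timestamp.tv_nsec from __s32 to __u32 and tightens the semantics: tv_nsec is now always a plain 0..999,999,999 offset applied after tv_sec, even for pre-1970 times where tv_sec is negative. A minimal decoding sketch, assuming a uapi <linux/stat.h> that provides struct statx_timestamp:

    #include <stdio.h>
    #include <linux/stat.h> /* struct statx_timestamp (kernel uapi) */

    int main(void)
    {
            /* 1.5 seconds before the epoch: seconds round *down*, and
             * tv_nsec carries the non-negative remainder. */
            struct statx_timestamp ts = { .tv_sec = -2, .tv_nsec = 500000000 };

            /* With unsigned tv_nsec there is no both-negative case to
             * handle; decoding is a plain concatenation. */
            printf("%lld.%09u\n", (long long)ts.tv_sec, (unsigned int)ts.tv_nsec);
            return 0;
    }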
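
The unwind-libdw.c and unwind-libunwind-local.c hunks above apply the same rule in two unwinders: a caller frame stores a return address, which points just past the call instruction and can therefore resolve to the line after the call, so the address used for source-line lookup is decremented by one; the activation frame (the sampled or signal frame) holds the actual current PC and is left untouched. The rule in isolation, as a hypothetical helper:

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical helper mirroring the dwfl_frame_pc() semantics that
     * the libunwind path replicates: step a return address back one
     * byte so it lands inside the call instruction and resolves to the
     * call site's source line. */
    static uint64_t srcline_lookup_addr(uint64_t ip, bool is_activation)
    {
            return is_activation ? ip : ip - 1;
    }

Together with the new perf script --inline option documented above, each frame resolved this way can additionally be expanded into its inline stack, one function name plus file/line per inlined frame.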