From eda4a7bf5d75b8b579c54622f2795696a02883b9 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin.cs@gmail.com>
Date: Sun, 18 Oct 2020 16:54:01 -0400
Subject: docs: fb: Add font_6x8 to available built-in fonts

Recently we added a new 6x8 font in commit e2028c8e6bf9 ("lib/fonts: add
font 6x8 for OLED display"). Add its name to the "compiled-in fonts"
list.

Signed-off-by: Peilin Ye <yepeilin.cs@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20201018205401.698242-1-yepeilin.cs@gmail.com
---
 Documentation/fb/fbcon.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Documentation/fb/fbcon.rst b/Documentation/fb/fbcon.rst
index a7b487cba307..71cade19d92c 100644
--- a/Documentation/fb/fbcon.rst
+++ b/Documentation/fb/fbcon.rst
@@ -81,7 +81,7 @@ C. Boot options
 1. fbcon=font:<name>
 
 	Select the initial font to use. The value 'name' can be any of the
-	compiled-in fonts: 10x18, 6x10, 7x14, Acorn8x8, MINI4x6,
+	compiled-in fonts: 10x18, 6x10, 6x8, 7x14, Acorn8x8, MINI4x6,
 	PEARL8x8, ProFont6x11, SUN12x22, SUN8x16, TER16x32, VGA8x16, VGA8x8.
 
 	Note, not all drivers can handle font with widths not divisible by 8,
-- 
cgit v1.2.3-59-g8ed1b


From 272d70895113ef00c03ab325787d159ee51718c8 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin.cs@gmail.com>
Date: Sun, 18 Oct 2020 14:12:04 -0400
Subject: Fonts: Support FONT_EXTRA_WORDS macros for font_6x8

Recently, in commit 6735b4632def ("Fonts: Support FONT_EXTRA_WORDS macros
for built-in fonts"), we wrapped each of our built-in data buffers in a
`font_data` structure, in order to use the following macros on them, see
include/linux/font.h:

	#define REFCOUNT(fd)	(((int *)(fd))[-1])
	#define FNTSIZE(fd)	(((int *)(fd))[-2])
	#define FNTCHARCNT(fd)	(((int *)(fd))[-3])
	#define FNTSUM(fd)	(((int *)(fd))[-4])

	#define FONT_EXTRA_WORDS 4

Do the same thing to our new 6x8 font. For built-in fonts, currently we
only use FNTSIZE(). Since this is only a temporary solution for an
out-of-bounds issue in the framebuffer layer (see commit 5af08640795b
("fbcon: Fix global-out-of-bounds read in fbcon_get_font()")), all the
three other fields are intentionally set to zero in order to discourage
using these negative-indexing macros.

Signed-off-by: Peilin Ye <yepeilin.cs@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/926453876c92caac34cba8545716a491754d04d5.1603037079.git.yepeilin.cs@gmail.com
---
 lib/fonts/font_6x8.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/fonts/font_6x8.c b/lib/fonts/font_6x8.c
index e06447788418..700039a9ceae 100644
--- a/lib/fonts/font_6x8.c
+++ b/lib/fonts/font_6x8.c
@@ -3,8 +3,8 @@
 
 #define FONTDATAMAX 2048
 
-static const unsigned char fontdata_6x8[FONTDATAMAX] = {
-
+static struct font_data fontdata_6x8 = {
+	{ 0, 0, FONTDATAMAX, 0 }, {
 	/* 0 0x00 '^@' */
 	0x00, /* 000000 */
 	0x00, /* 000000 */
@@ -2564,13 +2564,13 @@ static const unsigned char fontdata_6x8[FONTDATAMAX] = {
 	0x00, /* 000000 */
 	0x00, /* 000000 */
 	0x00, /* 000000 */
-};
+} };
 
 const struct font_desc font_6x8 = {
 	.idx	= FONT6x8_IDX,
 	.name	= "6x8",
 	.width	= 6,
 	.height	= 8,
-	.data	= fontdata_6x8,
+	.data	= fontdata_6x8.data,
 	.pref	= 0,
 };
-- 
cgit v1.2.3-59-g8ed1b


From 354842df3888d63dd0371358189cafde267b4a72 Mon Sep 17 00:00:00 2001
From: Sean Paul <seanpaul@chromium.org>
Date: Thu, 17 Sep 2020 20:28:42 -0400
Subject: drm/i915/dp: Tweak initial dpcd backlight.enabled value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In commit 79946723092b ("drm/i915: Assume 100% brightness when not in
DPCD control mode"), we fixed the brightness level when DPCD control was
not active to max brightness. This is as good as we can guess since most
backlights go on full when uncontrolled.

However in doing so we changed the semantics of the initial
'backlight.enabled' value. At least on Pixelbooks, they  were relying
on the brightness level in DP_EDP_BACKLIGHT_BRIGHTNESS_MSB to be 0 on
boot such that enabled would be false. This causes the device to be
enabled when the brightness is set. Without this, brightness control
doesn't work. So by changing brightness to max, we also flipped enabled
to be true on boot.

To fix this, make enabled a function of brightness and backlight control
mechanism.

Fixes: 79946723092b ("drm/i915: Assume 100% brightness when not in DPCD control mode")
Cc: Lyude Paul <lyude@redhat.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Cc: "Ville Syrjälä" <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Kevin Chowski <chowski@chromium.org>>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Lyude Paul <lyude@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200918002845.32766-1-sean@poorly.run
(cherry picked from commit 4ade8f31c25bef7ce7ed4d7cbac17df7c4bad850)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 .../gpu/drm/i915/display/intel_dp_aux_backlight.c  | 31 ++++++++++++++--------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index acbd7eb66cbe..036f504ac7db 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -52,17 +52,11 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
 	}
 }
 
-/*
- * Read the current backlight value from DPCD register(s) based
- * on if 8-bit(MSB) or 16-bit(MSB and LSB) values are supported
- */
-static u32 intel_dp_aux_get_backlight(struct intel_connector *connector)
+static bool intel_dp_aux_backlight_dpcd_mode(struct intel_connector *connector)
 {
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
 	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
-	u8 read_val[2] = { 0x0 };
 	u8 mode_reg;
-	u16 level = 0;
 
 	if (drm_dp_dpcd_readb(&intel_dp->aux,
 			      DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
@@ -70,15 +64,29 @@ static u32 intel_dp_aux_get_backlight(struct intel_connector *connector)
 		drm_dbg_kms(&i915->drm,
 			    "Failed to read the DPCD register 0x%x\n",
 			    DP_EDP_BACKLIGHT_MODE_SET_REGISTER);
-		return 0;
+		return false;
 	}
 
+	return (mode_reg & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) ==
+	       DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD;
+}
+
+/*
+ * Read the current backlight value from DPCD register(s) based
+ * on if 8-bit(MSB) or 16-bit(MSB and LSB) values are supported
+ */
+static u32 intel_dp_aux_get_backlight(struct intel_connector *connector)
+{
+	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+	u8 read_val[2] = { 0x0 };
+	u16 level = 0;
+
 	/*
 	 * If we're not in DPCD control mode yet, the programmed brightness
 	 * value is meaningless and we should assume max brightness
 	 */
-	if ((mode_reg & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) !=
-	    DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD)
+	if (!intel_dp_aux_backlight_dpcd_mode(connector))
 		return connector->panel.backlight.max;
 
 	if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB,
@@ -319,7 +327,8 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
 
 	panel->backlight.min = 0;
 	panel->backlight.level = intel_dp_aux_get_backlight(connector);
-	panel->backlight.enabled = panel->backlight.level != 0;
+	panel->backlight.enabled = intel_dp_aux_backlight_dpcd_mode(connector) &&
+				   panel->backlight.level != 0;
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 849c0fe9e831dcebea1b46e2237e13f274a8756a Mon Sep 17 00:00:00 2001
From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
Date: Wed, 29 Jul 2020 15:55:39 +0530
Subject: drm/i915/gt: Initialize reserved and unspecified MOCS indices

In order to avoid functional breakage of mis-programmed applications that
have grown to depend on unused MOCS entries, we are programming
those entries to be equal to fully cached ("L3 + LLC") entry.

These reserved and unspecified entries should not be used as they may be
changed to less performant variants with better coherency in the future
if more entries are needed.

v2: As suggested by Lucas De Marchi to utilise __init_mocs_table for
programming default value, setting I915_MOCS_PTE index of tgl_mocs_table
with desired value.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Tomasz Lis <tomasz.lis@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Francisco Jerez <currojerez@riseup.net>
Cc: Mathew Alwin <alwin.mathew@intel.com>
Cc: Mcguire Russell W <russell.w.mcguire@intel.com>
Cc: Spruit Neil R <neil.r.spruit@intel.com>
Cc: Zhou Cheng <cheng.zhou@intel.com>
Cc: Benemelis Mike G <mike.g.benemelis@intel.com>

Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20200729102539.134731-2-ayaz.siddiqui@intel.com
Cc: stable@vger.kernel.org
(cherry picked from commit 4d8a5cfe3b131f60903949f998c5961cc922e0b0)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 632e08a4592b..b8f56e62158e 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -234,11 +234,17 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 		   L3_1_UC)
 
 static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
-	/* Base - Error (Reserved for Non-Use) */
-	MOCS_ENTRY(0, 0x0, 0x0),
-	/* Base - Reserved */
-	MOCS_ENTRY(1, 0x0, 0x0),
-
+	/*
+	 * NOTE:
+	 * Reserved and unspecified MOCS indices have been set to (L3 + LCC).
+	 * These reserved entries should never be used, they may be changed
+	 * to low performant variants with better coherency in the future if
+	 * more entries are needed. We are programming index I915_MOCS_PTE(1)
+	 * only, __init_mocs_table() take care to program unused index with
+	 * this entry.
+	 */
+	MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
 	GEN11_MOCS_ENTRIES,
 
 	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
-- 
cgit v1.2.3-59-g8ed1b


From 1664ffee760a5d98952318fdd9b198fae396d660 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Thu, 15 Oct 2020 13:21:35 +0100
Subject: drm/i915: Mark ininitial fb obj as WT on eLLC machines to avoid rcu
 lockup during fbdev init
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we leave the cache_level of the initial fb obj
set to NONE. This means on eLLC machines the first pin_to_display()
will try to switch it to WT which requires a vma unbind+bind.
If that happens during the fbdev initialization rcu does not
seem operational which causes the unbind to get stuck. To
most appearances this looks like a dead machine on boot.

Avoid the unbind by already marking the object cache_level
as WT when creating it. We still do an excplicit ggtt pin
which will rewrite the PTEs anyway, so they will match whatever
cache level we set.

Cc: <stable@vger.kernel.org> # v5.7+
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2381
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20201007120329.17076-1-ville.syrjala@linux.intel.com
Link: https://patchwork.freedesktop.org/patch/msgid/20201015122138.30161-1-chris@chris-wilson.co.uk
(cherry picked from commit d46b60a2e8d246f1f0faa38e52f4f5a73858c338)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index fac4657a286c..d9494fd7c305 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3434,6 +3434,14 @@ initial_plane_vma(struct drm_i915_private *i915,
 	if (IS_ERR(obj))
 		return NULL;
 
+	/*
+	 * Mark it WT ahead of time to avoid changing the
+	 * cache_level during fbdev initialization. The
+	 * unbind there would get stuck waiting for rcu.
+	 */
+	i915_gem_object_set_cache_coherency(obj, HAS_WT(i915) ?
+					    I915_CACHE_WT : I915_CACHE_NONE);
+
 	switch (plane_config->tiling) {
 	case I915_TILING_NONE:
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From d5e8782129c22036425f29f9b6a254895482d7bd Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 15 Oct 2020 12:59:54 +0100
Subject: drm/i915/gem: Support parsing of oversize batches

Matthew Auld noted that on more recent systems (such as the parser for
gen9) we may have objects that are larger than expected by the GEM uAPI
(i.e. greater than u32). These objects would have incorrect implicit
batch lengths, causing the parser to reject them for being incomplete,
or worse.

Based on a patch by Matthew Auld.

Reported-by: Matthew Auld <matthew.auld@intel.com>
Fixes: 435e8fc059db ("drm/i915: Allow parsing of unsized batches")
Testcase: igt/gem_exec_params/larger-than-life-batch
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Cc: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20201015115954.871-1-chris@chris-wilson.co.uk
(cherry picked from commit 57b2d834bf235daab388c3ba12d035c820ae09c6)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 4b09bcd70cf4..1904e6e5ea64 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -287,8 +287,8 @@ struct i915_execbuffer {
 	u64 invalid_flags; /** Set of execobj.flags that are invalid */
 	u32 context_flags; /** Set of execobj.flags to insert from the ctx */
 
+	u64 batch_len; /** Length of batch within object */
 	u32 batch_start_offset; /** Location within object of batch */
-	u32 batch_len; /** Length of batch within object */
 	u32 batch_flags; /** Flags composed for emit_bb_start() */
 	struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
 
@@ -871,6 +871,10 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 
 	if (eb->batch_len == 0)
 		eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
+	if (unlikely(eb->batch_len == 0)) { /* impossible! */
+		drm_dbg(&i915->drm, "Invalid batch length\n");
+		return -EINVAL;
+	}
 
 	return 0;
 
@@ -2424,7 +2428,7 @@ static int eb_parse(struct i915_execbuffer *eb)
 	struct drm_i915_private *i915 = eb->i915;
 	struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
 	struct i915_vma *shadow, *trampoline, *batch;
-	unsigned int len;
+	unsigned long len;
 	int err;
 
 	if (!eb_use_cmdparser(eb)) {
@@ -2449,6 +2453,8 @@ static int eb_parse(struct i915_execbuffer *eb)
 	} else {
 		len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
 	}
+	if (unlikely(len < eb->batch_len)) /* last paranoid check of overflow */
+		return -EINVAL;
 
 	if (!pool) {
 		pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
-- 
cgit v1.2.3-59-g8ed1b


From 9b99e5ba3e5d68039bd6b657e4bbe520a3521f4c Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 15 Oct 2020 20:50:23 +0100
Subject: drm/i915/gt: Delay execlist processing for tgl

When running gem_exec_nop, it floods the system with many requests (with
the goal of userspace submitting faster than the HW can process a single
empty batch). This causes the driver to continually resubmit new
requests onto the end of an active context, a flood of lite-restore
preemptions. If we time this just right, Tigerlake hangs.

Inserting a small delay between the processing of CS events and
submitting the next context, prevents the hang. Naturally it does not
occur with debugging enabled. The suspicion then is that this is related
to the issues with the CS event buffer, and inserting an mmio read of
the CS pointer status appears to be very successful in preventing the
hang. Other registers, or uncached reads, or plain mb, do not prevent
the hang, suggesting that register is key -- but that the hang can be
prevented by a simple udelay, suggests it is just a timing issue like
that encountered by commit 233c1ae3c83f ("drm/i915/gt: Wait for CSB
entries on Tigerlake"). Also note that the hang is not prevented by
applying CTX_DESC_FORCE_RESTORE, or by inserting a delay on the GPU
between requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: stable@vger.kernel.org
Acked-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201015195023.32346-1-chris@chris-wilson.co.uk
(cherry picked from commit 6ca7217dffaf1abba91558e67a2efb655ac91405)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0412a44f25f2..bff237a8843a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2649,6 +2649,9 @@ static void process_csb(struct intel_engine_cs *engine)
 			smp_wmb(); /* complete the seqlock */
 			WRITE_ONCE(execlists->active, execlists->inflight);
 
+			/* XXX Magic delay for tgl */
+			ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
 			WRITE_ONCE(execlists->pending[0], NULL);
 		} else {
 			if (GEM_WARN_ON(!*execlists->active)) {
-- 
cgit v1.2.3-59-g8ed1b


From 64402570e12f7b63ab33fc4640d3709c9ce2b380 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 2 Oct 2020 09:34:25 +0100
Subject: drm/i915/gt: Undo forced context restores after trivial preemptions

We may try to preempt the currently executing request, only to find that
after unravelling all the dependencies that the original executing
context is still the earliest in the topological sort and re-submitted
back to HW (if we do detect some change in the ELSP that requires
re-submission). However, due to the way we check for wrap-around during
the unravelling, we mark any context that has been submitted just once
(i.e. with the rq->wa_tail set, but the ring->tail earlier) as
potentially wrapping and requiring a forced restore on resubmission.
This was expected to be not a problem, as it was anticipated that most
unwinding for preemption would result in a context switch and the few
that did not would be lost in the noise. It did not take long for
someone to find one particular workload where the cost of those extra
context restores was measurable.

However, since we know the wa_tail is of fixed size, and we know that a
request must be larger than the wa_tail itself, we can safely maintain
the check for request wrapping and check against a slightly future point
in the ring that includes an expected wa_tail. (That is if the
ring->tail is already set to rq->wa_tail, including another 8 bytes in
the check does not invalidate the incremental wrap detection.)

Fixes: 8ab3a3812aa9 ("drm/i915/gt: Incrementally check for rewinding")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.4+
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201002083425.4605-1-chris@chris-wilson.co.uk
(cherry picked from commit bb65548e3c6e299175a9e8c3e24b2b9577656a5d)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index bff237a8843a..341ff8e3af4c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1140,9 +1140,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 
 			/* Check in case we rollback so far we wrap [size/2] */
 			if (intel_ring_direction(rq->ring,
-						 intel_ring_wrap(rq->ring,
-								 rq->tail),
-						 rq->ring->tail) > 0)
+						 rq->tail,
+						 rq->ring->tail + 8) > 0)
 				rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
 
 			active = rq;
-- 
cgit v1.2.3-59-g8ed1b


From db9bc2d35f49fed248296d3216597b078c0bab37 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 16 Oct 2020 10:25:27 +0100
Subject: drm/i915: Use the active reference on the vma while capturing

During error capture, we need to take a reference to the vma from before
the reset in order to catpure the contents of the vma later. Currently
we are using both an active reference and a kref, but due to nature of
the i915_vma reference handling, that kref is on the vma->obj and not
the vma itself. This means the vma may be destroyed as soon as it is
idle, that is in between the i915_active_release(&vma->active) and the
i915_vma_put(vma):

<3> [197.866181] BUG: KASAN: use-after-free in intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<3> [197.866339] Read of size 8 at addr ffff8881258cb800 by task gem_exec_captur/1041
<3> [197.866467]
<4> [197.866512] CPU: 2 PID: 1041 Comm: gem_exec_captur Not tainted 5.9.0-g5e4234f97efba-kasan_200+ #1
<4> [197.866521] Hardware name: Intel Corp. Broxton P/Apollolake RVP1A, BIOS APLKRVPA.X64.0150.B11.1608081044 08/08/2016
<4> [197.866530] Call Trace:
<4> [197.866549]  dump_stack+0x99/0xd0
<4> [197.866760]  ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<4> [197.866783]  print_address_description.constprop.8+0x3e/0x60
<4> [197.866797]  ? kmsg_dump_rewind_nolock+0xd4/0xd4
<4> [197.866819]  ? lockdep_hardirqs_off+0xd4/0x120
<4> [197.867037]  ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<4> [197.867249]  ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<4> [197.867270]  kasan_report.cold.10+0x1f/0x37
<4> [197.867492]  ? intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<4> [197.867710]  intel_engine_coredump_add_vma+0x36c/0x4a0 [i915]
<4> [197.867949]  i915_gpu_coredump.part.29+0x150/0x7b0 [i915]
<4> [197.868186]  i915_capture_error_state+0x5e/0xc0 [i915]
<4> [197.868396]  intel_gt_handle_error+0x6eb/0xa20 [i915]
<4> [197.868624]  ? intel_gt_reset_global+0x370/0x370 [i915]
<4> [197.868644]  ? check_flags+0x50/0x50
<4> [197.868662]  ? __lock_acquire+0xd59/0x6b00
<4> [197.868678]  ? register_lock_class+0x1ad0/0x1ad0
<4> [197.868944]  i915_wedged_set+0xcf/0x1b0 [i915]
<4> [197.869147]  ? i915_wedged_get+0x90/0x90 [i915]
<4> [197.869371]  ? i915_wedged_get+0x90/0x90 [i915]
<4> [197.869398]  simple_attr_write+0x153/0x1c0
<4> [197.869428]  full_proxy_write+0xee/0x180
<4> [197.869442]  ? __sb_start_write+0x1f3/0x310
<4> [197.869465]  vfs_write+0x1a3/0x640
<4> [197.869492]  ksys_write+0xec/0x1c0
<4> [197.869507]  ? __ia32_sys_read+0xa0/0xa0
<4> [197.869525]  ? lockdep_hardirqs_on_prepare+0x32b/0x4e0
<4> [197.869541]  ? syscall_enter_from_user_mode+0x1c/0x50
<4> [197.869566]  do_syscall_64+0x33/0x80
<4> [197.869579]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
<4> [197.869590] RIP: 0033:0x7fd8b7aee281
<4> [197.869604] Code: c3 0f 1f 84 00 00 00 00 00 48 8b 05 59 8d 20 00 c3 0f 1f 84 00 00 00 00 00 8b 05 8a d1 20 00 85 c0 75 16 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53
<4> [197.869613] RSP: 002b:00007ffea3b72008 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
<4> [197.869625] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fd8b7aee281
<4> [197.869633] RDX: 0000000000000002 RSI: 00007fd8b81a82e7 RDI: 000000000000000d
<4> [197.869641] RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000034
<4> [197.869650] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fd8b81a82e7
<4> [197.869658] R13: 000000000000000d R14: 0000000000000000 R15: 0000000000000000
<3> [197.869707]
<3> [197.869757] Allocated by task 1041:
<4> [197.869833]  kasan_save_stack+0x19/0x40
<4> [197.869843]  __kasan_kmalloc.constprop.5+0xc1/0xd0
<4> [197.869853]  kmem_cache_alloc+0x106/0x8e0
<4> [197.870059]  i915_vma_instance+0x212/0x1930 [i915]
<4> [197.870270]  eb_lookup_vmas+0xe06/0x1d10 [i915]
<4> [197.870475]  i915_gem_do_execbuffer+0x131d/0x4080 [i915]
<4> [197.870682]  i915_gem_execbuffer2_ioctl+0x103/0x5d0 [i915]
<4> [197.870701]  drm_ioctl_kernel+0x1d2/0x270
<4> [197.870710]  drm_ioctl+0x40d/0x85c
<4> [197.870721]  __x64_sys_ioctl+0x10d/0x170
<4> [197.870731]  do_syscall_64+0x33/0x80
<4> [197.870740]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
<3> [197.870748]
<3> [197.870798] Freed by task 22:
<4> [197.870865]  kasan_save_stack+0x19/0x40
<4> [197.870875]  kasan_set_track+0x1c/0x30
<4> [197.870884]  kasan_set_free_info+0x1b/0x30
<4> [197.870894]  __kasan_slab_free+0x111/0x160
<4> [197.870903]  kmem_cache_free+0xcd/0x710
<4> [197.871109]  i915_vma_parked+0x618/0x800 [i915]
<4> [197.871307]  __gt_park+0xdb/0x1e0 [i915]
<4> [197.871501]  ____intel_wakeref_put_last+0xb1/0x190 [i915]
<4> [197.871516]  process_one_work+0x8dc/0x15d0
<4> [197.871525]  worker_thread+0x82/0xb30
<4> [197.871535]  kthread+0x36d/0x440
<4> [197.871545]  ret_from_fork+0x22/0x30
<3> [197.871553]
<3> [197.871602] The buggy address belongs to the object at ffff8881258cb740
 which belongs to the cache i915_vma of size 968

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2553
Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.5+
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201016092527.29039-1-chris@chris-wilson.co.uk
(cherry picked from commit 178536b8292ecd118f59d2fac4509c7e70b99854)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a635ec8d0b94..cf6e47adfde6 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1312,7 +1312,7 @@ capture_vma(struct intel_engine_capture_vma *next,
 	}
 
 	strcpy(c->name, name);
-	c->vma = i915_vma_get(vma);
+	c->vma = vma; /* reference held while active */
 
 	c->next = next;
 	return c;
@@ -1402,7 +1402,6 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
 						 compress));
 
 		i915_active_release(&vma->active);
-		i915_vma_put(vma);
 
 		capture = this->next;
 		kfree(this);
-- 
cgit v1.2.3-59-g8ed1b


From ca05277e40216979d9976613322e64db23a850e0 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 15 Sep 2020 14:49:20 +0100
Subject: drm/i915/gt: Widen CSB pointer to u64 for the parsers

A CSB entry is 64b, and it is simpler for us to treat it as an array of
64b entries than as an array of pairs of 32b entries.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200915134923.30088-1-chris@chris-wilson.co.uk
(cherry picked from commit f24a44e52fbc9881fc5f3bcef536831a15a439f3)
(cherry picked from commit 3d4dbe0e0f0d04ebcea917b7279586817da8cf46)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 33 ++++++++++++++--------------
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index c400aaa2287b..ee6312601c56 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -278,7 +278,7 @@ struct intel_engine_execlists {
 	 *
 	 * Note these register may be either mmio or HWSP shadow.
 	 */
-	u32 *csb_status;
+	u64 *csb_status;
 
 	/**
 	 * @csb_size: context status buffer FIFO size
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 341ff8e3af4c..1a7bbbb16356 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2463,7 +2463,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
 }
 
 static inline void
-invalidate_csb_entries(const u32 *first, const u32 *last)
+invalidate_csb_entries(const u64 *first, const u64 *last)
 {
 	clflush((void *)first);
 	clflush((void *)last);
@@ -2495,14 +2495,12 @@ invalidate_csb_entries(const u32 *first, const u32 *last)
  *     bits 47-57: sw context id of the lrc the GT switched away from
  *     bits 58-63: sw counter of the lrc the GT switched away from
  */
-static inline bool
-gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+static inline bool gen12_csb_parse(const u64 *csb)
 {
-	u32 lower_dw = csb[0];
-	u32 upper_dw = csb[1];
-	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
-	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
-	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+	u64 entry = READ_ONCE(*csb);
+	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+	bool new_queue =
+		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
 	 * The context switch detail is not guaranteed to be 5 when a preemption
@@ -2512,7 +2510,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 	 * would require some extra handling, but we don't support that.
 	 */
 	if (!ctx_away_valid || new_queue) {
-		GEM_BUG_ON(!ctx_to_valid);
+		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
 		return true;
 	}
 
@@ -2521,12 +2519,11 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 	 * context switch on an unsuccessful wait instruction since we always
 	 * use polling mode.
 	 */
-	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
+	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
 	return false;
 }
 
-static inline bool
-gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+static inline bool gen8_csb_parse(const u64 *csb)
 {
 	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
 }
@@ -2534,7 +2531,7 @@ gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	const u32 * const buf = execlists->csb_status;
+	const u64 * const buf = execlists->csb_status;
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
@@ -2615,12 +2612,14 @@ static void process_csb(struct intel_engine_cs *engine)
 		 */
 
 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
-			     head, buf[2 * head + 0], buf[2 * head + 1]);
+			     head,
+			     upper_32_bits(buf[head]),
+			     lower_32_bits(buf[head]));
 
 		if (INTEL_GEN(engine->i915) >= 12)
-			promote = gen12_csb_parse(execlists, buf + 2 * head);
+			promote = gen12_csb_parse(buf + head);
 		else
-			promote = gen8_csb_parse(execlists, buf + 2 * head);
+			promote = gen8_csb_parse(buf + head);
 		if (promote) {
 			struct i915_request * const *old = execlists->active;
 
@@ -5159,7 +5158,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 	}
 
 	execlists->csb_status =
-		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
+		(u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
 	execlists->csb_write =
 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
-- 
cgit v1.2.3-59-g8ed1b


From 4a9bb58aba6db4eba2a8b3aa1edc415c94a669a8 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 15 Sep 2020 14:49:21 +0100
Subject: drm/i915/gt: Wait for CSB entries on Tigerlake

On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
Forcibly evict stale csb entries") where, presumably, due to a missing
Global Observation Point synchronisation, the write pointer of the CSB
ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
we see the GPU report more context-switch entries for us to parse, but
those entries have not been written, leading us to process stale events,
and eventually report a hung GPU.

However, this effect appears to be much more severe than we previously
saw on Icelake (though it might be best if we try the same approach
there as well and measure), and Bruce suggested the good idea of resetting
the CSB entry after use so that we can detect when it has been updated by
the GPU. By instrumenting how long that may be, we can set a reliable
upper bound for how long we should wait for:

    513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
References: HSDES#22011327657, HSDES#1508287568
Suggested-by: Bruce Chang <yu.bruce.chang@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: stable@vger.kernel.org # v5.4
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200915134923.30088-2-chris@chris-wilson.co.uk
(cherry picked from commit 233c1ae3c83f21046c6c4083da904163ece8f110)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 1a7bbbb16356..a32aabce7901 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2497,9 +2497,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
  */
 static inline bool gen12_csb_parse(const u64 *csb)
 {
-	u64 entry = READ_ONCE(*csb);
-	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-	bool new_queue =
+	bool ctx_away_valid;
+	bool new_queue;
+	u64 entry;
+
+	/* HSD#22011248461 */
+	entry = READ_ONCE(*csb);
+	if (unlikely(entry == -1)) {
+		preempt_disable();
+		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+			GEM_WARN_ON("50us CSB timeout");
+		preempt_enable();
+	}
+	WRITE_ONCE(*(u64 *)csb, -1);
+
+	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+	new_queue =
 		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
@@ -4006,6 +4019,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 	WRITE_ONCE(*execlists->csb_write, reset_value);
 	wmb(); /* Make sure this is visible to HW (paranoia?) */
 
+	/* Check that the GPU does indeed update the CSB entries! */
+	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
 	invalidate_csb_entries(&execlists->csb_status[0],
 			       &execlists->csb_status[reset_value]);
 
-- 
cgit v1.2.3-59-g8ed1b


From fea456d82c19d201c21313864105876deabe148b Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 20 Oct 2020 08:22:53 +1000
Subject: drm/ttm: fix eviction valuable range check.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This was adding size to start, but pfn and start are in pages,
so it should be using num_pages.

Not sure this fixes anything in the real world, just noticed it
during refactoring.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Cc: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20201019222257.1684769-2-airlied@gmail.com
---
 drivers/gpu/drm/ttm/ttm_bo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 70b3bee27850..eb4b7df02ca0 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -647,7 +647,7 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	/* Don't evict this BO if it's outside of the
 	 * requested placement range
 	 */
-	if (place->fpfn >= (bo->mem.start + bo->mem.size) ||
+	if (place->fpfn >= (bo->mem.start + bo->mem.num_pages) ||
 	    (place->lpfn && place->lpfn <= bo->mem.start))
 		return false;
 
-- 
cgit v1.2.3-59-g8ed1b


From b8cff311a42df4f15d6432583573d828b5c7b12a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 19 Oct 2020 09:34:44 +0100
Subject: drm/i915/gt: Onion unwind for scratch page allocation failure

In switching to using objects for our ppGTT scratch pages, care was not
taken to avoid trying to unref NULL objects on failure. And for gen6
ppGTT, it appears we forgot entirely to unwind after a partial allocation
failure.

Fixes: 89351925a477 ("drm/i915/gt: Switch to object allocations for page directories")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201019083444.1286-1-chris@chris-wilson.co.uk
(cherry picked from commit fa812ce96a46efc27cae4dcad866aaee9cb25d28)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 18 ++++++++++++------
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c |  3 ++-
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index fd0d24d28763..c30adc05fa98 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -239,18 +239,24 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
 			       I915_CACHE_NONE, PTE_READ_ONLY);
 
 	vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
-	if (IS_ERR(vm->scratch[1]))
-		return PTR_ERR(vm->scratch[1]);
+	if (IS_ERR(vm->scratch[1])) {
+		ret = PTR_ERR(vm->scratch[1]);
+		goto err_scratch0;
+	}
 
 	ret = pin_pt_dma(vm, vm->scratch[1]);
-	if (ret) {
-		i915_gem_object_put(vm->scratch[1]);
-		return ret;
-	}
+	if (ret)
+		goto err_scratch1;
 
 	fill32_px(vm->scratch[1], vm->scratch[0]->encode);
 
 	return 0;
+
+err_scratch1:
+	i915_gem_object_put(vm->scratch[1]);
+err_scratch0:
+	i915_gem_object_put(vm->scratch[0]);
+	return ret;
 }
 
 static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index eb64f474a78c..38c7069b7749 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -604,7 +604,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	return 0;
 
 free_scratch:
-	free_scratch(vm);
+	while (i--)
+		i915_gem_object_put(vm->scratch[i]);
 	return -ENOMEM;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 3da3c5c1c9825c24168f27b021339e90af37e969 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 19 Oct 2020 17:50:05 +0100
Subject: drm/i915: Exclude low pages (128KiB) of stolen from use

The GPU is trashing the low pages of its reserved memory upon reset. If
we are using this memory for ringbuffers, then we will dutiful resubmit
the trashed rings after the reset causing further resets, and worse. We
must exclude this range from our own use. The value of 128KiB was found
by empirical measurement (and verified now with a selftest) on gen9.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201019165005.18128-2-chris@chris-wilson.co.uk
(cherry picked from commit d3606757e611fbd48bb239e8c2fe9779b3f50035)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.debug         |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c |   6 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.h |   2 +
 drivers/gpu/drm/i915/gt/selftest_reset.c   | 196 +++++++++++++++++++++++++++++
 4 files changed, 203 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 1cb28c20807c..25cd9788a4d5 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -153,6 +153,7 @@ config DRM_I915_SELFTEST
 	select DRM_EXPORT_FOR_TESTS if m
 	select FAULT_INJECTION
 	select PRIME_NUMBERS
+	select CRC32
 	help
 	  Choose this option to allow the driver to perform selftests upon
 	  loading; also requires the i915.selftest=1 module parameter. To
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 0be5e8683337..84b2707d8b17 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -53,8 +53,10 @@ int i915_gem_stolen_insert_node(struct drm_i915_private *i915,
 				struct drm_mm_node *node, u64 size,
 				unsigned alignment)
 {
-	return i915_gem_stolen_insert_node_in_range(i915, node, size,
-						    alignment, 0, U64_MAX);
+	return i915_gem_stolen_insert_node_in_range(i915, node,
+						    size, alignment,
+						    I915_GEM_STOLEN_BIAS,
+						    U64_MAX);
 }
 
 void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
index e15c0adad8af..61e028063f9f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
@@ -30,4 +30,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 					       resource_size_t stolen_offset,
 					       resource_size_t size);
 
+#define I915_GEM_STOLEN_BIAS SZ_128K
+
 #endif /* __I915_GEM_STOLEN_H__ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 35406ecdf0b2..ef5aeebbeeb0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -3,9 +3,203 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include <linux/crc32.h>
+
+#include "gem/i915_gem_stolen.h"
+
+#include "i915_memcpy.h"
 #include "i915_selftest.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_atomic.h"
+#include "selftests/igt_spinner.h"
+
+static int
+__igt_reset_stolen(struct intel_gt *gt,
+		   intel_engine_mask_t mask,
+		   const char *msg)
+{
+	struct i915_ggtt *ggtt = &gt->i915->ggtt;
+	const struct resource *dsm = &gt->i915->dsm;
+	resource_size_t num_pages, page;
+	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
+	enum intel_engine_id id;
+	struct igt_spinner spin;
+	long max, count;
+	void *tmp;
+	u32 *crc;
+	int err;
+
+	if (!drm_mm_node_allocated(&ggtt->error_capture))
+		return 0;
+
+	num_pages = resource_size(dsm) >> PAGE_SHIFT;
+	if (!num_pages)
+		return 0;
+
+	crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL);
+	if (!crc)
+		return -ENOMEM;
+
+	tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!tmp) {
+		err = -ENOMEM;
+		goto err_crc;
+	}
+
+	igt_global_reset_lock(gt);
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+	err = igt_spinner_init(&spin, gt);
+	if (err)
+		goto err_lock;
+
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
+		struct i915_request *rq;
+
+		if (!(mask & engine->mask))
+			continue;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			goto err_spin;
+		}
+		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+		intel_context_put(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_spin;
+		}
+		i915_request_add(rq);
+	}
+
+	for (page = 0; page < num_pages; page++) {
+		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
+		void __iomem *s;
+		void *in;
+
+		ggtt->vm.insert_page(&ggtt->vm, dma,
+				     ggtt->error_capture.start,
+				     I915_CACHE_NONE, 0);
+		mb();
+
+		s = io_mapping_map_wc(&ggtt->iomap,
+				      ggtt->error_capture.start,
+				      PAGE_SIZE);
+
+		if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
+					     page << PAGE_SHIFT,
+					     ((page + 1) << PAGE_SHIFT) - 1))
+			memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
+
+		in = s;
+		if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
+			in = tmp;
+		crc[page] = crc32_le(0, in, PAGE_SIZE);
+
+		io_mapping_unmap(s);
+	}
+	mb();
+	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
+
+	if (mask == ALL_ENGINES) {
+		intel_gt_reset(gt, mask, NULL);
+	} else {
+		for_each_engine(engine, gt, id) {
+			if (mask & engine->mask)
+				intel_engine_reset(engine, NULL);
+		}
+	}
+
+	max = -1;
+	count = 0;
+	for (page = 0; page < num_pages; page++) {
+		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
+		void __iomem *s;
+		void *in;
+		u32 x;
+
+		ggtt->vm.insert_page(&ggtt->vm, dma,
+				     ggtt->error_capture.start,
+				     I915_CACHE_NONE, 0);
+		mb();
+
+		s = io_mapping_map_wc(&ggtt->iomap,
+				      ggtt->error_capture.start,
+				      PAGE_SIZE);
+
+		in = s;
+		if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
+			in = tmp;
+		x = crc32_le(0, in, PAGE_SIZE);
+
+		if (x != crc[page] &&
+		    !__drm_mm_interval_first(&gt->i915->mm.stolen,
+					     page << PAGE_SHIFT,
+					     ((page + 1) << PAGE_SHIFT) - 1)) {
+			pr_debug("unused stolen page %pa modified by GPU reset\n",
+				 &page);
+			if (count++ == 0)
+				igt_hexdump(in, PAGE_SIZE);
+			max = page;
+		}
+
+		io_mapping_unmap(s);
+	}
+	mb();
+	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
+
+	if (count > 0) {
+		pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n",
+			msg, count, max);
+	}
+	if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) {
+		pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n",
+		       msg, I915_GEM_STOLEN_BIAS);
+		err = -EINVAL;
+	}
+
+err_spin:
+	igt_spinner_fini(&spin);
+
+err_lock:
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+	igt_global_reset_unlock(gt);
+
+	kfree(tmp);
+err_crc:
+	kfree(crc);
+	return err;
+}
+
+static int igt_reset_device_stolen(void *arg)
+{
+	return __igt_reset_stolen(arg, ALL_ENGINES, "device");
+}
+
+static int igt_reset_engines_stolen(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	for_each_engine(engine, gt, id) {
+		err = __igt_reset_stolen(gt, engine->mask, engine->name);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
 
 static int igt_global_reset(void *arg)
 {
@@ -164,6 +358,8 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
+		SUBTEST(igt_reset_device_stolen),
+		SUBTEST(igt_reset_engines_stolen),
 		SUBTEST(igt_wedged_reset),
 		SUBTEST(igt_atomic_reset),
 		SUBTEST(igt_atomic_engine_reset),
-- 
cgit v1.2.3-59-g8ed1b


From 8195400f7ea95399f721ad21f4d663a62c65036f Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 19 Oct 2020 11:15:23 +0100
Subject: drm/i915: Force VT'd workarounds when running as a guest OS

If i915.ko is being used as a passthrough device, it does not know if
the host is using intel_iommu. Mixing the iommu and gfx causes a few
issues (such as scanout overfetch) which we need to workaround inside
the driver, so if we detect we are running under a hypervisor, also
assume the device access is being virtualised.

Reported-by: Stefan Fritsch <sf@sfritsch.de>
Suggested-by: Stefan Fritsch <sf@sfritsch.de>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Stefan Fritsch <sf@sfritsch.de>
Cc: stable@vger.kernel.org
Tested-by: Stefan Fritsch <sf@sfritsch.de>
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201019101523.4145-1-chris@chris-wilson.co.uk
(cherry picked from commit f566fdcd6cc49a9d5b5d782f56e3e7cb243f01b8)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eef9a821c49c..8426d5974669 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -33,6 +33,8 @@
 #include <uapi/drm/i915_drm.h>
 #include <uapi/drm/drm_fourcc.h>
 
+#include <asm/hypervisor.h>
+
 #include <linux/io-mapping.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
@@ -1760,7 +1762,9 @@ static inline bool intel_vtd_active(void)
 	if (intel_iommu_gfx_mapped)
 		return true;
 #endif
-	return false;
+
+	/* Running as a guest, we assume the host is enforcing VT'd */
+	return !hypervisor_is_type(X86_HYPER_NATIVE);
 }
 
 static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
-- 
cgit v1.2.3-59-g8ed1b


From 5c6c13cd1102caf92d006a3cf4591c0229019daf Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 11 Aug 2020 10:25:32 +0100
Subject: drm/i915: Drop runtime-pm assert from vgpu io accessors

The "mmio" writes into vgpu registers are simple memory traps from the
guest into the host. We do not need to assert in the guest that the
device is awake for the io as we do not write to the device itself.

However, over time we have refactored all the mmio accessors with the
result that the vgpu reuses the gen2 accessors and so inherits the
assert for runtime-pm of the native device. The assert though has
actually been there since commit 3be0bf5acca6 ("drm/i915: Create vGPU
specific MMIO operations to reduce traps").

References: 3be0bf5acca6 ("drm/i915: Create vGPU specific MMIO operations to reduce traps")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Yan Zhao <yan.y.zhao@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20200811092532.13753-1-chris@chris-wilson.co.uk
(cherry picked from commit 0e65ce24a33c1d37da4bf43c34e080334ec6cb60)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_uncore.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 263ffcb832b7..97ded2a59cf4 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1209,6 +1209,18 @@ unclaimed_reg_debug(struct intel_uncore *uncore,
 		spin_unlock(&uncore->debug->lock);
 }
 
+#define __vgpu_read(x) \
+static u##x \
+vgpu_read##x(struct intel_uncore *uncore, i915_reg_t reg, bool trace) { \
+	u##x val = __raw_uncore_read##x(uncore, reg); \
+	trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \
+	return val; \
+}
+__vgpu_read(8)
+__vgpu_read(16)
+__vgpu_read(32)
+__vgpu_read(64)
+
 #define GEN2_READ_HEADER(x) \
 	u##x val = 0; \
 	assert_rpm_wakelock_held(uncore->rpm);
@@ -1414,6 +1426,16 @@ __gen_reg_write_funcs(gen8);
 #undef GEN6_WRITE_FOOTER
 #undef GEN6_WRITE_HEADER
 
+#define __vgpu_write(x) \
+static void \
+vgpu_write##x(struct intel_uncore *uncore, i915_reg_t reg, u##x val, bool trace) { \
+	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
+	__raw_uncore_write##x(uncore, reg, val); \
+}
+__vgpu_write(8)
+__vgpu_write(16)
+__vgpu_write(32)
+
 #define ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, x) \
 do { \
 	(uncore)->funcs.mmio_writeb = x##_write8; \
@@ -1735,7 +1757,10 @@ static void uncore_raw_init(struct intel_uncore *uncore)
 {
 	GEM_BUG_ON(intel_uncore_has_forcewake(uncore));
 
-	if (IS_GEN(uncore->i915, 5)) {
+	if (intel_vgpu_active(uncore->i915)) {
+		ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, vgpu);
+		ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, vgpu);
+	} else if (IS_GEN(uncore->i915, 5)) {
 		ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, gen5);
 		ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, gen5);
 	} else {
-- 
cgit v1.2.3-59-g8ed1b


From 274c240c760ed4647ddae1f1b994e0dd3f71cbb1 Mon Sep 17 00:00:00 2001
From: Likun Gao <Likun.Gao@amd.com>
Date: Wed, 14 Oct 2020 14:05:18 +0800
Subject: drm/amdgpu: add function to program pbb mode for sienna cichlid

Add function for sienna_cichlid to force PBB workload mode to zero by
checking whether there have SE been harvested.

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 5.9.x
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 62 ++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 9792ec737029..f95a173c52f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -112,6 +112,22 @@
 #define mmCP_HYP_ME_UCODE_DATA			0x5817
 #define mmCP_HYP_ME_UCODE_DATA_BASE_IDX		1
 
+//CC_GC_SA_UNIT_DISABLE
+#define mmCC_GC_SA_UNIT_DISABLE                 0x0fe9
+#define mmCC_GC_SA_UNIT_DISABLE_BASE_IDX        0
+#define CC_GC_SA_UNIT_DISABLE__SA_DISABLE__SHIFT	0x8
+#define CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK		0x0000FF00L
+//GC_USER_SA_UNIT_DISABLE
+#define mmGC_USER_SA_UNIT_DISABLE               0x0fea
+#define mmGC_USER_SA_UNIT_DISABLE_BASE_IDX      0
+#define GC_USER_SA_UNIT_DISABLE__SA_DISABLE__SHIFT	0x8
+#define GC_USER_SA_UNIT_DISABLE__SA_DISABLE_MASK	0x0000FF00L
+//PA_SC_ENHANCE_3
+#define mmPA_SC_ENHANCE_3                       0x1085
+#define mmPA_SC_ENHANCE_3_BASE_IDX              0
+#define PA_SC_ENHANCE_3__FORCE_PBB_WORKLOAD_MODE_TO_ZERO__SHIFT 0x3
+#define PA_SC_ENHANCE_3__FORCE_PBB_WORKLOAD_MODE_TO_ZERO_MASK   0x00000008L
+
 MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
 MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/navi10_me.bin");
@@ -3188,6 +3204,8 @@ static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev);
+static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev);
 
 static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
 {
@@ -6950,6 +6968,9 @@ static int gfx_v10_0_hw_init(void *handle)
 	if (r)
 		return r;
 
+	if (adev->asic_type == CHIP_SIENNA_CICHLID)
+		gfx_v10_3_program_pbb_mode(adev);
+
 	return r;
 }
 
@@ -8797,6 +8818,47 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
 	return 0;
 }
 
+static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev)
+{
+	uint32_t efuse_setting, vbios_setting, disabled_sa, max_sa_mask;
+
+	efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SA_UNIT_DISABLE);
+	efuse_setting &= CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK;
+	efuse_setting >>= CC_GC_SA_UNIT_DISABLE__SA_DISABLE__SHIFT;
+
+	vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SA_UNIT_DISABLE);
+	vbios_setting &= GC_USER_SA_UNIT_DISABLE__SA_DISABLE_MASK;
+	vbios_setting >>= GC_USER_SA_UNIT_DISABLE__SA_DISABLE__SHIFT;
+
+	max_sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+						adev->gfx.config.max_shader_engines);
+	disabled_sa = efuse_setting | vbios_setting;
+	disabled_sa &= max_sa_mask;
+
+	return disabled_sa;
+}
+
+static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev)
+{
+	uint32_t max_sa_per_se, max_sa_per_se_mask, max_shader_engines;
+	uint32_t disabled_sa_mask, se_index, disabled_sa_per_se;
+
+	disabled_sa_mask = gfx_v10_3_get_disabled_sa(adev);
+
+	max_sa_per_se = adev->gfx.config.max_sh_per_se;
+	max_sa_per_se_mask = (1 << max_sa_per_se) - 1;
+	max_shader_engines = adev->gfx.config.max_shader_engines;
+
+	for (se_index = 0; max_shader_engines > se_index; se_index++) {
+		disabled_sa_per_se = disabled_sa_mask >> (se_index * max_sa_per_se);
+		disabled_sa_per_se &= max_sa_per_se_mask;
+		if (disabled_sa_per_se == max_sa_per_se_mask) {
+			WREG32_FIELD15(GC, 0, PA_SC_ENHANCE_3, FORCE_PBB_WORKLOAD_MODE_TO_ZERO, 1);
+			break;
+		}
+	}
+}
+
 const struct amdgpu_ip_block_version gfx_v10_0_ip_block =
 {
 	.type = AMD_IP_BLOCK_TYPE_GFX,
-- 
cgit v1.2.3-59-g8ed1b


From 843c7eb2f7571aa092a8ea010c80e8d94c197f67 Mon Sep 17 00:00:00 2001
From: Likun Gao <Likun.Gao@amd.com>
Date: Wed, 30 Sep 2020 14:34:08 +0800
Subject: drm/amdgpu: add rlc iram and dram firmware support

Support to load RLC iram and dram ucode when RLC firmware struct use v2.2

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   |  6 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h   |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 10 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 11 +++++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c    | 39 ++++++++++++++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h   |  4 ++--
 6 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 18be544d8c1e..a9cae6d943c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1750,6 +1750,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
 	case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
 		*type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
 		break;
+	case AMDGPU_UCODE_ID_RLC_IRAM:
+		*type = GFX_FW_TYPE_RLC_IRAM;
+		break;
+	case AMDGPU_UCODE_ID_RLC_DRAM:
+		*type = GFX_FW_TYPE_RLC_DRAM_BOOT;
+		break;
 	case AMDGPU_UCODE_ID_SMC:
 		*type = GFX_FW_TYPE_SMU;
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 60bb3e8b3118..aeaaae713c59 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -168,12 +168,16 @@ struct amdgpu_rlc {
 	u32 save_restore_list_cntl_size_bytes;
 	u32 save_restore_list_gpm_size_bytes;
 	u32 save_restore_list_srm_size_bytes;
+	u32 rlc_iram_ucode_size_bytes;
+	u32 rlc_dram_ucode_size_bytes;
 
 	u32 *register_list_format;
 	u32 *register_restore;
 	u8 *save_restore_list_cntl;
 	u8 *save_restore_list_gpm;
 	u8 *save_restore_list_srm;
+	u8 *rlc_iram_ucode;
+	u8 *rlc_dram_ucode;
 
 	bool is_rlc_v2_1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 55fe19a2f332..b313ce4c3e97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -500,6 +500,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
 	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
 	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM &&
+	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_IRAM &&
+	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_DRAM &&
 		 ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM &&
 		 ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV &&
 		 ucode->ucode_id != AMDGPU_UCODE_ID_DMCUB)) {
@@ -556,6 +558,14 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 		ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
 		memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
 		       ucode->ucode_size);
+	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_IRAM) {
+		ucode->ucode_size = adev->gfx.rlc.rlc_iram_ucode_size_bytes;
+		memcpy(ucode->kaddr, adev->gfx.rlc.rlc_iram_ucode,
+		       ucode->ucode_size);
+	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_DRAM) {
+		ucode->ucode_size = adev->gfx.rlc.rlc_dram_ucode_size_bytes;
+		memcpy(ucode->kaddr, adev->gfx.rlc.rlc_dram_ucode,
+		       ucode->ucode_size);
 	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MES) {
 		ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
 		memcpy(ucode->kaddr, (void *)((uint8_t *)adev->mes.fw->data +
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 3c23c6293ff9..0e43b46d3ab5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -222,6 +222,15 @@ struct rlc_firmware_header_v2_1 {
 	uint32_t save_restore_list_srm_offset_bytes;
 };
 
+/* version_major=2, version_minor=1 */
+struct rlc_firmware_header_v2_2 {
+	struct rlc_firmware_header_v2_1 v2_1;
+	uint32_t rlc_iram_ucode_size_bytes;
+	uint32_t rlc_iram_ucode_offset_bytes;
+	uint32_t rlc_dram_ucode_size_bytes;
+	uint32_t rlc_dram_ucode_offset_bytes;
+};
+
 /* version_major=1, version_minor=0 */
 struct sdma_firmware_header_v1_0 {
 	struct common_firmware_header header;
@@ -339,6 +348,8 @@ enum AMDGPU_UCODE_ID {
 	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
 	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
 	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
+	AMDGPU_UCODE_ID_RLC_IRAM,
+	AMDGPU_UCODE_ID_RLC_DRAM,
 	AMDGPU_UCODE_ID_RLC_G,
 	AMDGPU_UCODE_ID_STORAGE,
 	AMDGPU_UCODE_ID_SMC,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index f95a173c52f1..aff14e257ef5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3604,6 +3604,17 @@ static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
 }
 
+static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_2 *rlc_hdr;
+
+	rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+	adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
+	adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
+	adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
+	adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
+}
+
 static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
 {
 	bool ret = false;
@@ -3719,8 +3730,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
 		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
 		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
-		if (version_major == 2 && version_minor == 1)
-			adev->gfx.rlc.is_rlc_v2_1 = true;
 
 		adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
 		adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
@@ -3762,8 +3771,12 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 		for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
 			adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
 
-		if (adev->gfx.rlc.is_rlc_v2_1)
-			gfx_v10_0_init_rlc_ext_microcode(adev);
+		if (version_major == 2) {
+			if (version_minor >= 1)
+				gfx_v10_0_init_rlc_ext_microcode(adev);
+			if (version_minor == 2)
+				gfx_v10_0_init_rlc_iram_dram_microcode(adev);
+		}
 	}
 
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
@@ -3824,8 +3837,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 			adev->firmware.fw_size +=
 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 		}
-		if (adev->gfx.rlc.is_rlc_v2_1 &&
-		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
+		if (adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
@@ -3845,6 +3857,21 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 			info->fw = adev->gfx.rlc_fw;
 			adev->firmware.fw_size +=
 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+
+			if (adev->gfx.rlc.rlc_iram_ucode_size_bytes &&
+			    adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
+				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
+				info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
+				info->fw = adev->gfx.rlc_fw;
+				adev->firmware.fw_size +=
+					ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
+
+				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
+				info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
+				info->fw = adev->gfx.rlc_fw;
+				adev->firmware.fw_size +=
+					ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
+			}
 		}
 
 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 1ef2f5b1d828..4137dc710aaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -201,7 +201,7 @@ enum psp_gfx_fw_type {
 	GFX_FW_TYPE_UVD1        = 23,   /* UVD1                     VG-20   */
 	GFX_FW_TYPE_TOC         = 24,   /* TOC                      NV-10   */
 	GFX_FW_TYPE_RLC_P                           = 25,   /* RLC P                    NV      */
-	GFX_FW_TYPE_RLX6                            = 26,   /* RLX6                     NV      */
+	GFX_FW_TYPE_RLC_IRAM                        = 26,   /* RLC_IRAM                 NV      */
 	GFX_FW_TYPE_GLOBAL_TAP_DELAYS               = 27,   /* GLOBAL TAP DELAYS        NV      */
 	GFX_FW_TYPE_SE0_TAP_DELAYS                  = 28,   /* SE0 TAP DELAYS           NV      */
 	GFX_FW_TYPE_SE1_TAP_DELAYS                  = 29,   /* SE1 TAP DELAYS           NV      */
@@ -223,7 +223,7 @@ enum psp_gfx_fw_type {
 	GFX_FW_TYPE_ACCUM_CTRL_RAM                  = 45,   /* ACCUM CTRL RAM           NV      */
 	GFX_FW_TYPE_RLCP_CAM                        = 46,   /* RLCP CAM                 NV      */
 	GFX_FW_TYPE_RLC_SPP_CAM_EXT                 = 47,   /* RLC SPP CAM EXT          NV      */
-	GFX_FW_TYPE_RLX6_DRAM_BOOT                  = 48,   /* RLX6 DRAM BOOT           NV      */
+	GFX_FW_TYPE_RLC_DRAM_BOOT                   = 48,   /* RLC DRAM BOOT            NV      */
 	GFX_FW_TYPE_VCN0_RAM                        = 49,   /* VCN_RAM                  NV + RN */
 	GFX_FW_TYPE_VCN1_RAM                        = 50,   /* VCN_RAM                  NV + RN */
 	GFX_FW_TYPE_DMUB                            = 51,   /* DMUB                          RN */
-- 
cgit v1.2.3-59-g8ed1b


From 207ac684792560acdb9e06f9d707ebf63c84b0e0 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Thu, 15 Oct 2020 14:57:46 +0800
Subject: drm/amdgpu: correct the gpu reset handling for job != NULL case

Current code wrongly treat all cases as job == NULL.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-and-tested-by: Jane Jian <Jane.Jian@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e8b41756c9f9..37da3537ba2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4625,7 +4625,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 		r = amdgpu_device_pre_asic_reset(tmp_adev,
-						 NULL,
+						 (tmp_adev == adev) ? job : NULL,
 						 &need_full_reset);
 		/*TODO Should we stop ?*/
 		if (r) {
-- 
cgit v1.2.3-59-g8ed1b


From d48d7484d8dca1d4577fc53f1f826e68420d00eb Mon Sep 17 00:00:00 2001
From: Kevin Wang <kevin1.wang@amd.com>
Date: Fri, 16 Oct 2020 11:07:47 +0800
Subject: drm/amd/swsmu: add missing feature map for sienna_cichlid

it will cause smu sysfs node of "pp_features" show error.

Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
Reviewed-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 5.9.x
---
 drivers/gpu/drm/amd/pm/inc/smu_types.h                  | 1 +
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h
index 35fc46d3c9c0..cbf4a58b77d9 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h
@@ -220,6 +220,7 @@ enum smu_clk_type {
        __SMU_DUMMY_MAP(DPM_MP0CLK),                    	\
        __SMU_DUMMY_MAP(DPM_LINK),                      	\
        __SMU_DUMMY_MAP(DPM_DCEFCLK),                   	\
+       __SMU_DUMMY_MAP(DPM_XGMI),			\
        __SMU_DUMMY_MAP(DS_GFXCLK),                     	\
        __SMU_DUMMY_MAP(DS_SOCCLK),                     	\
        __SMU_DUMMY_MAP(DS_LCLK),                       	\
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index c27806fd07e0..ca2abb2e5340 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -151,14 +151,17 @@ static struct cmn2asic_mapping sienna_cichlid_feature_mask_map[SMU_FEATURE_COUNT
 	FEA_MAP(DPM_GFXCLK),
 	FEA_MAP(DPM_GFX_GPO),
 	FEA_MAP(DPM_UCLK),
+	FEA_MAP(DPM_FCLK),
 	FEA_MAP(DPM_SOCCLK),
 	FEA_MAP(DPM_MP0CLK),
 	FEA_MAP(DPM_LINK),
 	FEA_MAP(DPM_DCEFCLK),
+	FEA_MAP(DPM_XGMI),
 	FEA_MAP(MEM_VDDCI_SCALING),
 	FEA_MAP(MEM_MVDD_SCALING),
 	FEA_MAP(DS_GFXCLK),
 	FEA_MAP(DS_SOCCLK),
+	FEA_MAP(DS_FCLK),
 	FEA_MAP(DS_LCLK),
 	FEA_MAP(DS_DCEFCLK),
 	FEA_MAP(DS_UCLK),
-- 
cgit v1.2.3-59-g8ed1b


From 0d142232d9436acab3578ee995472f58adcbf201 Mon Sep 17 00:00:00 2001
From: Likun Gao <Likun.Gao@amd.com>
Date: Thu, 15 Oct 2020 10:48:15 +0800
Subject: drm/amdgpu: update golden setting for sienna_cichlid

Update golden setting for sienna_cichlid.

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 5.9.x
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index aff14e257ef5..1c98b248a7fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3107,6 +3107,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x10f80988),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG,  0x00000020, 0x00000020),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
-- 
cgit v1.2.3-59-g8ed1b


From 0d427f6c290c69827b2ca33c5f1386816992e4d8 Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Wed, 14 Oct 2020 13:10:06 -0400
Subject: drm/amd/display: Revert "drm/amd/display: Fix a list corruption"

This fixes regression on device unplug and/or driver unload.

[   65.681501 <    0.000004>] BUG: kernel NULL pointer dereference, address: 0000000000000008
[   65.681504 <    0.000003>] #PF: supervisor write access in kernel mode
[   65.681506 <    0.000002>] #PF: error_code(0x0002) - not-present page
[   65.681507 <    0.000001>] PGD 7c9437067 P4D 7c9437067 PUD 7c9db7067 PMD 0
[   65.681511 <    0.000004>] Oops: 0002 [#1] SMP NOPTI
[   65.681512 <    0.000001>] CPU: 8 PID: 127 Comm: kworker/8:1 Tainted: G        W  O      5.9.0-rc2-dev+ #59
[   65.681514 <    0.000002>] Hardware name: System manufacturer System Product Name/PRIME X470-PRO, BIOS 4406 02/28/2019
[   65.681525 <    0.000011>] Workqueue: events drm_connector_free_work_fn [drm]
[   65.681535 <    0.000010>] RIP: 0010:drm_atomic_private_obj_fini+0x11/0x60 [drm]
[   65.681537 <    0.000002>] Code: de 4c 89 e7 e8 70 f2 ba f8 48 8d 65 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 90 0f 1f 44 00 00 48 8b 47 08 48 8b 17 55 48 89 e5 53 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 fb 48 89
[   65.681541 <    0.000004>] RSP: 0018:ffffa5fa805efdd8 EFLAGS: 00010246
[   65.681542 <    0.000001>] RAX: 0000000000000000 RBX: ffff9a4b094654d8 RCX: 0000000000000000
[   65.681544 <    0.000002>] RDX: 0000000000000000 RSI: ffffffffba197bc2 RDI: ffff9a4b094654d8
[   65.681545 <    0.000001>] RBP: ffffa5fa805efde0 R08: ffffffffba197b82 R09: 0000000000000040
[   65.681547 <    0.000002>] R10: ffffa5fa805efdc8 R11: 000000000000007f R12: ffff9a4b09465888
[   65.681549 <    0.000002>] R13: ffff9a4b36f20010 R14: ffff9a4b36f20290 R15: ffff9a4b3a692840
[   65.681551 <    0.000002>] FS:  0000000000000000(0000) GS:ffff9a4b3ea00000(0000) knlGS:0000000000000000
[   65.681553 <    0.000002>] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   65.681554 <    0.000001>] CR2: 0000000000000008 CR3: 00000007c9c82000 CR4: 00000000003506e0
[   65.681556 <    0.000002>] Call Trace:
[   65.681561 <    0.000005>]  drm_dp_mst_topology_mgr_destroy+0xc4/0xe0 [drm_kms_helper]
[   65.681612 <    0.000051>]  amdgpu_dm_connector_destroy+0x3d/0x110 [amdgpu]
[   65.681622 <    0.000010>]  drm_connector_free_work_fn+0x78/0x90 [drm]
[   65.681624 <    0.000002>]  process_one_work+0x164/0x410
[   65.681626 <    0.000002>]  worker_thread+0x4d/0x450
[   65.681628 <    0.000002>]  ? rescuer_thread+0x390/0x390
[   65.681630 <    0.000002>]  kthread+0x10a/0x140
[   65.681632 <    0.000002>]  ? kthread_unpark+0x70/0x70
[   65.681634 <    0.000002>]  ret_from_fork+0x22/0x30

This reverts commit 1545fbf97eafc1dbdc2923e58b4186b16a834784.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index bb1bc7f5d149..51223450918a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5063,7 +5063,6 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
 	struct amdgpu_device *adev = drm_to_adev(connector->dev);
 	struct amdgpu_display_manager *dm = &adev->dm;
 
-	drm_atomic_private_obj_fini(&aconnector->mst_mgr.base);
 #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
 	defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
 
-- 
cgit v1.2.3-59-g8ed1b


From 5dff80bdce9e385af5716ed083f9e33e814484ab Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Wed, 14 Oct 2020 13:12:30 -0400
Subject: drm/amd/display: Avoid MST manager resource leak.

On connector destruction call drm_dp_mst_topology_mgr_destroy
to release resources allocated in drm_dp_mst_topology_mgr_init.
Do it only if MST manager was initilized before otherwsie a crash
is seen on driver unload/device unplug.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 51223450918a..e2b23486ba4c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5063,6 +5063,13 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
 	struct amdgpu_device *adev = drm_to_adev(connector->dev);
 	struct amdgpu_display_manager *dm = &adev->dm;
 
+	/*
+	 * Call only if mst_mgr was iniitalized before since it's not done
+	 * for all connector types.
+	 */
+	if (aconnector->mst_mgr.dev)
+		drm_dp_mst_topology_mgr_destroy(&aconnector->mst_mgr);
+
 #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
 	defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
 
-- 
cgit v1.2.3-59-g8ed1b


From f1bcddffe46b349a82445a8d9efd5f5fcb72557f Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Fri, 16 Oct 2020 10:50:44 -0400
Subject: drm/amd/psp: Fix sysfs: cannot create duplicate filename

psp sysfs not cleaned up on driver unload for sienna_cichlid

Fixes: ce87c98db428e7 ("drm/amdgpu: Include sienna_cichlid in USBC PD FW support.")
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 5.9.x
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a9cae6d943c4..96a9699f87ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -208,7 +208,8 @@ static int psp_sw_fini(void *handle)
 		adev->psp.ta_fw = NULL;
 	}
 
-	if (adev->asic_type == CHIP_NAVI10)
+	if (adev->asic_type == CHIP_NAVI10 ||
+	    adev->asic_type == CHIP_SIENNA_CICHLID)
 		psp_sysfs_fini(adev);
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From a6c42e8431657487b48fe5f57378517e16eef404 Mon Sep 17 00:00:00 2001
From: Kevin Wang <kevin1.wang@amd.com>
Date: Fri, 16 Oct 2020 16:59:25 +0800
Subject: drm/amd/swsmu: correct wrong feature bit mapping

1. when smc feature bit isn't mapped,
the feature state isn't showed on sysfs node of pp_features.
2. add pp_features table title

Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index c30d3338825f..92b2ea4c197b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -431,10 +431,9 @@ size_t smu_cmn_get_pp_feature_mask(struct smu_context *smu,
 				   char *buf)
 {
 	uint32_t feature_mask[2] = { 0 };
-	int32_t feature_index = 0;
+	int feature_index = 0;
 	uint32_t count = 0;
-	uint32_t sort_feature[SMU_FEATURE_COUNT];
-	uint64_t hw_feature_count = 0;
+	int8_t sort_feature[SMU_FEATURE_COUNT];
 	size_t size = 0;
 	int ret = 0, i;
 
@@ -447,23 +446,31 @@ size_t smu_cmn_get_pp_feature_mask(struct smu_context *smu,
 	size =  sprintf(buf + size, "features high: 0x%08x low: 0x%08x\n",
 			feature_mask[1], feature_mask[0]);
 
+	memset(sort_feature, -1, sizeof(sort_feature));
+
 	for (i = 0; i < SMU_FEATURE_COUNT; i++) {
 		feature_index = smu_cmn_to_asic_specific_index(smu,
 							       CMN2ASIC_MAPPING_FEATURE,
 							       i);
 		if (feature_index < 0)
 			continue;
+
 		sort_feature[feature_index] = i;
-		hw_feature_count++;
 	}
 
-	for (i = 0; i < hw_feature_count; i++) {
+	size += sprintf(buf + size, "%-2s. %-20s  %-3s : %-s\n",
+			"No", "Feature", "Bit", "State");
+
+	for (i = 0; i < SMU_FEATURE_COUNT; i++) {
+		if (sort_feature[i] < 0)
+			continue;
+
 		size += sprintf(buf + size, "%02d. %-20s (%2d) : %s\n",
-			       count++,
-			       smu_get_feature_name(smu, sort_feature[i]),
-			       i,
-			       !!smu_cmn_feature_is_enabled(smu, sort_feature[i]) ?
-			       "enabled" : "disabled");
+				count++,
+				smu_get_feature_name(smu, sort_feature[i]),
+				i,
+				!!smu_cmn_feature_is_enabled(smu, sort_feature[i]) ?
+				"enabled" : "disabled");
 	}
 
 	return size;
-- 
cgit v1.2.3-59-g8ed1b


From d56b1980d7efe9ef08469e856fc0703d0cef65e4 Mon Sep 17 00:00:00 2001
From: Jay Cornwall <jay.cornwall@amd.com>
Date: Sat, 17 Oct 2020 08:38:43 -0500
Subject: drm/amdkfd: Use same SQ prefetch setting as amdgpu

0 causes instruction fetch stall at cache line boundary under some
conditions on Navi10. A non-zero prefetch is the preferred default
in any case.

Fixes soft hang in Luxmark.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index 72e4d61ac752..ad0593342333 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -58,8 +58,9 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
 	/* check if sh_mem_config register already configured */
 	if (qpd->sh_mem_config == 0) {
 		qpd->sh_mem_config =
-				SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
-					SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+			(SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+			(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
 #if 0
 		/* TODO:
 		 *    This shouldn't be an issue with Navi10.  Verify.
-- 
cgit v1.2.3-59-g8ed1b


From 9a2f408f5406df567a3515f4cb5c2ce1bde64501 Mon Sep 17 00:00:00 2001
From: Likun Gao <Likun.Gao@amd.com>
Date: Tue, 20 Oct 2020 16:29:30 +0800
Subject: drm/amd/pm: fix pcie information for sienna cichlid

Fix the function used for sienna cichlid to get correct PCIE information
by pp_dpm_pcie.

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 5.9.x
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index ca2abb2e5340..d708b383f83b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -962,8 +962,8 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 		}
 		break;
 	case SMU_PCIE:
-		gen_speed = smu_v11_0_get_current_pcie_link_speed(smu);
-		lane_width = smu_v11_0_get_current_pcie_link_width(smu);
+		gen_speed = smu_v11_0_get_current_pcie_link_speed_level(smu);
+		lane_width = smu_v11_0_get_current_pcie_link_width_level(smu);
 		for (i = 0; i < NUM_LINK_LEVELS; i++)
 			size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i,
 					(dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 0) ? "2.5GT/s," :
-- 
cgit v1.2.3-59-g8ed1b


From e4eeceb73cb06b8fa379b94cbba77e6a0a032e43 Mon Sep 17 00:00:00 2001
From: John Clements <john.clements@amd.com>
Date: Wed, 21 Oct 2020 16:20:42 +0800
Subject: Revert drm/amdgpu: disable sienna chichlid UMC RAS

This reverts commit 265c280a4807419249644156654e5c40a235ea84.

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 8bf6a7c056bc..4e36551ab50b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1986,7 +1986,8 @@ static int amdgpu_ras_check_asic_type(struct amdgpu_device *adev)
 {
 	if (adev->asic_type != CHIP_VEGA10 &&
 		adev->asic_type != CHIP_VEGA20 &&
-		adev->asic_type != CHIP_ARCTURUS)
+		adev->asic_type != CHIP_ARCTURUS &&
+		adev->asic_type != CHIP_SIENNA_CICHLID)
 		return 1;
 	else
 		return 0;
@@ -2030,7 +2031,6 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
 
 	*supported = amdgpu_ras_enable == 0 ?
 			0 : *hw_supported & amdgpu_ras_mask;
-
 	adev->ras_features = *supported;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 392d256fa26d943fb0a019fea4be80382780d3b1 Mon Sep 17 00:00:00 2001
From: Kenneth Feng <kenneth.feng@amd.com>
Date: Wed, 21 Oct 2020 16:15:47 +0800
Subject: drm/amd/pm: fix pp_dpm_fclk

fclk value is missing in pp_dpm_fclk. add this to correctly show the current value.

Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 5.9.x
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index d708b383f83b..9ca3d93b1c95 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -455,6 +455,9 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
 	case METRICS_CURR_DCEFCLK:
 		*value = metrics->CurrClock[PPCLK_DCEFCLK];
 		break;
+	case METRICS_CURR_FCLK:
+		*value = metrics->CurrClock[PPCLK_FCLK];
+		break;
 	case METRICS_AVERAGE_GFXCLK:
 		if (metrics->AverageGfxActivity <= SMU_11_0_7_GFX_BUSY_THRESHOLD)
 			*value = metrics->AverageGfxclkFrequencyPostDs;
-- 
cgit v1.2.3-59-g8ed1b


From 0435d77cd9f4613e7c95ca208d252acf6d745c3f Mon Sep 17 00:00:00 2001
From: Kenneth Feng <kenneth.feng@amd.com>
Date: Wed, 21 Oct 2020 17:30:02 +0800
Subject: drm/amd/pm: remove the average clock value in sysfs

if it's fine-grained clock dpm, remove the average clock value and
reflects the real clock.

Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 9ca3d93b1c95..685a8a3b25d4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -954,12 +954,16 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu,
 			freq_values[1] = cur_value;
 			mark_index = cur_value == freq_values[0] ? 0 :
 				     cur_value == freq_values[2] ? 2 : 1;
-			if (mark_index != 1)
-				freq_values[1] = (freq_values[0] + freq_values[2]) / 2;
 
-			for (i = 0; i < 3; i++) {
+			count = 3;
+			if (mark_index != 1) {
+				count = 2;
+				freq_values[1] = freq_values[2];
+			}
+
+			for (i = 0; i < count; i++) {
 				size += sprintf(buf + size, "%d: %uMhz %s\n", i, freq_values[i],
-						i == mark_index ? "*" : "");
+						cur_value  == freq_values[i] ? "*" : "");
 			}
 
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 687e79c0feb4243b141b1e9a20adba3c0ec66f7f Mon Sep 17 00:00:00 2001
From: Likun Gao <Likun.Gao@amd.com>
Date: Thu, 22 Oct 2020 00:50:07 +0800
Subject: drm/amdgpu: correct the cu and rb info for sienna cichlid

Skip disabled sa to correct the cu_info and active_rbs for sienna cichlid.

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 5.9.x
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 1c98b248a7fb..56fdbe626d30 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4582,12 +4582,17 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
 	int i, j;
 	u32 data;
 	u32 active_rbs = 0;
+	u32 bitmap;
 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
 					adev->gfx.config.max_sh_per_se;
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+			bitmap = i * adev->gfx.config.max_sh_per_se + j;
+			if ((adev->asic_type == CHIP_SIENNA_CICHLID) &&
+			    ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
+				continue;
 			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
 			data = gfx_v10_0_get_rb_active_bitmap(adev);
 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
@@ -8812,6 +8817,10 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+			bitmap = i * adev->gfx.config.max_sh_per_se + j;
+			if ((adev->asic_type == CHIP_SIENNA_CICHLID) &&
+			    ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
+				continue;
 			mask = 1;
 			ao_bitmap = 0;
 			counter = 0;
-- 
cgit v1.2.3-59-g8ed1b