From d630213f2a47933e1037909273a20023ba3e598d Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 10 Jul 2017 14:41:25 +0200
Subject: drm/bridge: tc358767: fix probe without attached output node

The output node of the TC358767 is only used if another bridge is chained
behind it. Panels attached to the TC358767 can be detected using the usual
DP AUX probing. This restores the old behavior of ignoring the output if
no endpoint is found.

Fixes: ebc944613567 (drm: convert drivers to use drm_of_find_panel_or_bridge)
CC: stable@vger.kernel.org
Acked-by: Andrey Gusakov <andrey.gusakov@cogentembedded.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Archit Taneja <architt@codeaurora.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20170710124125.9019-1-l.stach@pengutronix.de
---
 drivers/gpu/drm/bridge/tc358767.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index 5c26488e7a2d..0529e500c534 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -1255,7 +1255,7 @@ static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
 
 	/* port@2 is the output port */
 	ret = drm_of_find_panel_or_bridge(dev->of_node, 2, 0, &tc->panel, NULL);
-	if (ret)
+	if (ret && ret != -ENODEV)
 		return ret;
 
 	/* Shut down GPIO is optional */
-- 
cgit v1.2.3-59-g8ed1b


From 99f828436788f0155798145853607ca8f0e6de93 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 28 Jul 2017 22:29:51 +0100
Subject: dma-buf/sync_file: Allow multiple sync_files to wrap a single
 dma-fence

Up until recently sync_file were create to export a single dma-fence to
userspace, and so we could canabalise a bit insie dma-fence to mark
whether or not we had enable polling for the sync_file itself. However,
with the advent of syncobj, we do allow userspace to create multiple
sync_files for a single dma-fence. (Similarly, that the sw-sync
validation framework also started returning multiple sync-files wrapping
a single dma-fence for a syncpt also triggering the problem.)

This patch reverts my suggestion in commit e24165537312
("dma-buf/sync_file: only enable fence signalling on poll()") to use a
single bit in the shared dma-fence and restores the sync_file->flags for
tracking the bits individually.

Reported-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Fixes: f1e8c67123cf ("dma-buf/sw-sync: Use an rbtree to sort fences in the timeline")
Fixes: e9083420bbac ("drm: introduce sync objects (v4)")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: dri-devel@lists.freedesktop.org
Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.13-rc1+
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170728212951.7818-1-chris@chris-wilson.co.uk
(cherry picked from commit db1fc97ca0c0d3fdeeadf314d99a26188438940a)
---
 drivers/dma-buf/sync_file.c | 5 +++--
 include/linux/sync_file.h   | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index d7e219d2669d..66fb40d0ebdb 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -304,7 +304,7 @@ static int sync_file_release(struct inode *inode, struct file *file)
 {
 	struct sync_file *sync_file = file->private_data;
 
-	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
+	if (test_bit(POLL_ENABLED, &sync_file->flags))
 		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
 	dma_fence_put(sync_file->fence);
 	kfree(sync_file);
@@ -318,7 +318,8 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, &sync_file->wq, wait);
 
-	if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
+	if (list_empty(&sync_file->cb.node) &&
+	    !test_and_set_bit(POLL_ENABLED, &sync_file->flags)) {
 		if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
 					   fence_check_cb_func) < 0)
 			wake_up_all(&sync_file->wq);
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index 5726107963b2..0ad87c434ae6 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -43,12 +43,13 @@ struct sync_file {
 #endif
 
 	wait_queue_head_t	wq;
+	unsigned long		flags;
 
 	struct dma_fence	*fence;
 	struct dma_fence_cb cb;
 };
 
-#define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS
+#define POLL_ENABLED 0
 
 struct sync_file *sync_file_create(struct dma_fence *fence);
 struct dma_fence *sync_file_get_fence(int fd);
-- 
cgit v1.2.3-59-g8ed1b


From d490c9cd2f67399e1dbc951f190d03724b81d0c8 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 29 Jun 2017 14:49:59 +0530
Subject: drm/msm/mdp5: Fix compilation warnings

Following compilation warnings were observed for these files:

  CC [M]  drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.o
drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c: In function 'blend_setup':
drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:223:7: warning: missing braces around initializer [-Wmissing-braces]
  enum mdp5_pipe stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE };
       ^
drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:223:7: warning: (near initialization for 'stage[0]') [-Wmissing-braces]
drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:224:7: warning: missing braces around initializer [-Wmissing-braces]
  enum mdp5_pipe r_stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE };
       ^
drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:224:7: warning: (near initialization for 'r_stage[0]') [-Wmissing-braces]

drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c: In function 'mdp5_plane_mode_set':
drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:892:9: warning: missing braces around initializer [-Wmissing-braces]
  struct phase_step step = { 0 };
         ^
drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:892:9: warning: (near initialization for 'step.x') [-Wmissing-braces]
drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:893:9: warning: missing braces around initializer [-Wmissing-braces]
  struct pixel_ext pe = { 0 };
         ^
drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:893:9: warning: (near initialization for 'pe.left') [-Wmissing-braces]

This happens because in the first case we were initializing a two
dimensional array with {0} and in the second case we were initializing a
struct containing two arrays with {0}.

Fix them by adding another pair of {}.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c  | 4 ++--
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index cb5415d6c04b..62a0fb377e7a 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -221,8 +221,8 @@ static void blend_setup(struct drm_crtc *crtc)
 	struct mdp5_ctl *ctl = mdp5_cstate->ctl;
 	uint32_t blend_op, fg_alpha, bg_alpha, ctl_blend_flags = 0;
 	unsigned long flags;
-	enum mdp5_pipe stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE };
-	enum mdp5_pipe r_stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE };
+	enum mdp5_pipe stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { { SSPP_NONE } };
+	enum mdp5_pipe r_stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { { SSPP_NONE } };
 	int i, plane_cnt = 0;
 	bool bg_alpha_enabled = false;
 	u32 mixer_op_mode = 0;
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
index fe3a4de1a433..61f39c86dd09 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
@@ -890,8 +890,8 @@ static int mdp5_plane_mode_set(struct drm_plane *plane,
 	struct mdp5_hw_pipe *right_hwpipe;
 	const struct mdp_format *format;
 	uint32_t nplanes, config = 0;
-	struct phase_step step = { 0 };
-	struct pixel_ext pe = { 0 };
+	struct phase_step step = { { 0 } };
+	struct pixel_ext pe = { { 0 } };
 	uint32_t hdecm = 0, vdecm = 0;
 	uint32_t pix_format;
 	unsigned int rotation;
-- 
cgit v1.2.3-59-g8ed1b


From 65e93108891e571f177c202add9288eda9ac4100 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 30 Jun 2017 10:59:15 +0300
Subject: drm/msm: fix an integer overflow test

We recently added an integer overflow check but it needs an additional
tweak to work properly on 32 bit systems.

The problem is that we're doing the right hand side of the assignment as
type unsigned long so the max it will have an integer overflow instead
of being larger than SIZE_MAX.  That means the "sz > SIZE_MAX" condition
is never true even on 32 bit systems.  We need to first cast it to u64
and then do the math.

Fixes: 4a630fadbb29 ("drm/msm: Fix potential buffer overflow issue")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 6bfca7470141..8095658e8cb4 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -34,8 +34,8 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 		struct msm_gpu *gpu, uint32_t nr_bos, uint32_t nr_cmds)
 {
 	struct msm_gem_submit *submit;
-	uint64_t sz = sizeof(*submit) + (nr_bos * sizeof(submit->bos[0])) +
-		(nr_cmds * sizeof(submit->cmd[0]));
+	uint64_t sz = sizeof(*submit) + ((u64)nr_bos * sizeof(submit->bos[0])) +
+		((u64)nr_cmds * sizeof(submit->cmd[0]));
 
 	if (sz > SIZE_MAX)
 		return NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 71e3dfa167b105d3c06e76fee1d0e2fd1e502cf6 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 10 Jul 2017 10:20:42 +0300
Subject: drm/msm: unlock on error in msm_gem_get_iova()

We recently added locking to this function but there was a direct return
that was overlooked where we need to unlock.

Fixes: 0e08270a1f01 ("drm/msm: Separate locking of buffer resources from struct_mutex")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 65f35544c1ec..065d933df2c3 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -383,8 +383,10 @@ int msm_gem_get_iova(struct drm_gem_object *obj,
 		struct page **pages;
 
 		vma = add_vma(obj, aspace);
-		if (IS_ERR(vma))
-			return PTR_ERR(vma);
+		if (IS_ERR(vma)) {
+			ret = PTR_ERR(vma);
+			goto unlock;
+		}
 
 		pages = get_pages(obj);
 		if (IS_ERR(pages)) {
@@ -405,7 +407,7 @@ int msm_gem_get_iova(struct drm_gem_object *obj,
 
 fail:
 	del_vma(vma);
-
+unlock:
 	mutex_unlock(&msm_obj->lock);
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From af1f5f12c21bd9dc08f578d86adc192eec4eb28a Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Mon, 3 Jul 2017 13:15:57 -0400
Subject: drm/msm/mdp5: fix unclocked register access in _cursor_set()

Fixes an insta-reboot when screen-blanking kicks in, due to cursor
updates without clocks enabled.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index 62a0fb377e7a..735a87a699fa 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -753,6 +753,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 	if (!handle) {
 		DBG("Cursor off");
 		cursor_enable = false;
+		mdp5_enable(mdp5_kms);
 		goto set_cursor;
 	}
 
@@ -776,6 +777,8 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 
 	get_roi(crtc, &roi_w, &roi_h);
 
+	mdp5_enable(mdp5_kms);
+
 	mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_STRIDE(lm), stride);
 	mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_FORMAT(lm),
 			MDP5_LM_CURSOR_FORMAT_FORMAT(CURSOR_FMT_ARGB8888));
@@ -804,6 +807,7 @@ set_cursor:
 	crtc_flush(crtc, flush_mask);
 
 end:
+	mdp5_disable(mdp5_kms);
 	if (old_bo) {
 		drm_flip_work_queue(&mdp5_crtc->unref_cursor_work, old_bo);
 		/* enable vblank to complete cursor work: */
@@ -836,6 +840,8 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 
 	get_roi(crtc, &roi_w, &roi_h);
 
+	mdp5_enable(mdp5_kms);
+
 	spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags);
 	mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm),
 			MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) |
@@ -847,6 +853,8 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 
 	crtc_flush(crtc, flush_mask);
 
+	mdp5_disable(mdp5_kms);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From d4cea38ebb4de90913085391ce4febde1a4ba9aa Mon Sep 17 00:00:00 2001
From: Archit Taneja <architt@codeaurora.org>
Date: Wed, 12 Jul 2017 15:09:55 +0530
Subject: drm/msm/dsi: Calculate link clock rates with updated dsi->lanes

After the commit mentioned below, we start computing the byte and pixel
clocks (dsi_calc_clk_rate) in the DSI bridge's mode_set() op. The
calculation involves the number of DSI lanes being used by the
downstream bridge/panel.

If the downstream bridge/panel tries to change the number of DSI lanes
(as done in the ADV7533 driver) in its mode_set() op, then our DSI
host driver will not have the correct number of lanes when computing
byte/pixel clocks.

Fix this by delaying the clock rate calculation in the DSI bridge
enable path. In particular, compute the clock rates in
msm_dsi_host_get_phy_clk_req().

This fixes the DSI host error interrupts seen when we try to switch
between modes that require different number of lanes (4 to 3 lanes, or
vice versa) on db410c. The error interrupts occur since the byte/pixel
clock rates aren't according to what the DSI video mode timing engine
expects.

Fixes: b62aa70a98c5 ("drm/msm/dsi: Move PHY operations out of host")
Signed-off-by: Archit Taneja <architt@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/dsi/dsi_host.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 9e9c5696bc03..c7b612c3d771 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -2137,6 +2137,13 @@ void msm_dsi_host_get_phy_clk_req(struct mipi_dsi_host *host,
 	struct msm_dsi_phy_clk_request *clk_req)
 {
 	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
+	int ret;
+
+	ret = dsi_calc_clk_rate(msm_host);
+	if (ret) {
+		pr_err("%s: unable to calc clk rate, %d\n", __func__, ret);
+		return;
+	}
 
 	clk_req->bitclk_rate = msm_host->byte_clk_rate * 8;
 	clk_req->escclk_rate = msm_host->esc_clk_rate;
@@ -2280,7 +2287,6 @@ int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host,
 					struct drm_display_mode *mode)
 {
 	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
-	int ret;
 
 	if (msm_host->mode) {
 		drm_mode_destroy(msm_host->dev, msm_host->mode);
@@ -2293,12 +2299,6 @@ int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host,
 		return -ENOMEM;
 	}
 
-	ret = dsi_calc_clk_rate(msm_host);
-	if (ret) {
-		pr_err("%s: unable to calc clk rate, %d\n", __func__, ret);
-		return ret;
-	}
-
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From b3949a9a3e09f87e0371e80a2bef6ec0d48b575d Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sun, 30 Jul 2017 14:42:36 +0200
Subject: drm/msm: fix WARN_ON in add_vma() with no iommu

While I was testing the upcoming adv7533 CEC support with my Dragonboard c410
I encountered this warning several times during boot:

[    4.408309] WARNING: CPU: 3 PID: 1347 at drivers/gpu/drm/msm/msm_gem.c:312 add_vma+0x78/0x88 [msm]
[    4.412951] Modules linked in: snd_soc_hdmi_codec adv7511 cec qcom_wcnss_pil msm mdt_loader drm_kms_helper msm_rng rng_core drm
[    4.421728] CPU: 3 PID: 1347 Comm: kworker/3:3 Not tainted 4.13.0-rc1-dragonboard #111
[    4.433090] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT)
[    4.441081] Workqueue: events deferred_probe_work_func
[    4.447929] task: ffff800031243600 task.stack: ffff800003394000
[    4.453023] PC is at add_vma+0x78/0x88 [msm]
[    4.458823] LR is at _msm_gem_new+0xd4/0x188 [msm]
[    4.463207] pc : [<ffff000000ac01f8>] lr : [<ffff000000ac06b4>] pstate: 40000145
[    4.467811] sp : ffff8000033978a0
[    4.475357] x29: ffff8000033978a0 x28: ffff8000031dea18
[    4.478572] x27: ffff800003933a00 x26: ffff800003b39800
[    4.483953] x25: ffff8000338ff800 x24: 0000000000000001
[    4.489249] x23: 0000000000000000 x22: ffff800003b39800
[    4.494544] x21: ffff8000338ff800 x20: 0000000000000000
[    4.499839] x19: ffff800003932600 x18: 0000000000000001
[    4.505135] x17: 0000ffff8969e9e0 x16: ffff7e00000ce7a0
[    4.510429] x15: ffffffffffffffff x14: ffff8000833977ef
[    4.515724] x13: ffff8000033977f3 x12: 0000000000000038
[    4.521020] x11: 0101010101010101 x10: ffffff7f7fff7f7f
[    4.526315] x9 : 0000000000000000 x8 : ffff800003932800
[    4.531633] x7 : 0000000000000000 x6 : 000000000000003f
[    4.531644] x5 : 0000000000000040 x4 : 0000000000000000
[    4.531650] x3 : ffff800031243600 x2 : 0000000000000000
[    4.531655] x1 : 0000000000000000 x0 : 0000000000000000
[    4.531670] Call trace:
[    4.531676] Exception stack(0xffff8000033976c0 to 0xffff8000033977f0)
[    4.531683] 76c0: ffff800003932600 0001000000000000 ffff8000033978a0 ffff000000ac01f8
[    4.531688] 76e0: 0000000000000140 0000000000000000 ffff800003932550 ffff800003397780
[    4.531694] 7700: ffff800003397730 ffff000008261ce8 0000000000000000 ffff8000031d2f80
[    4.531699] 7720: ffff800003397800 ffff0000081d671c 0000000000000140 0000000000000000
[    4.531705] 7740: ffff000000ac04c0 0000000000004003 ffff800003397908 00000000014080c0
[    4.531710] 7760: 0000000000000000 ffff800003b39800 0000000000000000 0000000000000000
[    4.531716] 7780: 0000000000000000 ffff800031243600 0000000000000000 0000000000000040
[    4.531721] 77a0: 000000000000003f 0000000000000000 ffff800003932800 0000000000000000
[    4.531726] 77c0: ffffff7f7fff7f7f 0101010101010101 0000000000000038 ffff8000033977f3
[    4.531730] 77e0: ffff8000833977ef ffffffffffffffff
[    4.531881] [<ffff000000ac01f8>] add_vma+0x78/0x88 [msm]
[    4.532011] [<ffff000000ac06b4>] _msm_gem_new+0xd4/0x188 [msm]
[    4.532134] [<ffff000000ac1900>] msm_gem_new+0x10/0x18 [msm]
[    4.532260] [<ffff000000acb274>] msm_dsi_host_modeset_init+0x17c/0x268 [msm]
[    4.532384] [<ffff000000ac9024>] msm_dsi_modeset_init+0x34/0x1b8 [msm]
[    4.532504] [<ffff000000ab6168>] modeset_init+0x408/0x488 [msm]
[    4.532623] [<ffff000000ab6c4c>] mdp5_kms_init+0x2b4/0x338 [msm]
[    4.532745] [<ffff000000abeff8>] msm_drm_bind+0x218/0x4e8 [msm]
[    4.532755] [<ffff00000855d744>] try_to_bring_up_master+0x1f4/0x318
[    4.532762] [<ffff00000855d900>] component_add+0x98/0x180
[    4.532887] [<ffff000000ac8da0>] dsi_dev_probe+0x18/0x28 [msm]
[    4.532895] [<ffff000008565fe8>] platform_drv_probe+0x58/0xc0
[    4.532901] [<ffff00000856410c>] driver_probe_device+0x324/0x458
[    4.532907] [<ffff00000856440c>] __device_attach_driver+0xac/0x170
[    4.532913] [<ffff000008561ef4>] bus_for_each_drv+0x4c/0x98
[    4.532918] [<ffff000008563c38>] __device_attach+0xc0/0x160
[    4.532924] [<ffff000008564530>] device_initial_probe+0x10/0x18
[    4.532929] [<ffff000008562f84>] bus_probe_device+0x94/0xa0
[    4.532934] [<ffff0000085635d4>] deferred_probe_work_func+0x8c/0xe8
[    4.532941] [<ffff0000080d79bc>] process_one_work+0x1d4/0x330
[    4.532946] [<ffff0000080d7b60>] worker_thread+0x48/0x468
[    4.532952] [<ffff0000080ddae4>] kthread+0x12c/0x130
[    4.532958] [<ffff000008082f10>] ret_from_fork+0x10/0x40
[    4.532962] ---[ end trace b1ac6888ec40b0bb ]---

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 065d933df2c3..a0c60e738db8 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -930,8 +930,12 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev,
 	if (use_vram) {
 		struct msm_gem_vma *vma;
 		struct page **pages;
+		struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+		mutex_lock(&msm_obj->lock);
 
 		vma = add_vma(obj, NULL);
+		mutex_unlock(&msm_obj->lock);
 		if (IS_ERR(vma)) {
 			ret = PTR_ERR(vma);
 			goto fail;
-- 
cgit v1.2.3-59-g8ed1b


From 79687057c2880d871451f107548187e4853c38e6 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sun, 30 Jul 2017 14:46:56 +0200
Subject: drm/msm: NULL pointer dereference in
 drivers/gpu/drm/msm/msm_gem_vma.c

While I was testing the upcoming adv7533 CEC support with my Dragonboard c410
I encountered this NULL pointer dereference:

[   17.912822] Unable to handle kernel NULL pointer dereference at virtual address 000000e8
[   17.917191] user pgtable: 4k pages, 48-bit VAs, pgd = ffff800030e9f000
[   17.925249] [00000000000000e8] *pgd=00000000b0daf003, *pud=0000000000000000
[   17.931650] Internal error: Oops: 96000005 [#1] PREEMPT SMP
[   17.938395] Modules linked in: btqcomsmd btqca arc4 wcn36xx mac80211 bluetooth cfg80211 ecdh_generic r8152 snd_soc_hdmi_codec adv7511 cec
qcom_wcnss_pil msm mdt_loader drm_kms_helper msm_rng rng_core drm
[   17.943967] CPU: 0 PID: 1684 Comm: Xorg Tainted: G        W       4.13.0-rc1-dragonboard #111
[   17.962005] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT)
[   17.970685] task: ffff800031236c00 task.stack: ffff800033fbc000
[   17.977582] PC is at msm_gem_unmap_vma+0x20/0x80 [msm]
[   17.983213] LR is at put_iova+0x60/0xb8 [msm]
[   17.988303] pc : [<ffff000000ac2d58>] lr : [<ffff000000ac07c8>] pstate: 20000145
[   17.992733] sp : ffff800033fbfb30
[   18.000193] x29: ffff800033fbfb30 x28: ffff800030b5f000
[   18.003407] x27: 00000000000000b4 x26: ffff0000009f8cd8
[   18.008789] x25: 0000000000000004 x24: dead000000000100
[   18.014085] x23: dead000000000200 x22: ffff800030b5fd40
[   18.019379] x21: ffff800030b5fc00 x20: 0000000000000000
[   18.024675] x19: ffff80003082bf00 x18: 0000000000000000
[   18.029970] x17: 0000ffffb3347e70 x16: ffff000008207638
[   18.035265] x15: 0000000000000053 x14: 0000000000000000
[   18.040560] x13: 0000000000000038 x12: 0101010101010101
[   18.045855] x11: 7f7f7f7f7f7f7f7f x10: 0000000000000040
[   18.051150] x9 : ffff800030b5f038 x8 : ffff800031657b50
[   18.056446] x7 : ffff800031657b78 x6 : 0000000000000000
[   18.061740] x5 : 0000000000000000 x4 : 00000000b5c01000
[   18.067036] x3 : 0000000000000000 x2 : ffff8000337bf300
[   18.072330] x1 : ffff80003082bf00 x0 : 0000000000000000
[   18.077629] Process Xorg (pid: 1684, stack limit = 0xffff800033fbc000)
[   18.082925] Stack: (0xffff800033fbfb30 to 0xffff800033fc0000)
[   18.089262] fb20:                                   ffff800033fbfb60 ffff000000ac07c8
[   18.095081] fb40: ffff80003082bf00 ffff800030b5fc90 ffff800030b5fc00 ffff000000abf4a0
[   18.102893] fb60: ffff800033fbfba0 ffff000000ac16b0 ffff800030b5fc00 ffff8000338ff870
[   18.110706] fb80: ffff8000338ff800 ffff800030b5fc00 ffff800030b5fda8 ffff800033fbfd80
[   18.118518] fba0: ffff800033fbfbe0 ffff0000009d4244 ffff800030b5fc00 ffff800030b5f038
[   18.126332] fbc0: ffff800033fbfbd0 ffff800030b5fc00 ffff800030b5f038 ffff0000009d4840
[   18.134144] fbe0: ffff800033fbfbf0 ffff0000009d4858 ffff800033fbfc10 ffff0000009d48e4
[   18.141955] fc00: ffff800030b5fc00 ffff8000338ffd98 ffff800033fbfc30 ffff0000009d49a4
[   18.149768] fc20: ffff800030b5fc00 ffff800030b5f000 ffff800033fbfc60 ffff0000009d4a4c
[   18.157581] fc40: ffff800030b5f050 ffff800030b5f000 0000000000000001 ffff800030b5fc00
[   18.165394] fc60: ffff800033fbfca0 ffff0000009d4ab0 0000000000000018 ffff800030b5f000
[   18.173206] fc80: ffff0000009efd28 ffff800033fbfd80 ffff8000338ff800 ffff0000009d56a8
[   18.181019] fca0: ffff800033fbfcb0 ffff0000009efd54 ffff800033fbfcc0 ffff0000009d56c8
[   18.188831] fcc0: ffff800033fbfd00 ffff0000009d58e0 ffff0000009fa6e0 00000000c00464b4
[   18.196643] fce0: 0000000000000004 ffff80003082b400 0000ffffea1f0e00 0000000000000000
[   18.204456] fd00: ffff800033fbfe00 ffff000008206f0c ffff80000335caf8 ffff80003082b400
[   18.212269] fd20: 0000ffffea1f0e00 ffff80003082b400 00000000c00464b4 0000ffffea1f0e00
[   18.220081] fd40: 0000000000000124 000000000000001d ffff0000089d2000 ffff800031236c00
[   18.227894] fd60: ffff800033fbfd80 0000000000000004 ffff0000009efd28 ffff800033fbfd80
[   18.235706] fd80: 0000000100000001 0000008000000001 0000001800000020 0000000000000001
[   18.243518] fda0: 0000000100000000 0000000100000001 0000ffff00000000 0000ffff00000000
[   18.251331] fdc0: 0000000000000124 0000000000000038 ffff0000089d2000 ffff800031236c00
[   18.259144] fde0: ffff800033fbfe40 ffff000008214124 ffff800033fbfe30 ffff000008203290
[   18.266956] fe00: ffff800033fbfe80 ffff0000082076b4 0000000000000000 ffff800030d8a000
[   18.274768] fe20: ffff80003082b400 0000000000000016 ffff800033fbfe50 ffff0000081f0488
[   18.282581] fe40: ffff800033fbfe80 ffff000008207678 0000000000000000 ffff80003082b400
[   18.290393] fe60: ffff800033fbfe70 ffff0000082138b0 ffff800033fbfe80 ffff000008207658
[   18.298207] fe80: 0000000000000000 ffff000008082f84 0000000000000000 0000800034a16000
[   18.306017] fea0: ffffffffffffffff 0000ffffb3347e7c 0000000000000000 0000000000000015
[   18.313832] fec0: 0000000000000016 00000000c00464b4 0000ffffea1f0e00 0000000000000001
[   18.321643] fee0: 0000000000000020 0000000000000080 0000000000000001 0000000000000000
[   18.329456] ff00: 000000000000001d 000000012692c5b0 0101010101010101 7f7f7f7f7f7f7f7f
[   18.337269] ff20: 0101010101010101 0000000000000038 0000000000000000 0000000000000053
[   18.345082] ff40: 0000ffffb368b2b8 0000ffffb3347e70 0000000000000000 0000ffffb3847000
[   18.352894] ff60: 0000ffffea1f0e00 00000000c00464b4 0000000000000016 0000ffffea1f0edc
[   18.360705] ff80: 000000012692ad20 0000000000000003 00000001214282e4 0000000121428388
[   18.368518] ffa0: 0000000000000000 0000ffffea1f0da0 0000ffffb367185c 0000ffffea1f0da0
[   18.376332] ffc0: 0000ffffb3347e7c 0000000000000000 0000000000000016 000000000000001d
[   18.384142] ffe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[   18.391953] Call trace:
[   18.399760] Exception stack(0xffff800033fbf950 to 0xffff800033fbfa80)
[   18.402023] f940:                                   ffff80003082bf00 0001000000000000
[   18.408622] f960: ffff800033fbfb30 ffff000000ac2d58 0000000020000145 ffff8000338ffa78
[   18.416435] f980: 0000000000000000 0000000000000000 ffff800033fbf9e0 ffff0000089afcf0
[   18.424248] f9a0: ffff80000348f230 ffff8000338ffa78 0000000000000000 0000000000000000
[   18.432060] f9c0: ffff8000338ffaa8 0000000000000001 ffff800033fbfb80 ffff0000009e8f38
[   18.439872] f9e0: ffff800033fbfa10 ffff0000089a9ff8 0000000000000027 ffff80003082b918
[   18.447684] fa00: 0000000000000000 ffff80003082bf00 ffff8000337bf300 0000000000000000
[   18.455497] fa20: 00000000b5c01000 0000000000000000 0000000000000000 ffff800031657b78
[   18.463310] fa40: ffff800031657b50 ffff800030b5f038 0000000000000040 7f7f7f7f7f7f7f7f
[   18.471122] fa60: 0101010101010101 0000000000000038 0000000000000000 0000000000000053
[   18.479062] [<ffff000000ac2d58>] msm_gem_unmap_vma+0x20/0x80 [msm]
[   18.486862] [<ffff000000ac07c8>] put_iova+0x60/0xb8 [msm]
[   18.492938] [<ffff000000ac16b0>] msm_gem_free_object+0x60/0x198 [msm]
[   18.498432] [<ffff0000009d4244>] drm_gem_object_free+0x1c/0x58 [drm]
[   18.504854] [<ffff0000009d4858>] drm_gem_object_put_unlocked+0x90/0xa0 [drm]
[   18.511273] [<ffff0000009d48e4>] drm_gem_object_handle_put_unlocked+0x64/0xd0 [drm]
[   18.518300] [<ffff0000009d49a4>] drm_gem_object_release_handle+0x54/0x98 [drm]
[   18.525679] [<ffff0000009d4a4c>] drm_gem_handle_delete+0x64/0xb8 [drm]
[   18.532968] [<ffff0000009d4ab0>] drm_gem_dumb_destroy+0x10/0x18 [drm]
[   18.539479] [<ffff0000009efd54>] drm_mode_destroy_dumb_ioctl+0x2c/0x40 [drm]
[   18.545992] [<ffff0000009d56c8>] drm_ioctl_kernel+0x68/0xe0 [drm]
[   18.553105] [<ffff0000009d58e0>] drm_ioctl+0x178/0x3b0 [drm]
[   18.558970] [<ffff000008206f0c>] do_vfs_ioctl+0xa4/0x7d0
[   18.564694] [<ffff0000082076b4>] SyS_ioctl+0x7c/0x98
[   18.569992] [<ffff000008082f84>] el0_svc_naked+0x38/0x3c
[   18.574941] Code: a90153f3 aa0003f4 f90013f5 aa0103f3 (f9407400)
[   18.580502] ---[ end trace b1ac6888ec40b0be ]---

It turns out that the aspace argument in msm_gem_unmap_vma() is NULL.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
[Note: this case gets hit with !IOMMU config]
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_vma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index c36321bc8714..d34e331554f3 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -42,7 +42,7 @@ void
 msm_gem_unmap_vma(struct msm_gem_address_space *aspace,
 		struct msm_gem_vma *vma, struct sg_table *sgt)
 {
-	if (!vma->iova)
+	if (!aspace || !vma->iova)
 		return;
 
 	if (aspace->mmu) {
-- 
cgit v1.2.3-59-g8ed1b


From b0e77fd87cf302351e537881c8daf8d1c69cf541 Mon Sep 17 00:00:00 2001
From: Archit Taneja <architt@codeaurora.org>
Date: Fri, 28 Jul 2017 16:16:59 +0530
Subject: drm/msm/mdp5: Fix typo in encoder_enable path

The mdp5_cmd_encoder_disable is accidentally called in the encoder enable
path. We've not seen any problems since we haven't tested with command
mode panels in a while. Fix the copy-paste error.

Signed-off-by: Archit Taneja <architt@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
index 97f3294fbfc6..70bef51245af 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
@@ -299,7 +299,7 @@ static void mdp5_encoder_enable(struct drm_encoder *encoder)
 	struct mdp5_interface *intf = mdp5_encoder->intf;
 
 	if (intf->mode == MDP5_INTF_DSI_MODE_COMMAND)
-		mdp5_cmd_encoder_disable(encoder);
+		mdp5_cmd_encoder_enable(encoder);
 	else
 		mdp5_vid_encoder_enable(encoder);
 }
-- 
cgit v1.2.3-59-g8ed1b


From d0538f5048fafe5633c58f25c3332f67739cdeb4 Mon Sep 17 00:00:00 2001
From: Archit Taneja <architt@codeaurora.org>
Date: Fri, 28 Jul 2017 16:17:00 +0530
Subject: drm/msm/mdp5: Drop clock names with "_clk" suffix

We have upstream bindings (msm8916) that have the "_clk" suffix in the
clock names. The downstream bindings also require it.

We want to drop the "_clk" suffix and at the same time support existing
bindings. Update the MDP5 code with the the msm_clk_get() helper to
support both old and new clock names.

Signed-off-by: Archit Taneja <architt@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index 5d13fa5381ee..1c603aef3c59 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
@@ -502,7 +502,7 @@ static int get_clk(struct platform_device *pdev, struct clk **clkp,
 		const char *name, bool mandatory)
 {
 	struct device *dev = &pdev->dev;
-	struct clk *clk = devm_clk_get(dev, name);
+	struct clk *clk = msm_clk_get(pdev, name);
 	if (IS_ERR(clk) && mandatory) {
 		dev_err(dev, "failed to get %s (%ld)\n", name, PTR_ERR(clk));
 		return PTR_ERR(clk);
@@ -887,21 +887,21 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
 	}
 
 	/* mandatory clocks: */
-	ret = get_clk(pdev, &mdp5_kms->axi_clk, "bus_clk", true);
+	ret = get_clk(pdev, &mdp5_kms->axi_clk, "bus", true);
 	if (ret)
 		goto fail;
-	ret = get_clk(pdev, &mdp5_kms->ahb_clk, "iface_clk", true);
+	ret = get_clk(pdev, &mdp5_kms->ahb_clk, "iface", true);
 	if (ret)
 		goto fail;
-	ret = get_clk(pdev, &mdp5_kms->core_clk, "core_clk", true);
+	ret = get_clk(pdev, &mdp5_kms->core_clk, "core", true);
 	if (ret)
 		goto fail;
-	ret = get_clk(pdev, &mdp5_kms->vsync_clk, "vsync_clk", true);
+	ret = get_clk(pdev, &mdp5_kms->vsync_clk, "vsync", true);
 	if (ret)
 		goto fail;
 
 	/* optional clocks: */
-	get_clk(pdev, &mdp5_kms->lut_clk, "lut_clk", false);
+	get_clk(pdev, &mdp5_kms->lut_clk, "lut", false);
 
 	/* we need to set a default rate before enabling.  Set a safe
 	 * rate first, then figure out hw revision, and then set a
-- 
cgit v1.2.3-59-g8ed1b


From 3394f5618dfe8e686a2429aad75edf7ff6540911 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:30 -0600
Subject: drm/msm: Remove some potentially blocked register ranges

The 0xf400 and 0xf800 ranges are in the RBBM_SECVID block which may
be protected from CPU access. Skip dumping them since they are minimally
useful for debugging and they aren't worth a system hang.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 49 +++++++++++++++++------------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index b4b54f1c24bc..1f60a9a885b4 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -920,31 +920,30 @@ static const u32 a5xx_registers[] = {
 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
 	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
-	0x04E0, 0x0533, 0x0540, 0x0555, 0xF400, 0xF400, 0xF800, 0xF807,
-	0x0800, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860, 0x0880, 0x08A0,
-	0x0B00, 0x0B12, 0x0B15, 0x0B28, 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD,
-	0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 0x0C60, 0x0C61, 0x0C80, 0x0C82,
-	0x0C84, 0x0C85, 0x0C90, 0x0C98, 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2,
-	0x2180, 0x2185, 0x2580, 0x2585, 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7,
-	0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8,
-	0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, 0x2100, 0x211E, 0x2140, 0x2145,
-	0x2500, 0x251E, 0x2540, 0x2545, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
-	0x0D30, 0x0D30, 0x20C0, 0x20C0, 0x24C0, 0x24C0, 0x0E40, 0x0E43,
-	0x0E4A, 0x0E4A, 0x0E50, 0x0E57, 0x0E60, 0x0E7C, 0x0E80, 0x0E8E,
-	0x0E90, 0x0E96, 0x0EA0, 0x0EA8, 0x0EB0, 0x0EB2, 0xE140, 0xE147,
-	0xE150, 0xE187, 0xE1A0, 0xE1A9, 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7,
-	0xE1D0, 0xE1D1, 0xE200, 0xE201, 0xE210, 0xE21C, 0xE240, 0xE268,
-	0xE000, 0xE006, 0xE010, 0xE09A, 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB,
-	0xE100, 0xE105, 0xE380, 0xE38F, 0xE3B0, 0xE3B0, 0xE400, 0xE405,
-	0xE408, 0xE4E9, 0xE4F0, 0xE4F0, 0xE280, 0xE280, 0xE282, 0xE2A3,
-	0xE2A5, 0xE2C2, 0xE940, 0xE947, 0xE950, 0xE987, 0xE9A0, 0xE9A9,
-	0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7, 0xE9D0, 0xE9D1, 0xEA00, 0xEA01,
-	0xEA10, 0xEA1C, 0xEA40, 0xEA68, 0xE800, 0xE806, 0xE810, 0xE89A,
-	0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, 0xE900, 0xE905, 0xEB80, 0xEB8F,
-	0xEBB0, 0xEBB0, 0xEC00, 0xEC05, 0xEC08, 0xECE9, 0xECF0, 0xECF0,
-	0xEA80, 0xEA80, 0xEA82, 0xEAA3, 0xEAA5, 0xEAC2, 0xA800, 0xA8FF,
-	0xAC60, 0xAC60, 0xB000, 0xB97F, 0xB9A0, 0xB9BF,
-	~0
+	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
+	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
+	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
+	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
+	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
+	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
+	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
+	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
+	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
+	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
+	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
+	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
+	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
+	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
+	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
+	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
+	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
+	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
+	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
+	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
+	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
+	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
+	0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
+	0xB9A0, 0xB9BF, ~0
 };
 
 static void a5xx_dump(struct msm_gpu *gpu)
-- 
cgit v1.2.3-59-g8ed1b


From 6e749e5971fc7c7a33d7a673fbe4944604b397cf Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:31 -0600
Subject: drm/msm: Allow hardware clock gating to be toggled

There are some use cases wherein we need to turn off hardware clock
gating before reading certain registers. Modify the A5XX HWCG function
to allow user to enable or disable clock gating at will.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 42 ++++++++---------------------------
 drivers/gpu/drm/msm/adreno/a5xx_gpu.h |  1 +
 2 files changed, 10 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 1f60a9a885b4..c1f8c20414f1 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -117,12 +117,10 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	gpu->funcs->flush(gpu);
 }
 
-struct a5xx_hwcg {
+static const struct {
 	u32 offset;
 	u32 value;
-};
-
-static const struct a5xx_hwcg a530_hwcg[] = {
+} a5xx_hwcg[] = {
 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
@@ -217,38 +215,16 @@ static const struct a5xx_hwcg a530_hwcg[] = {
 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
 };
 
-static const struct {
-	int (*test)(struct adreno_gpu *gpu);
-	const struct a5xx_hwcg *regs;
-	unsigned int count;
-} a5xx_hwcg_regs[] = {
-	{ adreno_is_a530, a530_hwcg, ARRAY_SIZE(a530_hwcg), },
-};
-
-static void _a5xx_enable_hwcg(struct msm_gpu *gpu,
-		const struct a5xx_hwcg *regs, unsigned int count)
+void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 {
 	unsigned int i;
 
-	for (i = 0; i < count; i++)
-		gpu_write(gpu, regs[i].offset, regs[i].value);
-
-	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xAAA8AA00);
-	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, 0x182);
-}
+	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
+		gpu_write(gpu, a5xx_hwcg[i].offset,
+			state ? a5xx_hwcg[i].value : 0);
 
-static void a5xx_enable_hwcg(struct msm_gpu *gpu)
-{
-	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg_regs); i++) {
-		if (a5xx_hwcg_regs[i].test(adreno_gpu)) {
-			_a5xx_enable_hwcg(gpu, a5xx_hwcg_regs[i].regs,
-				a5xx_hwcg_regs[i].count);
-			return;
-		}
-	}
+	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
+	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
 }
 
 static int a5xx_me_init(struct msm_gpu *gpu)
@@ -545,7 +521,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
 
 	/* Enable HWCG */
-	a5xx_enable_hwcg(gpu);
+	a5xx_set_hwcg(gpu, true);
 
 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index 6638bc85645d..d24796f3a706 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -59,5 +59,6 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs,
 }
 
 bool a5xx_idle(struct msm_gpu *gpu);
+void a5xx_set_hwcg(struct msm_gpu *gpu, bool state);
 
 #endif /* __A5XX_GPU_H__ */
-- 
cgit v1.2.3-59-g8ed1b


From a23cb3b52fec73b22671bac65356d8c55bf37706 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:32 -0600
Subject: drm/msm: Turn off hardware clock gating before reading A5XX registers

On A5XX GPU hardware clock gating needs to be turned off before
reading certain GPU registers via AHB. Turn off HWCG before calling
adreno_show() to safely dump all the registers without a system hang.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index c1f8c20414f1..33763b005b7b 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -995,7 +995,14 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
 {
 	seq_printf(m, "status:   %08x\n",
 			gpu_read(gpu, REG_A5XX_RBBM_STATUS));
+
+	/*
+	 * Temporarily disable hardware clock gating before going into
+	 * adreno_show to avoid issues while reading the registers
+	 */
+	a5xx_set_hwcg(gpu, false);
 	adreno_show(gpu, m);
+	a5xx_set_hwcg(gpu, true);
 }
 #endif
 
-- 
cgit v1.2.3-59-g8ed1b


From b0135ab91af16be57e444e74023e48b1f379ab36 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:34 -0600
Subject: drm/msm: args->fence should be args->flags

Fix a typo in msm_ioctl_gem_submit - check args->flags for the
MSM_SUBMIT_NO_IMPLICIT flag instead of args->fence.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 8095658e8cb4..8a75c0bd8a78 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -451,7 +451,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (ret)
 		goto out;
 
-	if (!(args->fence & MSM_SUBMIT_NO_IMPLICIT)) {
+	if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) {
 		ret = submit_fence_sync(submit);
 		if (ret)
 			goto out;
-- 
cgit v1.2.3-59-g8ed1b


From cdbc78ba702650b02b9e3e957dbaa725423bdf1e Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:35 -0600
Subject: drm/msm: Remove __user from __u64 data types

__user should be used to identify user pointers and not __u64
variables containing pointers.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 include/uapi/drm/msm_drm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 26c54f6d595d..ad4eb2863e70 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -171,7 +171,7 @@ struct drm_msm_gem_submit_cmd {
 	__u32 size;           /* in, cmdstream size */
 	__u32 pad;
 	__u32 nr_relocs;      /* in, number of submit_reloc's */
-	__u64 __user relocs;  /* in, ptr to array of submit_reloc's */
+	__u64 relocs;         /* in, ptr to array of submit_reloc's */
 };
 
 /* Each buffer referenced elsewhere in the cmdstream submit (ie. the
@@ -215,8 +215,8 @@ struct drm_msm_gem_submit {
 	__u32 fence;          /* out */
 	__u32 nr_bos;         /* in, number of submit_bo's */
 	__u32 nr_cmds;        /* in, number of submit_cmd's */
-	__u64 __user bos;     /* in, ptr to array of submit_bo's */
-	__u64 __user cmds;    /* in, ptr to array of submit_cmd's */
+	__u64 bos;            /* in, ptr to array of submit_bo's */
+	__u64 cmds;           /* in, ptr to array of submit_cmd's */
 	__s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 541de4c9c953438b677c31072e7762115ac11299 Mon Sep 17 00:00:00 2001
From: Archit Taneja <architt@codeaurora.org>
Date: Fri, 28 Jul 2017 16:17:08 +0530
Subject: drm/msm/adreno: Prevent unclocked access when retrieving timestamps

msm_gpu's get_timestamp() op (called by the MSM_GET_PARAM ioctl) can
result in register accesses. We need our power domain and clocks to
be active for that. Make sure they are enabled here.

Signed-off-by: Archit Taneja <architt@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index f1ab2703674a..7414c6bbd582 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -48,8 +48,15 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
 		*value = adreno_gpu->base.fast_rate;
 		return 0;
 	case MSM_PARAM_TIMESTAMP:
-		if (adreno_gpu->funcs->get_timestamp)
-			return adreno_gpu->funcs->get_timestamp(gpu, value);
+		if (adreno_gpu->funcs->get_timestamp) {
+			int ret;
+
+			pm_runtime_get_sync(&gpu->pdev->dev);
+			ret = adreno_gpu->funcs->get_timestamp(gpu, value);
+			pm_runtime_put_autosuspend(&gpu->pdev->dev);
+
+			return ret;
+		}
 		return -EINVAL;
 	default:
 		DBG("%s: invalid param: %u", gpu->name, param);
-- 
cgit v1.2.3-59-g8ed1b


From bdab8e8b2bc89dce73b6bcabcd295806ce2e5ef9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 26 Jul 2017 17:52:44 +0200
Subject: drm/msm: gpu: call qcom_mdt interfaces only for ARCH_QCOM

When compile-testing for something other than ARCH_QCOM,
we run into a link error:

drivers/gpu/drm/msm/adreno/a5xx_gpu.o: In function `a5xx_hw_init':
a5xx_gpu.c:(.text.a5xx_hw_init+0x600): undefined reference to `qcom_mdt_get_size'
a5xx_gpu.c:(.text.a5xx_hw_init+0x93c): undefined reference to `qcom_mdt_load'

There is already an #ifdef that tries to check for CONFIG_QCOM_MDT_LOADER,
but that symbol is only meaningful when building for ARCH_QCOM.

This adds a compile-time check for ARCH_QCOM, and clarifies the
Kconfig select statement so we don't even try it for other targets.

The check for CONFIG_QCOM_MDT_LOADER can then go away, which also
improves compile-time coverage and makes the code a little nicer
to read.

Fixes: 7c65817e6d38 ("drm/msm: gpu: Enable zap shader for A5XX")
Acked-by: Jordan Crouse <jcrouse@codeaurora.org>
Acked-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/Kconfig           |  2 +-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 11 +++--------
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index b638d192ce5e..99d39b2aefa6 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -5,7 +5,7 @@ config DRM_MSM
 	depends on ARCH_QCOM || (ARM && COMPILE_TEST)
 	depends on OF && COMMON_CLK
 	depends on MMU
-	select QCOM_MDT_LOADER
+	select QCOM_MDT_LOADER if ARCH_QCOM
 	select REGULATOR
 	select DRM_KMS_HELPER
 	select DRM_PANEL
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 33763b005b7b..a3393e1dc497 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -26,8 +26,6 @@ static void a5xx_dump(struct msm_gpu *gpu);
 
 #define GPU_PAS_ID 13
 
-#if IS_ENABLED(CONFIG_QCOM_MDT_LOADER)
-
 static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 {
 	const struct firmware *fw;
@@ -36,6 +34,9 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 	void *mem_region = NULL;
 	int ret;
 
+	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
+		return -EINVAL;
+
 	/* Request the MDT file for the firmware */
 	ret = request_firmware(&fw, fwname, dev);
 	if (ret) {
@@ -73,12 +74,6 @@ out:
 
 	return ret;
 }
-#else
-static int zap_shader_load_mdt(struct device *dev, const char *fwname)
-{
-	return -ENODEV;
-}
-#endif
 
 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct msm_file_private *ctx)
-- 
cgit v1.2.3-59-g8ed1b


From 8f93e043d048b671c32c6f0a5102fefa800c4618 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 26 Jul 2017 21:59:21 +0200
Subject: drm/msm: gpu: don't abuse dma_alloc for non-DMA allocations

In zap_shader_load_mdt(), we pass a pointer to a phys_addr_t
into dmam_alloc_coherent, which the compiler warns about:

drivers/gpu/drm/msm/adreno/a5xx_gpu.c: In function 'zap_shader_load_mdt':
drivers/gpu/drm/msm/adreno/a5xx_gpu.c:54:50: error: passing argument 3 of 'dmam_alloc_coherent' from incompatible pointer type [-Werror=incompatible-pointer-types]

The returned DMA address is later passed on to a function that
takes a phys_addr_t, so it's clearly wrong to use the DMA
mapping interface here: the memory may be uncached, or the
address may be completely wrong if there is an IOMMU connected
to the device. What the code actually wants to do is to get
the physical address from the reserved-mem node. It goes through
the dma-mapping interfaces for obscure reasons, and this
apparently only works by chance, relying on specific bugs
in the error handling of the arm64 dma-mapping implementation.

The same problem existed in the "venus" media driver, which was
now fixed by Stanimir Varbanov after long discussions.

In order to make some progress here, I have now ported his
approach over to the adreno driver. The patch is currently
untested, and should get a good review, but it is now much
simpler than the original, and it should be obvious what
goes wrong if I made a mistake in the port.

See also: a6e2d36bf6b7 ("media: venus: don't abuse dma_alloc for non-DMA allocations")
Cc: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Fixes: 7c65817e6d38 ("drm/msm: gpu: Enable zap shader for A5XX")
Acked-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-and-Tested-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 72 +++++++++++------------------------
 drivers/gpu/drm/msm/adreno/a5xx_gpu.h |  2 -
 2 files changed, 23 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index a3393e1dc497..f9eae03aa1dc 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -15,7 +15,7 @@
 #include <linux/cpumask.h>
 #include <linux/qcom_scm.h>
 #include <linux/dma-mapping.h>
-#include <linux/of_reserved_mem.h>
+#include <linux/of_address.h>
 #include <linux/soc/qcom/mdt_loader.h>
 #include "msm_gem.h"
 #include "msm_mmu.h"
@@ -29,6 +29,8 @@ static void a5xx_dump(struct msm_gpu *gpu);
 static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 {
 	const struct firmware *fw;
+	struct device_node *np;
+	struct resource r;
 	phys_addr_t mem_phys;
 	ssize_t mem_size;
 	void *mem_region = NULL;
@@ -37,6 +39,21 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
 		return -EINVAL;
 
+	np = of_get_child_by_name(dev->of_node, "zap-shader");
+	if (!np)
+		return -ENODEV;
+
+	np = of_parse_phandle(np, "memory-region", 0);
+	if (!np)
+		return -EINVAL;
+
+	ret = of_address_to_resource(np, 0, &r);
+	if (ret)
+		return ret;
+
+	mem_phys = r.start;
+	mem_size = resource_size(&r);
+
 	/* Request the MDT file for the firmware */
 	ret = request_firmware(&fw, fwname, dev);
 	if (ret) {
@@ -52,7 +69,7 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 	}
 
 	/* Allocate memory for the firmware image */
-	mem_region = dmam_alloc_coherent(dev, mem_size, &mem_phys, GFP_KERNEL);
+	mem_region = memremap(mem_phys, mem_size,  MEMREMAP_WC);
 	if (!mem_region) {
 		ret = -ENOMEM;
 		goto out;
@@ -70,6 +87,9 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
 
 out:
+	if (mem_region)
+		memunmap(mem_region);
+
 	release_firmware(fw);
 
 	return ret;
@@ -348,45 +368,6 @@ static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
 	return ret;
 }
 
-/* Set up a child device to "own" the zap shader */
-static int a5xx_zap_shader_dev_init(struct device *parent, struct device *dev)
-{
-	struct device_node *node;
-	int ret;
-
-	if (dev->parent)
-		return 0;
-
-	/* Find the sub-node for the zap shader */
-	node = of_get_child_by_name(parent->of_node, "zap-shader");
-	if (!node) {
-		DRM_DEV_ERROR(parent, "zap-shader not found in device tree\n");
-		return -ENODEV;
-	}
-
-	dev->parent = parent;
-	dev->of_node = node;
-	dev_set_name(dev, "adreno_zap_shader");
-
-	ret = device_register(dev);
-	if (ret) {
-		DRM_DEV_ERROR(parent, "Couldn't register zap shader device\n");
-		goto out;
-	}
-
-	ret = of_reserved_mem_device_init(dev);
-	if (ret) {
-		DRM_DEV_ERROR(parent, "Unable to set up the reserved memory\n");
-		device_unregister(dev);
-	}
-
-out:
-	if (ret)
-		dev->parent = NULL;
-
-	return ret;
-}
-
 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 {
 	static bool loaded;
@@ -415,11 +396,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 		return -ENODEV;
 	}
 
-	ret = a5xx_zap_shader_dev_init(&pdev->dev, &a5xx_gpu->zap_dev);
-
-	if (!ret)
-		ret = zap_shader_load_mdt(&a5xx_gpu->zap_dev,
-			adreno_gpu->info->zapfw);
+	ret = zap_shader_load_mdt(&pdev->dev, adreno_gpu->info->zapfw);
 
 	loaded = !ret;
 
@@ -662,9 +639,6 @@ static void a5xx_destroy(struct msm_gpu *gpu)
 
 	DBG("%s", gpu->name);
 
-	if (a5xx_gpu->zap_dev.parent)
-		device_unregister(&a5xx_gpu->zap_dev);
-
 	if (a5xx_gpu->pm4_bo) {
 		if (a5xx_gpu->pm4_iova)
 			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index d24796f3a706..1137092241d5 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -36,8 +36,6 @@ struct a5xx_gpu {
 	uint32_t gpmu_dwords;
 
 	uint32_t lm_leakage;
-
-	struct device zap_dev;
 };
 
 #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base)
-- 
cgit v1.2.3-59-g8ed1b


From a477b9cd37aa81a490dfa3265b7ff4f2c5a92463 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 1 Aug 2017 20:11:02 -0500
Subject: PCI: Add pci_reset_function_locked()

The implementation of PCI workarounds may require that the device is reset
from its probe function.  This implies that the PCI device lock is already
held, and makes calling pci_reset_function() impossible (since it will
itself try to take that lock).

Add pci_reset_function_locked(), which is the equivalent of
pci_reset_function(), except that it requires the PCI device lock to be
already held by the caller.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
[bhelgaas: folded in fix for conflict with 52354b9d1f46 ("PCI: Remove
__pci_dev_reset() and pci_dev_reset()")]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable@vger.kernel.org	# 4.11: 52354b9d1f46: PCI: Remove __pci_dev_reset() and pci_dev_reset()
Cc: stable@vger.kernel.org	# 4.11
---
 drivers/pci/pci.c   | 35 +++++++++++++++++++++++++++++++++++
 include/linux/pci.h |  1 +
 2 files changed, 36 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index af0cc3456dc1..b4b7eab29400 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4259,6 +4259,41 @@ int pci_reset_function(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_reset_function);
 
+/**
+ * pci_reset_function_locked - quiesce and reset a PCI device function
+ * @dev: PCI device to reset
+ *
+ * Some devices allow an individual function to be reset without affecting
+ * other functions in the same device.  The PCI device must be responsive
+ * to PCI config space in order to use this function.
+ *
+ * This function does not just reset the PCI portion of a device, but
+ * clears all the state associated with the device.  This function differs
+ * from __pci_reset_function() in that it saves and restores device state
+ * over the reset.  It also differs from pci_reset_function() in that it
+ * requires the PCI device lock to be held.
+ *
+ * Returns 0 if the device function was successfully reset or negative if the
+ * device doesn't support resetting a single function.
+ */
+int pci_reset_function_locked(struct pci_dev *dev)
+{
+	int rc;
+
+	rc = pci_probe_reset_function(dev);
+	if (rc)
+		return rc;
+
+	pci_dev_save_and_disable(dev);
+
+	rc = __pci_reset_function_locked(dev);
+
+	pci_dev_restore(dev);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pci_reset_function_locked);
+
 /**
  * pci_try_reset_function - quiesce and reset a PCI device function
  * @dev: PCI device to reset
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4869e66dd659..a75c13673852 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1067,6 +1067,7 @@ void pcie_flr(struct pci_dev *dev);
 int __pci_reset_function(struct pci_dev *dev);
 int __pci_reset_function_locked(struct pci_dev *dev);
 int pci_reset_function(struct pci_dev *dev);
+int pci_reset_function_locked(struct pci_dev *dev);
 int pci_try_reset_function(struct pci_dev *dev);
 int pci_probe_reset_slot(struct pci_slot *slot);
 int pci_reset_slot(struct pci_slot *slot);
-- 
cgit v1.2.3-59-g8ed1b


From 6184cc8ddbb318758a000da68c5285fc2dd74338 Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Tue, 1 Aug 2017 17:47:25 +0800
Subject: drm/i915/gvt: change resetting to resetting_eng

Use resetting_eng to identify which engine is resetting
so the rest ones' workload won't be impacted

v2:
- use ENGINE_MASK(ring_id) instead of (1 << ring_id). (Zhenyu)

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/execlist.c  | 10 +++++-----
 drivers/gpu/drm/i915/gvt/gvt.h       |  2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c |  3 ++-
 drivers/gpu/drm/i915/gvt/vgpu.c      |  8 +++++---
 4 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index 700050556242..cac669b2b320 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -499,10 +499,10 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 static int complete_execlist_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
-	struct intel_vgpu_execlist *execlist =
-		&vgpu->execlist[workload->ring_id];
+	int ring_id = workload->ring_id;
+	struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
 	struct intel_vgpu_workload *next_workload;
-	struct list_head *next = workload_q_head(vgpu, workload->ring_id)->next;
+	struct list_head *next = workload_q_head(vgpu, ring_id)->next;
 	bool lite_restore = false;
 	int ret;
 
@@ -512,10 +512,10 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
 	release_shadow_batch_buffer(workload);
 	release_shadow_wa_ctx(&workload->wa_ctx);
 
-	if (workload->status || vgpu->resetting)
+	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id)))
 		goto out;
 
-	if (!list_empty(workload_q_head(vgpu, workload->ring_id))) {
+	if (!list_empty(workload_q_head(vgpu, ring_id))) {
 		struct execlist_ctx_descriptor_format *this_desc, *next_desc;
 
 		next_workload = container_of(next,
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 3a74e79eac2f..d96c41aa5aa7 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -149,7 +149,7 @@ struct intel_vgpu {
 	bool active;
 	bool pv_notified;
 	bool failsafe;
-	bool resetting;
+	unsigned int resetting_eng;
 	void *sched_data;
 	struct vgpu_sched_ctl sched_ctl;
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 4f7057d62d88..22e08eb2d0b7 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -432,7 +432,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 
 		i915_gem_request_put(fetch_and_zero(&workload->req));
 
-		if (!workload->status && !vgpu->resetting) {
+		if (!workload->status && !(vgpu->resetting_eng &
+					   ENGINE_MASK(ring_id))) {
 			update_guest_context(workload);
 
 			for_each_set_bit(event, workload->pending_events,
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 90c14e6e3ea0..3deadcbd5a24 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -480,11 +480,13 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	unsigned int resetting_eng = dmlr ? ALL_ENGINES : engine_mask;
 
 	gvt_dbg_core("------------------------------------------\n");
 	gvt_dbg_core("resseting vgpu%d, dmlr %d, engine_mask %08x\n",
 		     vgpu->id, dmlr, engine_mask);
-	vgpu->resetting = true;
+
+	vgpu->resetting_eng = resetting_eng;
 
 	intel_vgpu_stop_schedule(vgpu);
 	/*
@@ -497,7 +499,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 		mutex_lock(&gvt->lock);
 	}
 
-	intel_vgpu_reset_execlist(vgpu, dmlr ? ALL_ENGINES : engine_mask);
+	intel_vgpu_reset_execlist(vgpu, resetting_eng);
 
 	/* full GPU reset or device model level reset */
 	if (engine_mask == ALL_ENGINES || dmlr) {
@@ -520,7 +522,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 		}
 	}
 
-	vgpu->resetting = false;
+	vgpu->resetting_eng = 0;
 	gvt_dbg_core("reset vgpu%d done\n", vgpu->id);
 	gvt_dbg_core("------------------------------------------\n");
 }
-- 
cgit v1.2.3-59-g8ed1b


From f2e2c00adcdc59b9fe86c82259abdaf32d0ee6ea Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Tue, 1 Aug 2017 17:47:26 +0800
Subject: drm/i915/gvt: clean workload queue if error happened

If a workload caused a HW GPU hang or it is in the middle of
vGPU reset, the workload queue should be cleaned up to emulate
the hang state of the GPU.

v2:
- use ENGINE_MASK(ring_id) instead of (1 << ring_id). (Zhenyu)

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/execlist.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index cac669b2b320..1648887d3f55 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -46,6 +46,8 @@
 #define same_context(a, b) (((a)->context_id == (b)->context_id) && \
 		((a)->lrca == (b)->lrca))
 
+static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask);
+
 static int context_switch_events[] = {
 	[RCS] = RCS_AS_CONTEXT_SWITCH,
 	[BCS] = BCS_AS_CONTEXT_SWITCH,
@@ -512,8 +514,23 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
 	release_shadow_batch_buffer(workload);
 	release_shadow_wa_ctx(&workload->wa_ctx);
 
-	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id)))
+	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
+		/* if workload->status is not successful means HW GPU
+		 * has occurred GPU hang or something wrong with i915/GVT,
+		 * and GVT won't inject context switch interrupt to guest.
+		 * So this error is a vGPU hang actually to the guest.
+		 * According to this we should emunlate a vGPU hang. If
+		 * there are pending workloads which are already submitted
+		 * from guest, we should clean them up like HW GPU does.
+		 *
+		 * if it is in middle of engine resetting, the pending
+		 * workloads won't be submitted to HW GPU and will be
+		 * cleaned up during the resetting process later, so doing
+		 * the workload clean up here doesn't have any impact.
+		 **/
+		clean_workloads(vgpu, ENGINE_MASK(ring_id));
 		goto out;
+	}
 
 	if (!list_empty(workload_q_head(vgpu, ring_id))) {
 		struct execlist_ctx_descriptor_format *this_desc, *next_desc;
-- 
cgit v1.2.3-59-g8ed1b


From 8466489ef5ba48272ba4fa4ea9f8f403306de4c7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 1 Aug 2017 20:11:08 -0500
Subject: xhci: Reset Renesas uPD72020x USB controller for 32-bit DMA issue

The Renesas uPD72020x XHCI controller seems to suffer from a really
annoying bug, where it may retain some of its DMA programming across a XHCI
reset, and despite the driver correctly programming new DMA addresses.
This is visible if the device has been using 64-bit DMA addresses, and is
then switched to using 32-bit DMA addresses.  The top 32 bits of the
address (now zero) are ignored are replaced by the 32 bits from the
*previous* programming.  Sticking with 64-bit DMA always works, but doesn't
seem very appropriate.

A PCI reset of the device restores the normal functionality, which is done
at probe time.  Unfortunately, this has to be done before any quirk has
been discovered, hence the intrusive nature of the fix.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Mathias Nyman <mathias.nyman@linux.intel.com>
CC: stable@vger.kernel.org	# v4.11+
---
 drivers/usb/host/pci-quirks.c | 20 ++++++++++++++++++++
 drivers/usb/host/pci-quirks.h |  1 +
 drivers/usb/host/xhci-pci.c   |  7 +++++++
 3 files changed, 28 insertions(+)

diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index c8989c62a262..858fefd67ebe 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -1150,3 +1150,23 @@ static void quirk_usb_early_handoff(struct pci_dev *pdev)
 }
 DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
 			PCI_CLASS_SERIAL_USB, 8, quirk_usb_early_handoff);
+
+bool usb_xhci_needs_pci_reset(struct pci_dev *pdev)
+{
+	/*
+	 * Our dear uPD72020{1,2} friend only partially resets when
+	 * asked to via the XHCI interface, and may end up doing DMA
+	 * at the wrong addresses, as it keeps the top 32bit of some
+	 * addresses from its previous programming under obscure
+	 * circumstances.
+	 * Give it a good wack at probe time. Unfortunately, this
+	 * needs to happen before we've had a chance to discover any
+	 * quirk, or the system will be in a rather bad state.
+	 */
+	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
+	    (pdev->device == 0x0014 || pdev->device == 0x0015))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(usb_xhci_needs_pci_reset);
diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h
index 655994480198..5582cbafecd4 100644
--- a/drivers/usb/host/pci-quirks.h
+++ b/drivers/usb/host/pci-quirks.h
@@ -15,6 +15,7 @@ void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev);
 void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev);
 void usb_disable_xhci_ports(struct pci_dev *xhci_pdev);
 void sb800_prefetch(struct device *dev, int on);
+bool usb_xhci_needs_pci_reset(struct pci_dev *pdev);
 #else
 struct pci_dev;
 static inline void usb_amd_quirk_pll_disable(void) {}
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 5b0fa553c8bc..8071c8fdd15e 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -284,6 +284,13 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 
 	driver = (struct hc_driver *)id->driver_data;
 
+	/* For some HW implementation, a XHCI reset is just not enough... */
+	if (usb_xhci_needs_pci_reset(dev)) {
+		dev_info(&dev->dev, "Resetting\n");
+		if (pci_reset_function_locked(dev))
+			dev_warn(&dev->dev, "Reset failed");
+	}
+
 	/* Prevent runtime suspending between USB-2 and USB-3 initialization */
 	pm_runtime_get_noresume(&dev->dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From da6c9bbf418dcb0f8be261f4353400fa959b1e92 Mon Sep 17 00:00:00 2001
From: Mark yao <mark.yao@rock-chips.com>
Date: Mon, 31 Jul 2017 17:49:42 +0800
Subject: drm/rockchip: vop: fix iommu page fault when resume

Iommu would get page fault with following path:
   vop_disable:
      1, disable all windows and set vop config done
      2, vop enter to standy, all windows not works, but their registers
         are not clean, when you read window's enable bit, may found the
         window is enable.

   vop_enable:
      1, memcpy(vop->regsbak, vop->regs, len)
         save current vop registers to vop->regsbak, then you can found
         window is enable on regsbak.
      2, VOP_WIN_SET(vop, win, gate, 1);
         force enable window gate, but gate and enable are on same
         hardware register, then window enable bit rewrite to vop hardware.
      3, vop power on, and vop might try to scan destroyed buffer,
         then iommu get page fault.

Move windows disable after vop regsbak restore, then vop regsbak mechanism
would keep tracing the modify, everything would be safe.

Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
Reviewed-by: Sandy huang <sandy.huang@rock-chips.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1501494582-6934-1-git-send-email-mark.yao@rock-chips.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 33 +++++++++++++----------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 5d450332c2fd..804d0ff53059 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -500,7 +500,7 @@ static void vop_line_flag_irq_disable(struct vop *vop)
 static int vop_enable(struct drm_crtc *crtc)
 {
 	struct vop *vop = to_vop(crtc);
-	int ret;
+	int ret, i;
 
 	ret = pm_runtime_get_sync(vop->dev);
 	if (ret < 0) {
@@ -533,6 +533,20 @@ static int vop_enable(struct drm_crtc *crtc)
 	}
 
 	memcpy(vop->regs, vop->regsbak, vop->len);
+	/*
+	 * We need to make sure that all windows are disabled before we
+	 * enable the crtc. Otherwise we might try to scan from a destroyed
+	 * buffer later.
+	 */
+	for (i = 0; i < vop->data->win_size; i++) {
+		struct vop_win *vop_win = &vop->win[i];
+		const struct vop_win_data *win = vop_win->data;
+
+		spin_lock(&vop->reg_lock);
+		VOP_WIN_SET(vop, win, enable, 0);
+		spin_unlock(&vop->reg_lock);
+	}
+
 	vop_cfg_done(vop);
 
 	/*
@@ -566,28 +580,11 @@ err_put_pm_runtime:
 static void vop_crtc_disable(struct drm_crtc *crtc)
 {
 	struct vop *vop = to_vop(crtc);
-	int i;
 
 	WARN_ON(vop->event);
 
 	rockchip_drm_psr_deactivate(&vop->crtc);
 
-	/*
-	 * We need to make sure that all windows are disabled before we
-	 * disable that crtc. Otherwise we might try to scan from a destroyed
-	 * buffer later.
-	 */
-	for (i = 0; i < vop->data->win_size; i++) {
-		struct vop_win *vop_win = &vop->win[i];
-		const struct vop_win_data *win = vop_win->data;
-
-		spin_lock(&vop->reg_lock);
-		VOP_WIN_SET(vop, win, enable, 0);
-		spin_unlock(&vop->reg_lock);
-	}
-
-	vop_cfg_done(vop);
-
 	drm_crtc_vblank_off(crtc);
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From 6f04f5925ce763e07cb405d1fbe97a53b6c026a3 Mon Sep 17 00:00:00 2001
From: Mark yao <mark.yao@rock-chips.com>
Date: Mon, 31 Jul 2017 17:49:46 +0800
Subject: drm/rockchip: vop: fix NV12 video display error

fixup the scale calculation formula on the case
src_height == (dst_height/2).

Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
Reviewed-by: Sandy huang <sandy.huang@rock-chips.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1501494586-6984-1-git-send-email-mark.yao@rock-chips.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
index 9979fd0c2282..27eefbfcf3d0 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
@@ -282,6 +282,9 @@ static inline uint16_t scl_get_bili_dn_vskip(int src_h, int dst_h,
 
 	act_height = (src_h + vskiplines - 1) / vskiplines;
 
+	if (act_height == dst_h)
+		return GET_SCL_FT_BILI_DN(src_h, dst_h) / vskiplines;
+
 	return GET_SCL_FT_BILI_DN(act_height, dst_h);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 79a0b149d4e3d45fc46673f4b29a157776c174e2 Mon Sep 17 00:00:00 2001
From: Mark yao <mark.yao@rock-chips.com>
Date: Mon, 31 Jul 2017 17:49:50 +0800
Subject: drm/rockchip: vop: round_up pitches to word align

VOP pitch register is word align, need align to word.

VOP_WIN0_VIR:
  bit[31:16] win0_vir_stride_uv
    Number of words of Win0 uv Virtual width
  bit[15:0] win0_vir_width
    Number of words of Win0 yrgb Virtual width
    ARGB888 : win0_vir_width
    RGB888 : (win0_vir_width*3/4) + (win0_vir_width%3)
    RGB565 : ceil(win0_vir_width/2)
    YUV : ceil(win0_vir_width/4)

Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
Reviewed-by: Sandy huang <sandy.huang@rock-chips.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1501494591-7034-1-git-send-email-mark.yao@rock-chips.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 804d0ff53059..e205c5cf9019 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -761,7 +761,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane,
 	spin_lock(&vop->reg_lock);
 
 	VOP_WIN_SET(vop, win, format, format);
-	VOP_WIN_SET(vop, win, yrgb_vir, fb->pitches[0] >> 2);
+	VOP_WIN_SET(vop, win, yrgb_vir, DIV_ROUND_UP(fb->pitches[0], 4));
 	VOP_WIN_SET(vop, win, yrgb_mst, dma_addr);
 	if (is_yuv_support(fb->format->format)) {
 		int hsub = drm_format_horz_chroma_subsampling(fb->format->format);
@@ -775,7 +775,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane,
 		offset += (src->y1 >> 16) * fb->pitches[1] / vsub;
 
 		dma_addr = rk_uv_obj->dma_addr + offset + fb->offsets[1];
-		VOP_WIN_SET(vop, win, uv_vir, fb->pitches[1] >> 2);
+		VOP_WIN_SET(vop, win, uv_vir, DIV_ROUND_UP(fb->pitches[1], 4));
 		VOP_WIN_SET(vop, win, uv_mst, dma_addr);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 80c471ea040ad9006ebff6d64221a04e8fa1b7f6 Mon Sep 17 00:00:00 2001
From: Mark yao <mark.yao@rock-chips.com>
Date: Mon, 31 Jul 2017 17:49:55 +0800
Subject: drm/rockchip: vop: report error when check resource error

The user would be confused while facing a error commit without
any error report.

Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
Reviewed-by: Sandy huang <sandy.huang@rock-chips.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1501494596-7090-1-git-send-email-mark.yao@rock-chips.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index e205c5cf9019..2900f1410d95 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -679,8 +679,10 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
 	 * Src.x1 can be odd when do clip, but yuv plane start point
 	 * need align with 2 pixel.
 	 */
-	if (is_yuv_support(fb->format->format) && ((state->src.x1 >> 16) % 2))
+	if (is_yuv_support(fb->format->format) && ((state->src.x1 >> 16) % 2)) {
+		DRM_ERROR("Invalid Source: Yuv format not support odd xpos\n");
 		return -EINVAL;
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 02b6ed44304e458d68e454493ab272f4e3c3c53a Mon Sep 17 00:00:00 2001
From: Tina Zhang <tina.zhang@intel.com>
Date: Fri, 4 Aug 2017 17:39:41 +0800
Subject: drm/i915/gvt: Initialize MMIO Block with HW state

MMIO block with tracked mmio, is introduced for the sake of performance
of searching tracked mmio. All the tracked mmio needs to get the initial
value from the HW state during vGPU being created. This patch is to
initialize the tracked registers in MMIO block with the HW state.

v2: Add "Fixes:" line for this patch (Zhenyu)

Fixes: 65f9f6febf12 ("drm/i915/gvt: Optimize MMIO register handling for some large MMIO blocks")
Signed-off-by: Tina Zhang <tina.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/firmware.c | 11 ++++++++++-
 drivers/gpu/drm/i915/gvt/gvt.h      | 12 ++++++++++++
 drivers/gpu/drm/i915/gvt/handlers.c | 36 +++++++++++++++++-------------------
 3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c
index 5dad9298b2d5..a26c1705430e 100644
--- a/drivers/gpu/drm/i915/gvt/firmware.c
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -72,11 +72,13 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
 	struct intel_gvt_device_info *info = &gvt->device_info;
 	struct pci_dev *pdev = gvt->dev_priv->drm.pdev;
 	struct intel_gvt_mmio_info *e;
+	struct gvt_mmio_block *block = gvt->mmio.mmio_block;
+	int num = gvt->mmio.num_mmio_block;
 	struct gvt_firmware_header *h;
 	void *firmware;
 	void *p;
 	unsigned long size, crc32_start;
-	int i;
+	int i, j;
 	int ret;
 
 	size = sizeof(*h) + info->mmio_size + info->cfg_space_size;
@@ -105,6 +107,13 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
 	hash_for_each(gvt->mmio.mmio_info_table, i, e, node)
 		*(u32 *)(p + e->offset) = I915_READ_NOTRACE(_MMIO(e->offset));
 
+	for (i = 0; i < num; i++, block++) {
+		for (j = 0; j < block->size; j += 4)
+			*(u32 *)(p + INTEL_GVT_MMIO_OFFSET(block->offset) + j) =
+				I915_READ_NOTRACE(_MMIO(INTEL_GVT_MMIO_OFFSET(
+							block->offset) + j));
+	}
+
 	memcpy(gvt->firmware.mmio, p, info->mmio_size);
 
 	crc32_start = offsetof(struct gvt_firmware_header, crc32) + 4;
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index d96c41aa5aa7..2964a4d01a66 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -195,6 +195,15 @@ struct intel_gvt_fence {
 	unsigned long vgpu_allocated_fence_num;
 };
 
+/* Special MMIO blocks. */
+struct gvt_mmio_block {
+	unsigned int device;
+	i915_reg_t   offset;
+	unsigned int size;
+	gvt_mmio_func read;
+	gvt_mmio_func write;
+};
+
 #define INTEL_GVT_MMIO_HASH_BITS 11
 
 struct intel_gvt_mmio {
@@ -214,6 +223,9 @@ struct intel_gvt_mmio {
 /* This reg could be accessed by unaligned address */
 #define F_UNALIGN	(1 << 6)
 
+	struct gvt_mmio_block *mmio_block;
+	unsigned int num_mmio_block;
+
 	DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS);
 	unsigned int num_tracked_mmio;
 };
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 17febe830ff6..323664a238f5 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2857,31 +2857,15 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	return 0;
 }
 
-/* Special MMIO blocks. */
-static struct gvt_mmio_block {
-	unsigned int device;
-	i915_reg_t   offset;
-	unsigned int size;
-	gvt_mmio_func read;
-	gvt_mmio_func write;
-} gvt_mmio_blocks[] = {
-	{D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL},
-	{D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL},
-	{D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE,
-		pvinfo_mmio_read, pvinfo_mmio_write},
-	{D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL},
-	{D_ALL, LGC_PALETTE(PIPE_B, 0), 1024, NULL, NULL},
-	{D_ALL, LGC_PALETTE(PIPE_C, 0), 1024, NULL, NULL},
-};
-
 static struct gvt_mmio_block *find_mmio_block(struct intel_gvt *gvt,
 					      unsigned int offset)
 {
 	unsigned long device = intel_gvt_get_device_type(gvt);
-	struct gvt_mmio_block *block = gvt_mmio_blocks;
+	struct gvt_mmio_block *block = gvt->mmio.mmio_block;
+	int num = gvt->mmio.num_mmio_block;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(gvt_mmio_blocks); i++, block++) {
+	for (i = 0; i < num; i++, block++) {
 		if (!(device & block->device))
 			continue;
 		if (offset >= INTEL_GVT_MMIO_OFFSET(block->offset) &&
@@ -2912,6 +2896,17 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt)
 	gvt->mmio.mmio_attribute = NULL;
 }
 
+/* Special MMIO blocks. */
+static struct gvt_mmio_block mmio_blocks[] = {
+	{D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL},
+	{D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL},
+	{D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE,
+		pvinfo_mmio_read, pvinfo_mmio_write},
+	{D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL},
+	{D_ALL, LGC_PALETTE(PIPE_B, 0), 1024, NULL, NULL},
+	{D_ALL, LGC_PALETTE(PIPE_C, 0), 1024, NULL, NULL},
+};
+
 /**
  * intel_gvt_setup_mmio_info - setup MMIO information table for GVT device
  * @gvt: GVT device
@@ -2951,6 +2946,9 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt)
 			goto err;
 	}
 
+	gvt->mmio.mmio_block = mmio_blocks;
+	gvt->mmio.num_mmio_block = ARRAY_SIZE(mmio_blocks);
+
 	gvt_dbg_mmio("traced %u virtual mmio registers\n",
 		     gvt->mmio.num_tracked_mmio);
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 9e48cd4a77d5170433a2e611eeda7accdf96604d Mon Sep 17 00:00:00 2001
From: Allen Pais <allen.pais@oracle.com>
Date: Mon, 24 Jul 2017 11:44:17 +0530
Subject: sparc64: properly name the cpu constants

Acked-by: Sam Ravnborg <sam@ravnborg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Allen Pais <allen.pais@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/spitfire.h | 14 ++++++++++++++
 arch/sparc/kernel/head_64.S       | 16 ++++++++--------
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h
index 1d8321c827a8..9cc2afe10ef0 100644
--- a/arch/sparc/include/asm/spitfire.h
+++ b/arch/sparc/include/asm/spitfire.h
@@ -51,6 +51,20 @@
 #define SUN4V_CHIP_SPARC_SN	0x8b
 #define SUN4V_CHIP_UNKNOWN	0xff
 
+/*
+ * The following CPU_ID_xxx constants are used
+ * to identify the CPU type in the setup phase
+ * (see head_64.S)
+ */
+#define CPU_ID_NIAGARA1		('1')
+#define CPU_ID_NIAGARA2		('2')
+#define CPU_ID_NIAGARA3		('3')
+#define CPU_ID_NIAGARA4		('4')
+#define CPU_ID_NIAGARA5		('5')
+#define CPU_ID_M6		('6')
+#define CPU_ID_M7		('7')
+#define CPU_ID_SONOMA1		('N')
+
 #ifndef __ASSEMBLY__
 
 enum ultra_tlb_layout {
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 41a407328667..ddb5e24adf49 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -424,22 +424,22 @@ EXPORT_SYMBOL(sun4v_chip_type)
 	 nop
 
 70:	ldub	[%g1 + 7], %g2
-	cmp	%g2, '3'
+	cmp	%g2, CPU_ID_NIAGARA3
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA3, %g4
-	cmp	%g2, '4'
+	cmp	%g2, CPU_ID_NIAGARA4
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA4, %g4
-	cmp	%g2, '5'
+	cmp	%g2, CPU_ID_NIAGARA5
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA5, %g4
-	cmp	%g2, '6'
+	cmp	%g2, CPU_ID_M6
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_SPARC_M6, %g4
-	cmp	%g2, '7'
+	cmp	%g2, CPU_ID_M7
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_SPARC_M7, %g4
-	cmp	%g2, 'N'
+	cmp	%g2, CPU_ID_SONOMA1
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_SPARC_SN, %g4
 	ba,pt	%xcc, 49f
@@ -448,10 +448,10 @@ EXPORT_SYMBOL(sun4v_chip_type)
 91:	sethi	%hi(prom_cpu_compatible), %g1
 	or	%g1, %lo(prom_cpu_compatible), %g1
 	ldub	[%g1 + 17], %g2
-	cmp	%g2, '1'
+	cmp	%g2, CPU_ID_NIAGARA1
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA1, %g4
-	cmp	%g2, '2'
+	cmp	%g2, CPU_ID_NIAGARA2
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA2, %g4
 	
-- 
cgit v1.2.3-59-g8ed1b


From 7d484acb2f90643de7e242fd47e48c3ebb22df3a Mon Sep 17 00:00:00 2001
From: Allen Pais <allen.pais@oracle.com>
Date: Mon, 24 Jul 2017 11:44:18 +0530
Subject: sparc64: recognize and support sparc M8 cpu type

Recognize SPARC-M8 cpu type, hardware caps and cpu
distribution map.

Signed-off-by: Allen Pais <allen.pais@oracle.com>
Signed-off-by: David Aldridge <david.j.aldridge@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/spitfire.h |  2 ++
 arch/sparc/kernel/cpu.c           |  6 ++++++
 arch/sparc/kernel/cpumap.c        |  1 +
 arch/sparc/kernel/head_64.S       |  6 ++++++
 arch/sparc/kernel/setup_64.c      | 15 +++++++++++++--
 arch/sparc/mm/init_64.c           |  2 ++
 6 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h
index 9cc2afe10ef0..1b1286d05069 100644
--- a/arch/sparc/include/asm/spitfire.h
+++ b/arch/sparc/include/asm/spitfire.h
@@ -47,6 +47,7 @@
 #define SUN4V_CHIP_NIAGARA5	0x05
 #define SUN4V_CHIP_SPARC_M6	0x06
 #define SUN4V_CHIP_SPARC_M7	0x07
+#define SUN4V_CHIP_SPARC_M8	0x08
 #define SUN4V_CHIP_SPARC64X	0x8a
 #define SUN4V_CHIP_SPARC_SN	0x8b
 #define SUN4V_CHIP_UNKNOWN	0xff
@@ -63,6 +64,7 @@
 #define CPU_ID_NIAGARA5		('5')
 #define CPU_ID_M6		('6')
 #define CPU_ID_M7		('7')
+#define CPU_ID_M8		('8')
 #define CPU_ID_SONOMA1		('N')
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 493e023a468a..ef4f18f7a674 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -506,6 +506,12 @@ static void __init sun4v_cpu_probe(void)
 		sparc_pmu_type = "sparc-m7";
 		break;
 
+	case SUN4V_CHIP_SPARC_M8:
+		sparc_cpu_type = "SPARC-M8";
+		sparc_fpu_type = "SPARC-M8 integrated FPU";
+		sparc_pmu_type = "sparc-m8";
+		break;
+
 	case SUN4V_CHIP_SPARC_SN:
 		sparc_cpu_type = "SPARC-SN";
 		sparc_fpu_type = "SPARC-SN integrated FPU";
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index 45c820e1cba5..90d550bbfeef 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -328,6 +328,7 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
 	case SUN4V_CHIP_NIAGARA5:
 	case SUN4V_CHIP_SPARC_M6:
 	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
 	case SUN4V_CHIP_SPARC_SN:
 	case SUN4V_CHIP_SPARC64X:
 		rover_inc_table = niagara_iterate_method;
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index ddb5e24adf49..78e0211753d2 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -439,6 +439,9 @@ EXPORT_SYMBOL(sun4v_chip_type)
 	cmp	%g2, CPU_ID_M7
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_SPARC_M7, %g4
+	cmp	%g2, CPU_ID_M8
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_SPARC_M8, %g4
 	cmp	%g2, CPU_ID_SONOMA1
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_SPARC_SN, %g4
@@ -600,6 +603,9 @@ niagara_tlb_fixup:
 	be,pt	%xcc, niagara4_patch
 	 nop
 	cmp	%g1, SUN4V_CHIP_SPARC_M7
+	be,pt	%xcc, niagara4_patch
+	 nop
+	cmp	%g1, SUN4V_CHIP_SPARC_M8
 	be,pt	%xcc, niagara4_patch
 	 nop
 	cmp	%g1, SUN4V_CHIP_SPARC_SN
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 4d9c3e13c150..150ee7d4b059 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -288,10 +288,17 @@ static void __init sun4v_patch(void)
 
 	sun4v_patch_2insn_range(&__sun4v_2insn_patch,
 				&__sun4v_2insn_patch_end);
-	if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
-	    sun4v_chip_type == SUN4V_CHIP_SPARC_SN)
+
+	switch (sun4v_chip_type) {
+	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
+	case SUN4V_CHIP_SPARC_SN:
 		sun_m7_patch_2insn_range(&__sun_m7_2insn_patch,
 					 &__sun_m7_2insn_patch_end);
+		break;
+	default:
+		break;
+	}
 
 	sun4v_hvapi_init();
 }
@@ -529,6 +536,7 @@ static void __init init_sparc64_elf_hwcap(void)
 		    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 			cap |= HWCAP_SPARC_BLKINIT;
@@ -538,6 +546,7 @@ static void __init init_sparc64_elf_hwcap(void)
 		    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 			cap |= HWCAP_SPARC_N2;
@@ -568,6 +577,7 @@ static void __init init_sparc64_elf_hwcap(void)
 			    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 				cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 |
@@ -578,6 +588,7 @@ static void __init init_sparc64_elf_hwcap(void)
 			    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 				cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC |
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index fed73f14aa49..c24f4bfc3b84 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2161,6 +2161,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
 	 */
 	switch (sun4v_chip_type) {
 	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
 	case SUN4V_CHIP_SPARC_SN:
 		pagecv_flag = 0x00;
 		break;
@@ -2313,6 +2314,7 @@ void __init paging_init(void)
 	 */
 	switch (sun4v_chip_type) {
 	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
 	case SUN4V_CHIP_SPARC_SN:
 		page_cache4v_flag = _PAGE_CP_4V;
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From fdaccf74fe83ab3438df014efd55788a1dd414b5 Mon Sep 17 00:00:00 2001
From: Vijay Kumar <vijay.ac.kumar@oracle.com>
Date: Fri, 28 Jul 2017 19:29:32 -0600
Subject: sparc64: Increase max_phys_bits to 51 and VA bits to 53 for M8.

On M8 chips, use a max_phys_bits value of 51.

Also, M8 supports VA bits up to 54 bits. However, for now
restrict VA bits to 53 due to 4-level pagetable limitation.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/init_64.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index c24f4bfc3b84..afa0099f3748 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1944,12 +1944,22 @@ static void __init setup_page_offset(void)
 			break;
 		case SUN4V_CHIP_SPARC_M7:
 		case SUN4V_CHIP_SPARC_SN:
-		default:
 			/* M7 and later support 52-bit virtual addresses.  */
 			sparc64_va_hole_top =    0xfff8000000000000UL;
 			sparc64_va_hole_bottom = 0x0008000000000000UL;
 			max_phys_bits = 49;
 			break;
+		case SUN4V_CHIP_SPARC_M8:
+		default:
+			/* M8 and later support 54-bit virtual addresses.
+			 * However, restricting M8 and above VA bits to 53
+			 * as 4-level page table cannot support more than
+			 * 53 VA bits.
+			 */
+			sparc64_va_hole_top =    0xfff0000000000000UL;
+			sparc64_va_hole_bottom = 0x0010000000000000UL;
+			max_phys_bits = 51;
+			break;
 		}
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From d6086598d34e1cf9091c7be201f5b2041dc6203e Mon Sep 17 00:00:00 2001
From: Xiong Zhang <xiong.y.zhang@intel.com>
Date: Wed, 2 Aug 2017 10:31:01 +0800
Subject: drm/i915/gvt: Change the max length of mmio_reg_rw from 4 to 8

When linux guest access mmio with __raw_i915_read64 or __raw_i915_write64,
its length is 8 bytes.

This fix the linux guest in xengt couldn't boot up as it fail in
reading pv_info->magic.

Fixes: 65f9f6febf12 ("drm/i915/gvt: Optimize MMIO register handling for some large MMIO blocks")
Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 323664a238f5..feed9921b3b3 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -3028,7 +3028,7 @@ int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset,
 	gvt_mmio_func func;
 	int ret;
 
-	if (WARN_ON(bytes > 4))
+	if (WARN_ON(bytes > 8))
 		return -EINVAL;
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From 5279fc7724ae3a82c9cfe5b09c1fb07ff0e41056 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Mon, 24 Jul 2017 11:14:31 +0200
Subject: drm/i915: Fix out-of-bounds array access in bdw_load_gamma_lut

bdw_load_gamma_lut is writing beyond the array to the maximum value.
The intend of the function is to clamp values > 1 to 1, so write
the intended color to the max register.

This fixes the following KASAN warning:

[  197.020857] [IGT] kms_pipe_color: executing
[  197.063434] [IGT] kms_pipe_color: starting subtest ctm-0-25-pipe0
[  197.078989] ==================================================================
[  197.079127] BUG: KASAN: slab-out-of-bounds in bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915]
[  197.079188] Read of size 2 at addr ffff8800d38db150 by task kms_pipe_color/1839
[  197.079208] CPU: 2 PID: 1839 Comm: kms_pipe_color Tainted: G     U 4.13.0-rc1-patser+ #5211
[  197.079215] Hardware name: NUC5i7RYB, BIOS RYBDWi35.86A.0246.2015.0309.1355 03/09/2015
[  197.079220] Call Trace:
[  197.079230]  dump_stack+0x68/0x9e
[  197.079239]  print_address_description+0x6f/0x250
[  197.079251]  kasan_report+0x216/0x370
[  197.079374]  ? bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915]
[  197.079451]  ? gen8_write16+0x4e0/0x4e0 [i915]
[  197.079460]  __asan_report_load2_noabort+0x14/0x20
[  197.079535]  bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915]
[  197.079612]  broadwell_load_luts+0x1df/0x550 [i915]
[  197.079690]  intel_color_load_luts+0x7b/0x80 [i915]
[  197.079764]  intel_begin_crtc_commit+0x138/0x760 [i915]
[  197.079783]  drm_atomic_helper_commit_planes_on_crtc+0x1a3/0x820 [drm_kms_helper]
[  197.079859]  ? intel_pre_plane_update+0x571/0x580 [i915]
[  197.079937]  intel_update_crtc+0x238/0x330 [i915]
[  197.080016]  intel_update_crtcs+0x10f/0x210 [i915]
[  197.080092]  intel_atomic_commit_tail+0x1552/0x3340 [i915]
[  197.080101]  ? _raw_spin_unlock+0x3c/0x40
[  197.080110]  ? __queue_work+0xb40/0xbf0
[  197.080188]  ? skl_update_crtcs+0xc00/0xc00 [i915]
[  197.080195]  ? trace_hardirqs_on+0xd/0x10
[  197.080269]  ? intel_atomic_commit_ready+0x128/0x13c [i915]
[  197.080329]  ? __i915_sw_fence_complete+0x5b8/0x6d0 [i915]
[  197.080336]  ? debug_object_activate+0x39e/0x580
[  197.080397]  ? i915_sw_fence_await+0x30/0x30 [i915]
[  197.080409]  ? __might_sleep+0x15b/0x180
[  197.080483]  intel_atomic_commit+0x944/0xa70 [i915]
[  197.080490]  ? refcount_dec_and_test+0x11/0x20
[  197.080567]  ? intel_atomic_commit_tail+0x3340/0x3340 [i915]
[  197.080597]  ? drm_atomic_crtc_set_property+0x303/0x580 [drm]
[  197.080674]  ? intel_atomic_commit_tail+0x3340/0x3340 [i915]
[  197.080704]  drm_atomic_commit+0xd7/0xe0 [drm]
[  197.080722]  drm_atomic_helper_crtc_set_property+0xec/0x130 [drm_kms_helper]
[  197.080749]  drm_mode_crtc_set_obj_prop+0x7d/0xb0 [drm]
[  197.080775]  drm_mode_obj_set_property_ioctl+0x50b/0x5d0 [drm]
[  197.080783]  ? __might_fault+0x104/0x180
[  197.080809]  ? drm_mode_obj_find_prop_id+0x160/0x160 [drm]
[  197.080838]  ? drm_mode_obj_find_prop_id+0x160/0x160 [drm]
[  197.080861]  drm_ioctl_kernel+0x154/0x1a0 [drm]
[  197.080885]  drm_ioctl+0x624/0x8f0 [drm]
[  197.080910]  ? drm_mode_obj_find_prop_id+0x160/0x160 [drm]
[  197.080934]  ? drm_getunique+0x210/0x210 [drm]
[  197.080943]  ? __handle_mm_fault+0x1bd0/0x1ce0
[  197.080949]  ? lock_downgrade+0x610/0x610
[  197.080957]  ? __lru_cache_add+0x15a/0x180
[  197.080967]  do_vfs_ioctl+0xd92/0xe40
[  197.080975]  ? ioctl_preallocate+0x1b0/0x1b0
[  197.080982]  ? selinux_capable+0x20/0x20
[  197.080991]  ? __do_page_fault+0x7b7/0x9a0
[  197.080997]  ? lock_downgrade+0x5bb/0x610
[  197.081007]  ? security_file_ioctl+0x57/0x90
[  197.081016]  SyS_ioctl+0x4e/0x80
[  197.081024]  entry_SYSCALL_64_fastpath+0x18/0xad
[  197.081030] RIP: 0033:0x7f61f287a987
[  197.081035] RSP: 002b:00007fff7d44d188 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  197.081043] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f61f287a987
[  197.081048] RDX: 00007fff7d44d1c0 RSI: 00000000c01864ba RDI: 0000000000000003
[  197.081053] RBP: 00007f61f2b3eb00 R08: 0000000000000059 R09: 0000000000000000
[  197.081058] R10: 0000002ea5c4a290 R11: 0000000000000246 R12: 00007f61f2b3eb58
[  197.081063] R13: 0000000000001010 R14: 00007f61f2b3eb58 R15: 0000000000002702

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101659
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reported-by: Martin Peres <martin.peres@linux.intel.com>
Cc: Martin Peres <martin.peres@linux.intel.com>
Fixes: 82cf435b3134 ("drm/i915: Implement color management on bdw/skl/bxt/kbl")
Cc: Shashank Sharma <shashank.sharma@intel.com>
Cc: Kiran S Kumar <kiran.s.kumar@intel.com>
Cc: Kausal Malladi <kausalmalladi@gmail.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Cc: <stable@vger.kernel.org> # v4.7+
Link: https://patchwork.freedesktop.org/patch/msgid/20170724091431.24251-1-maarten.lankhorst@linux.intel.com
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
(cherry picked from commit 09a92bc8773b4314e02b478e003fe5936ce85adb)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_color.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
index 306c6b06b330..17c4ae7e4e7c 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -398,6 +398,7 @@ static void bdw_load_gamma_lut(struct drm_crtc_state *state, u32 offset)
 		}
 
 		/* Program the max register to clamp values > 1.0. */
+		i = lut_size - 1;
 		I915_WRITE(PREC_PAL_GC_MAX(pipe, 0),
 			   drm_color_lut_extract(lut[i].red, 16));
 		I915_WRITE(PREC_PAL_GC_MAX(pipe, 1),
-- 
cgit v1.2.3-59-g8ed1b


From b5fa57ddc4a2492441a1391f07d5c8a282271249 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Thu, 3 Aug 2017 17:58:07 +0100
Subject: drm/i915/perf: fix flex eu registers programming

We were reserving fewer dwords in the ring than necessary. Indeed
we're always writing all registers once, so discard the actual number
of registers given by the user and just program the whitelisted ones
once.

Fixes: 19f81df2859e ("drm/i915/perf: Add OA unit support for Gen 8+")
Reported-by: Matthew Auld <matthew.william.auld@gmail.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Cc: <stable@vger.kernel.org> # v4.12+
Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-6-lionel.g.landwerlin@intel.com
(cherry picked from commit 01d928e9a1644eb2e28f684905f888e700c7b9dc)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9cd22f83b0cf..f33d90226704 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1601,11 +1601,11 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req)
 	u32 *cs;
 	int i;
 
-	cs = intel_ring_begin(req, n_flex_regs * 2 + 4);
+	cs = intel_ring_begin(req, ARRAY_SIZE(flex_mmio) * 2 + 4);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(n_flex_regs + 1);
+	*cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1);
 
 	*cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
 	*cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
-- 
cgit v1.2.3-59-g8ed1b


From cd82f37a9ddaaafb33d8bc3f44857edbad5d52bf Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 4 Aug 2017 11:41:35 +0100
Subject: drm/i915/shrinker: Wrap need_resched() inside preempt-disable

In order for us to successfully detect the end of a timeslice,
preemption must be disabled. Otherwise, inside the loop we may be
preempted many times without our noticing, and each time our timeslice
will be reset, invalidating need_resched()

Reported-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reported-by: Tomi Sarvela <tomi.p.sarvela@intel.com>
Fixes: 290271de34f6 ("drm/i915: Spin for struct_mutex inside shrinker")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.13-rc1+
Link: https://patchwork.freedesktop.org/patch/msgid/20170804104135.26805-1-chris@chris-wilson.co.uk
Tested-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
(cherry picked from commit 6cb0c6ad9e07f2c7971c4e8e0d9b7ceba151a925)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 1032f98add11..77fb39808131 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -43,16 +43,21 @@ static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
 		return true;
 
 	case MUTEX_TRYLOCK_FAILED:
+		*unlock = false;
+		preempt_disable();
 		do {
 			cpu_relax();
 			if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
-	case MUTEX_TRYLOCK_SUCCESS:
 				*unlock = true;
-				return true;
+				break;
 			}
 		} while (!need_resched());
+		preempt_enable();
+		return *unlock;
 
-		return false;
+	case MUTEX_TRYLOCK_SUCCESS:
+		*unlock = true;
+		return true;
 	}
 
 	BUG();
-- 
cgit v1.2.3-59-g8ed1b


From 1e2ba788787c86f527eca6ffd9adb97d691a810e Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 31 May 2017 11:33:55 +0300
Subject: drm/i915: fix backlight invert for non-zero minimum brightness

When we started following the backlight minimum brightness in
6dda730e55f4 ("drm/i915: respect the VBT minimum backlight brightness")
we overlooked the brightness invert quirk. Even if we invert the
brightness, we need to take the min limit into account. We probably
missed this because the invert has only been required on gen4 for proper
operation.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101127
Fixes: 6dda730e55f4 ("drm/i915: respect the VBT minimum backlight brightness")
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170531083355.7898-1-jani.nikula@intel.com
(cherry picked from commit e9d7486eac949f2a8d121657e536c8abdd4ea088)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_panel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 96c2cbd81869..593349be8b9d 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -469,7 +469,7 @@ static u32 intel_panel_compute_brightness(struct intel_connector *connector,
 
 	if (i915.invert_brightness > 0 ||
 	    dev_priv->quirks & QUIRK_INVERT_BRIGHTNESS) {
-		return panel->backlight.max - val;
+		return panel->backlight.max - val + panel->backlight.min;
 	}
 
 	return val;
-- 
cgit v1.2.3-59-g8ed1b


From d6f756e09f01ea7a0efbbcef269a1e384a35d824 Mon Sep 17 00:00:00 2001
From: "Wladimir J. van der Laan" <laanwj@gmail.com>
Date: Tue, 25 Jul 2017 14:33:36 +0200
Subject: drm/etnaviv: Fix off-by-one error in reloc checking

A relocation pointing to the last four bytes of a buffer can
legitimately happen in the case of small vertex buffers.

CC: stable@vger.kernel.org #4.9+
Signed-off-by: Wladimir J. van der Laan <laanwj@gmail.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 5bd93169dac2..6463fc2c736f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -270,8 +270,8 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
 		if (ret)
 			return ret;
 
-		if (r->reloc_offset >= bo->obj->base.size - sizeof(*ptr)) {
-			DRM_ERROR("relocation %u outside object", i);
+		if (r->reloc_offset > bo->obj->base.size - sizeof(*ptr)) {
+			DRM_ERROR("relocation %u outside object\n", i);
 			return -EINVAL;
 		}
 
-- 
cgit v1.2.3-59-g8ed1b


From 1899bd57570a3e610db574b57d1e7e66378aa908 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Wed, 12 Jul 2017 12:09:22 +0200
Subject: drm/exynos: forbid creating framebuffers from too small GEM buffers

Add a check if the framebuffer described by the provided drm_mode_fb_cmd2
structure fits into provided GEM buffers. Without this check it is
possible to create a framebuffer object from a small buffer and set it to
the hardware, what results in displaying system memory outside the
allocated GEM buffer.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_fb.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index d48fd7c918f8..73217c281c9a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -145,13 +145,19 @@ static struct drm_framebuffer *
 exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 		      const struct drm_mode_fb_cmd2 *mode_cmd)
 {
+	const struct drm_format_info *info = drm_get_format_info(dev, mode_cmd);
 	struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER];
 	struct drm_gem_object *obj;
 	struct drm_framebuffer *fb;
 	int i;
 	int ret;
 
-	for (i = 0; i < drm_format_num_planes(mode_cmd->pixel_format); i++) {
+	for (i = 0; i < info->num_planes; i++) {
+		unsigned int height = (i == 0) ? mode_cmd->height :
+				     DIV_ROUND_UP(mode_cmd->height, info->vsub);
+		unsigned long size = height * mode_cmd->pitches[i] +
+				     mode_cmd->offsets[i];
+
 		obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[i]);
 		if (!obj) {
 			DRM_ERROR("failed to lookup gem object\n");
@@ -160,6 +166,12 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 		}
 
 		exynos_gem[i] = to_exynos_gem(obj);
+
+		if (size > exynos_gem[i]->size) {
+			i++;
+			ret = -EINVAL;
+			goto err;
+		}
 	}
 
 	fb = exynos_drm_framebuffer_init(dev, mode_cmd, exynos_gem, i);
-- 
cgit v1.2.3-59-g8ed1b


From 92f190aba2d57c91ef99e227e36461234c35eb7b Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Wed, 9 Aug 2017 01:42:22 +0200
Subject: drm: make DRM_STM default n
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Default config value for all other drivers is N.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/stm/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/stm/Kconfig b/drivers/gpu/drm/stm/Kconfig
index 2c4817fb0890..8fe5b184b4e8 100644
--- a/drivers/gpu/drm/stm/Kconfig
+++ b/drivers/gpu/drm/stm/Kconfig
@@ -7,7 +7,6 @@ config DRM_STM
 	select DRM_PANEL
 	select VIDEOMODE_HELPERS
 	select FB_PROVIDE_GET_FB_UNMAPPED_AREA
-	default y
 
 	help
 	  Enable support for the on-chip display controller on
-- 
cgit v1.2.3-59-g8ed1b


From 372aa73e20197d463fc8b34524c20b089a98b1c3 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 10 Aug 2017 11:32:18 +1000
Subject: drm/nouveau/disp/nv04: avoid creation of output paths

Fixes hitting WARN_ON() during initialisation of pre-NV50 GPUs, caused
by the recent changes to support pad macro routing on GM20x.

We currently don't use them here for older GPUs anyway.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
index c7c84d34d97e..88582af8bd89 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
@@ -267,6 +267,8 @@ nvkm_disp_oneinit(struct nvkm_engine *engine)
 	/* Create output path objects for each VBIOS display path. */
 	i = -1;
 	while ((data = dcb_outp_parse(bios, ++i, &ver, &hdr, &dcbE))) {
+		if (ver < 0x40) /* No support for chipsets prior to NV50. */
+			break;
 		if (dcbE.type == DCB_OUTPUT_UNUSED)
 			continue;
 		if (dcbE.type == DCB_OUTPUT_EOL)
-- 
cgit v1.2.3-59-g8ed1b


From 3ee70591d6c47ef4c4699b3395ba96ce287db937 Mon Sep 17 00:00:00 2001
From: Jim Quigley <Jim.Quigley@oracle.com>
Date: Fri, 21 Jul 2017 09:20:15 -0400
Subject: sunvdc: prevent sunvdc panic when mpgroup disk added to guest domain

Using mpgroup to define multiple paths for a virtual disk causes multiple
virtual-device-port ports to be created for that virtual device.
Each virtual-device-port port then gets a vdisk created for it by the Linux
sunvdc driver. As mpgroup is not supported by the Linux sunvdc driver it
cannot handle multiple ports for a single vdisk, leading to a kernel panic
at startup.

This fix prevents more than one vdisk per virtual-device-port being created
until full virtual disk multipathing (mpgroup) support is implemented.

Signed-off-by: Jim Quigley <Jim.Quigley@oracle.com>
Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
Reviewed-by: Shannon Nelson <shannon.nelson@oracle.com>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Reviewed-by: Aaron Young <aaron.young@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/sunvdc.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 6b16ead1da58..ad9749463d4f 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -875,6 +875,56 @@ static void print_version(void)
 		printk(KERN_INFO "%s", version);
 }
 
+struct vdc_check_port_data {
+	int	dev_no;
+	char	*type;
+};
+
+static int vdc_device_probed(struct device *dev, void *arg)
+{
+	struct vio_dev *vdev = to_vio_dev(dev);
+	struct vdc_check_port_data *port_data;
+
+	port_data = (struct vdc_check_port_data *)arg;
+
+	if ((vdev->dev_no == port_data->dev_no) &&
+	    (!(strcmp((char *)&vdev->type, port_data->type))) &&
+		dev_get_drvdata(dev)) {
+		/* This device has already been configured
+		 * by vdc_port_probe()
+		 */
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+/* Determine whether the VIO device is part of an mpgroup
+ * by locating all the virtual-device-port nodes associated
+ * with the parent virtual-device node for the VIO device
+ * and checking whether any of these nodes are vdc-ports
+ * which have already been configured.
+ *
+ * Returns true if this device is part of an mpgroup and has
+ * already been probed.
+ */
+static bool vdc_port_mpgroup_check(struct vio_dev *vdev)
+{
+	struct vdc_check_port_data port_data;
+	struct device *dev;
+
+	port_data.dev_no = vdev->dev_no;
+	port_data.type = (char *)&vdev->type;
+
+	dev = device_find_child(vdev->dev.parent, &port_data,
+				vdc_device_probed);
+
+	if (dev)
+		return true;
+
+	return false;
+}
+
 static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 {
 	struct mdesc_handle *hp;
@@ -893,6 +943,14 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 		goto err_out_release_mdesc;
 	}
 
+	/* Check if this device is part of an mpgroup */
+	if (vdc_port_mpgroup_check(vdev)) {
+		printk(KERN_WARNING
+			"VIO: Ignoring extra vdisk port %s",
+			dev_name(&vdev->dev));
+		goto err_out_release_mdesc;
+	}
+
 	port = kzalloc(sizeof(*port), GFP_KERNEL);
 	err = -ENOMEM;
 	if (!port) {
@@ -943,6 +1001,9 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	if (err)
 		goto err_out_free_tx_ring;
 
+	/* Note that the device driver_data is used to determine
+	 * whether the port has been probed.
+	 */
 	dev_set_drvdata(&vdev->dev, port);
 
 	mdesc_release(hp);
-- 
cgit v1.2.3-59-g8ed1b


From ed43594aede9719e56eca72fc6a9a200c60b60e6 Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Tue, 8 Aug 2017 22:23:56 +0200
Subject: tipc: remove premature ESTABLISH FSM event at link synchronization

When a link between two nodes come up, both endpoints will initially
send out a STATE message to the peer, to increase the probability that
the peer endpoint also is up when the first traffic message arrives.
Thereafter, if the establishing link is the second link between two
nodes, this first "traffic" message is a TUNNEL_PROTOCOL/SYNCH message,
helping the peer to perform initial synchronization between the two
links.

However, the initial STATE message may be lost, in which case the SYNCH
message will be the first one arriving at the peer. This should also
work, as the SYNCH message itself will be used to take up the link
endpoint before  initializing synchronization.

Unfortunately the code for this case is broken. Currently, the link is
brought up through a tipc_link_fsm_evt(ESTABLISHED) when a SYNCH
arrives, whereupon __tipc_node_link_up() is called to distribute the
link slots and take the link into traffic. But, __tipc_node_link_up() is
itself starting with a test for whether the link is up, and if true,
returns without action. Clearly, the tipc_link_fsm_evt(ESTABLISHED) call
is unnecessary, since tipc_node_link_up() is itself issuing such an
event, but also harmful, since it inhibits tipc_node_link_up() to
perform the test of its tasks, and the link endpoint in question hence
is never taken into traffic.

This problem has been exposed when we set up dual links between pre-
and post-4.4 kernels, because the former ones don't send out the
initial STATE message described above.

We fix this by removing the unnecessary event call.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index aeef8011ac7d..9b4dcb6a16b5 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1455,10 +1455,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
 	/* Initiate synch mode if applicable */
 	if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) {
 		syncpt = iseqno + exp_pkts - 1;
-		if (!tipc_link_is_up(l)) {
-			tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
+		if (!tipc_link_is_up(l))
 			__tipc_node_link_up(n, bearer_id, xmitq);
-		}
 		if (n->state == SELF_UP_PEER_UP) {
 			n->sync_point = syncpt;
 			tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT);
-- 
cgit v1.2.3-59-g8ed1b


From 50ddfbafcd1e1f95185dfaa4ad794f742b0beaf8 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 8 Aug 2017 14:45:09 -0700
Subject: net: systemport: Fix software statistics for SYSTEMPORT Lite

With SYSTEMPORT Lite we have holes in our statistics layout that make us
skip over the hardware MIB counters, bcm_sysport_get_stats() was not
taking that into account resulting in reporting 0 for all SW-maintained
statistics, fix this by skipping accordingly.

Fixes: 44a4524c54af ("net: systemport: Add support for SYSTEMPORT Lite")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 5333601f855f..dc3052751bc1 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -449,6 +449,10 @@ static void bcm_sysport_get_stats(struct net_device *dev,
 			p = (char *)&dev->stats;
 		else
 			p = (char *)priv;
+
+		if (priv->is_lite && !bcm_sysport_lite_stat_valid(s->type))
+			continue;
+
 		p += s->stat_offset;
 		data[j] = *(unsigned long *)p;
 		j++;
-- 
cgit v1.2.3-59-g8ed1b


From 04db70d9fe7019d96118158fbaaccb4959ba2bd4 Mon Sep 17 00:00:00 2001
From: Girish Moodalbail <girish.moodalbail@oracle.com>
Date: Tue, 8 Aug 2017 17:26:24 -0700
Subject: geneve: maximum value of VNI cannot be used

Geneve's Virtual Network Identifier (VNI) is 24 bit long, so the range
of values for it would be from 0 to 16777215 (2^24 -1).  However, one
cannot create a geneve device with VNI set to 16777215. This patch fixes
this issue.

Signed-off-by: Girish Moodalbail <girish.moodalbail@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index de8156c6b292..2bbda71818ad 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1091,7 +1091,7 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
 	if (data[IFLA_GENEVE_ID]) {
 		__u32 vni =  nla_get_u32(data[IFLA_GENEVE_ID]);
 
-		if (vni >= GENEVE_VID_MASK)
+		if (vni >= GENEVE_N_VID)
 			return -ERANGE;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 1714020e42b17135032c8606f7185b3fb2ba5d78 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 9 Aug 2017 14:38:04 +0300
Subject: igmp: Fix regression caused by igmp sysctl namespace code.

Commit dcd87999d415 ("igmp: net: Move igmp namespace init to correct file")
moved the igmp sysctls initialization from tcp_sk_init to igmp_net_init. This
function is only called as part of per-namespace initialization, only if
CONFIG_IP_MULTICAST is defined, otherwise igmp_mc_init() call in ip_init is
compiled out, casuing the igmp pernet ops to not be registerd and those sysctl
being left initialized with 0. However, there are certain functions, such as
ip_mc_join_group which are always compiled and make use of some of those
sysctls. Let's do a partial revert of the aforementioned commit and move the
sysctl initialization into inet_init_net, that way they will always have
sane values.

Fixes: dcd87999d415 ("igmp: net: Move igmp namespace init to correct file")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=196595
Reported-by: Gerardo Exequiel Pozzi <vmlinuz386@gmail.com>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c | 7 +++++++
 net/ipv4/igmp.c    | 6 ------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 76c2077c3f5b..2e548eca3489 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1731,6 +1731,13 @@ static __net_init int inet_init_net(struct net *net)
 	net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
 #endif
 
+	/* Some igmp sysctl, whose values are always used */
+	net->ipv4.sysctl_igmp_max_memberships = 20;
+	net->ipv4.sysctl_igmp_max_msf = 10;
+	/* IGMP reports for link-local multicast groups are enabled by default */
+	net->ipv4.sysctl_igmp_llm_reports = 1;
+	net->ipv4.sysctl_igmp_qrv = 2;
+
 	return 0;
 }
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 28f14afd0dd3..498706b072fb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2974,12 +2974,6 @@ static int __net_init igmp_net_init(struct net *net)
 		goto out_sock;
 	}
 
-	/* Sysctl initialization */
-	net->ipv4.sysctl_igmp_max_memberships = 20;
-	net->ipv4.sysctl_igmp_max_msf = 10;
-	/* IGMP reports for link-local multicast groups are enabled by default */
-	net->ipv4.sysctl_igmp_llm_reports = 1;
-	net->ipv4.sysctl_igmp_qrv = 2;
 	return 0;
 
 out_sock:
-- 
cgit v1.2.3-59-g8ed1b


From 96d9703050a0036a3360ec98bb41e107c90664fe Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 9 Aug 2017 18:15:19 +0800
Subject: net: sched: set xt_tgchk_param par.nft_compat as 0 in ipt_init_target

Commit 55917a21d0cc ("netfilter: x_tables: add context to know if
extension runs from nft_compat") introduced a member nft_compat to
xt_tgchk_param structure.

But it didn't set it's value for ipt_init_target. With unexpected
value in par.nft_compat, it may return unexpected result in some
target's checkentry.

This patch is to set all it's fields as 0 and only initialize the
non-zero fields in ipt_init_target.

v1->v2:
  As Wang Cong's suggestion, fix it by setting all it's fields as
  0 and only initializing the non-zero fields.

Fixes: 55917a21d0cc ("netfilter: x_tables: add context to know if extension runs from nft_compat")
Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ipt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 94ba5cfab860..d516ba8178b8 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -49,9 +49,9 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t,
 		return PTR_ERR(target);
 
 	t->u.kernel.target = target;
+	memset(&par, 0, sizeof(par));
 	par.net       = net;
 	par.table     = table;
-	par.entryinfo = NULL;
 	par.target    = target;
 	par.targinfo  = t->data;
 	par.hook_mask = hook;
-- 
cgit v1.2.3-59-g8ed1b


From 85f1bd9a7b5a79d5baa8bf44af19658f7bf77bfa Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 10 Aug 2017 12:29:19 -0400
Subject: udp: consistently apply ufo or fragmentation

When iteratively building a UDP datagram with MSG_MORE and that
datagram exceeds MTU, consistently choose UFO or fragmentation.

Once skb_is_gso, always apply ufo. Conversely, once a datagram is
split across multiple skbs, do not consider ufo.

Sendpage already maintains the first invariant, only add the second.
IPv6 does not have a sendpage implementation to modify.

A gso skb must have a partial checksum, do not follow sk_no_check_tx
in udp_send_skb.

Found by syzkaller.

Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c  | 8 +++++---
 net/ipv4/udp.c        | 2 +-
 net/ipv6/ip6_output.c | 7 ++++---
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 50c74cd890bc..e153c40c2436 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -965,11 +965,12 @@ static int __ip_append_data(struct sock *sk,
 		csummode = CHECKSUM_PARTIAL;
 
 	cork->length += length;
-	if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
-	     (skb && skb_is_gso(skb))) &&
+	if ((skb && skb_is_gso(skb)) ||
+	    (((length + (skb ? skb->len : fragheaderlen)) > mtu) &&
+	    (skb_queue_len(queue) <= 1) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
-	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
+	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) {
 		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
 					 hh_len, fragheaderlen, transhdrlen,
 					 maxfraglen, flags);
@@ -1288,6 +1289,7 @@ ssize_t	ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
 		return -EINVAL;
 
 	if ((size + skb->len > mtu) &&
+	    (skb_queue_len(&sk->sk_write_queue) == 1) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO)) {
 		if (skb->ip_summed != CHECKSUM_PARTIAL)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e6276fa3750b..a7c804f73990 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -802,7 +802,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
 	if (is_udplite)  				 /*     UDP-Lite      */
 		csum = udplite_csum(skb);
 
-	else if (sk->sk_no_check_tx) {   /* UDP csum disabled */
+	else if (sk->sk_no_check_tx && !skb_is_gso(skb)) {   /* UDP csum off */
 
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 162efba0d0cd..2dfe50d8d609 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1381,11 +1381,12 @@ emsgsize:
 	 */
 
 	cork->length += length;
-	if ((((length + (skb ? skb->len : headersize)) > mtu) ||
-	     (skb && skb_is_gso(skb))) &&
+	if ((skb && skb_is_gso(skb)) ||
+	    (((length + (skb ? skb->len : headersize)) > mtu) &&
+	    (skb_queue_len(queue) <= 1) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
-	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
+	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) {
 		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
 					  hh_len, fragheaderlen, exthdrlen,
 					  transhdrlen, mtu, flags, fl6);
-- 
cgit v1.2.3-59-g8ed1b


From c27927e372f0785f3303e8fad94b85945e2c97b7 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 10 Aug 2017 12:41:58 -0400
Subject: packet: fix tp_reserve race in packet_set_ring

Updates to tp_reserve can race with reads of the field in
packet_set_ring. Avoid this by holding the socket lock during
updates in setsockopt PACKET_RESERVE.

This bug was discovered by syzkaller.

Fixes: 8913336a7e8d ("packet: add PACKET_RESERVE sockopt")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0615c2a950fa..008a45ca3112 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3700,14 +3700,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		if (optlen != sizeof(val))
 			return -EINVAL;
-		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-			return -EBUSY;
 		if (copy_from_user(&val, optval, sizeof(val)))
 			return -EFAULT;
 		if (val > INT_MAX)
 			return -EINVAL;
-		po->tp_reserve = val;
-		return 0;
+		lock_sock(sk);
+		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+			ret = -EBUSY;
+		} else {
+			po->tp_reserve = val;
+			ret = 0;
+		}
+		release_sock(sk);
+		return ret;
 	}
 	case PACKET_LOSS:
 	{
-- 
cgit v1.2.3-59-g8ed1b


From d507e2ebd2c7be9138e5cf5c0cb1931c90c42ab1 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 10 Aug 2017 15:23:31 -0700
Subject: mm: fix global NR_SLAB_.*CLAIMABLE counter reads

As Tetsuo points out:
 "Commit 385386cff4c6 ("mm: vmstat: move slab statistics from zone to
  node counters") broke "Slab:" field of /proc/meminfo . It shows nearly
  0kB"

In addition to /proc/meminfo, this problem also affects the slab
counters OOM/allocation failure info dumps, can cause early -ENOMEM from
overcommit protection, and miscalculate image size requirements during
suspend-to-disk.

This is because the patch in question switched the slab counters from
the zone level to the node level, but forgot to update the global
accessor functions to read the aggregate node data instead of the
aggregate zone data.

Use global_node_page_state() to access the global slab counters.

Fixes: 385386cff4c6 ("mm: vmstat: move slab statistics from zone to node counters")
Link: http://lkml.kernel.org/r/20170801134256.5400-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Stefan Agner <stefan@agner.ch>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/meminfo.c       | 8 ++++----
 kernel/power/snapshot.c | 2 +-
 mm/page_alloc.c         | 9 +++++----
 mm/util.c               | 2 +-
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8a428498d6b2..509a61668d90 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -106,13 +106,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		    global_node_page_state(NR_FILE_MAPPED));
 	show_val_kb(m, "Shmem:          ", i.sharedram);
 	show_val_kb(m, "Slab:           ",
-		    global_page_state(NR_SLAB_RECLAIMABLE) +
-		    global_page_state(NR_SLAB_UNRECLAIMABLE));
+		    global_node_page_state(NR_SLAB_RECLAIMABLE) +
+		    global_node_page_state(NR_SLAB_UNRECLAIMABLE));
 
 	show_val_kb(m, "SReclaimable:   ",
-		    global_page_state(NR_SLAB_RECLAIMABLE));
+		    global_node_page_state(NR_SLAB_RECLAIMABLE));
 	show_val_kb(m, "SUnreclaim:     ",
-		    global_page_state(NR_SLAB_UNRECLAIMABLE));
+		    global_node_page_state(NR_SLAB_UNRECLAIMABLE));
 	seq_printf(m, "KernelStack:    %8lu kB\n",
 		   global_page_state(NR_KERNEL_STACK_KB));
 	show_val_kb(m, "PageTables:     ",
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 222317721c5a..0972a8e09d08 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1650,7 +1650,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
 {
 	unsigned long size;
 
-	size = global_page_state(NR_SLAB_RECLAIMABLE)
+	size = global_node_page_state(NR_SLAB_RECLAIMABLE)
 		+ global_node_page_state(NR_ACTIVE_ANON)
 		+ global_node_page_state(NR_INACTIVE_ANON)
 		+ global_node_page_state(NR_ACTIVE_FILE)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fc32aa81f359..626a430e32d1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4458,8 +4458,9 @@ long si_mem_available(void)
 	 * Part of the reclaimable slab consists of items that are in use,
 	 * and cannot be freed. Cap this estimate at the low watermark.
 	 */
-	available += global_page_state(NR_SLAB_RECLAIMABLE) -
-		     min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
+	available += global_node_page_state(NR_SLAB_RECLAIMABLE) -
+		     min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
+			 wmark_low);
 
 	if (available < 0)
 		available = 0;
@@ -4602,8 +4603,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		global_node_page_state(NR_FILE_DIRTY),
 		global_node_page_state(NR_WRITEBACK),
 		global_node_page_state(NR_UNSTABLE_NFS),
-		global_page_state(NR_SLAB_RECLAIMABLE),
-		global_page_state(NR_SLAB_UNRECLAIMABLE),
+		global_node_page_state(NR_SLAB_RECLAIMABLE),
+		global_node_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_node_page_state(NR_FILE_MAPPED),
 		global_node_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
diff --git a/mm/util.c b/mm/util.c
index 7b07ec852e01..9ecddf568fe3 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -633,7 +633,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		 * which are reclaimable, under pressure.  The dentry
 		 * cache and most inode caches should fall into this
 		 */
-		free += global_page_state(NR_SLAB_RECLAIMABLE);
+		free += global_node_page_state(NR_SLAB_RECLAIMABLE);
 
 		/*
 		 * Leave reserved pages. The pages are not for anonymous pages.
-- 
cgit v1.2.3-59-g8ed1b


From 75dddef32514f7aa58930bde6a1263253bc3d4ba Mon Sep 17 00:00:00 2001
From: Jonathan Toppins <jtoppins@redhat.com>
Date: Thu, 10 Aug 2017 15:23:35 -0700
Subject: mm: ratelimit PFNs busy info message

The RDMA subsystem can generate several thousand of these messages per
second eventually leading to a kernel crash.  Ratelimit these messages
to prevent this crash.

Doug said:
 "I've been carrying a version of this for several kernel versions. I
  don't remember when they started, but we have one (and only one) class
  of machines: Dell PE R730xd, that generate these errors. When it
  happens, without a rate limit, we get rcu timeouts and kernel oopses.
  With the rate limit, we just get a lot of annoying kernel messages but
  the machine continues on, recovers, and eventually the memory
  operations all succeed"

And:
 "> Well... why are all these EBUSY's occurring? It sounds inefficient
  > (at least) but if it is expected, normal and unavoidable then
  > perhaps we should just remove that message altogether?

  I don't have an answer to that question. To be honest, I haven't
  looked real hard. We never had this at all, then it started out of the
  blue, but only on our Dell 730xd machines (and it hits all of them),
  but no other classes or brands of machines. And we have our 730xd
  machines loaded up with different brands and models of cards (for
  instance one dedicated to mlx4 hardware, one for qib, one for mlx5, an
  ocrdma/cxgb4 combo, etc), so the fact that it hit all of the machines
  meant it wasn't tied to any particular brand/model of RDMA hardware.
  To me, it always smelled of a hardware oddity specific to maybe the
  CPUs or mainboard chipsets in these machines, so given that I'm not an
  mm expert anyway, I never chased it down.

  A few other relevant details: it showed up somewhere around 4.8/4.9 or
  thereabouts. It never happened before, but the prinkt has been there
  since the 3.18 days, so possibly the test to trigger this message was
  changed, or something else in the allocator changed such that the
  situation started happening on these machines?

  And, like I said, it is specific to our 730xd machines (but they are
  all identical, so that could mean it's something like their specific
  ram configuration is causing the allocator to hit this on these
  machine but not on other machines in the cluster, I don't want to say
  it's necessarily the model of chipset or CPU, there are other bits of
  identicalness between these machines)"

Link: http://lkml.kernel.org/r/499c0f6cc10d6eb829a67f2a4d75b4228a9b356e.1501695897.git.jtoppins@redhat.com
Signed-off-by: Jonathan Toppins <jtoppins@redhat.com>
Reviewed-by: Doug Ledford <dledford@redhat.com>
Tested-by: Doug Ledford <dledford@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 626a430e32d1..6d00f746c2fd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7669,7 +7669,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 
 	/* Make sure the range is really isolated. */
 	if (test_pages_isolated(outer_start, end, false)) {
-		pr_info("%s: [%lx, %lx) PFNs busy\n",
+		pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
 			__func__, outer_start, end);
 		ret = -EBUSY;
 		goto done;
-- 
cgit v1.2.3-59-g8ed1b


From 5af10dfd0afc559bb4b0f7e3e8227a1578333995 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 10 Aug 2017 15:23:38 -0700
Subject: userfaultfd: hugetlbfs: remove superfluous page unlock in VM_SHARED
 case

huge_add_to_page_cache->add_to_page_cache implicitly unlocks the page
before returning in case of errors.

The error returned was -EEXIST by running UFFDIO_COPY on a non-hole
offset of a VM_SHARED hugetlbfs mapping.  It was an userland bug that
triggered it and the kernel must cope with it returning -EEXIST from
ioctl(UFFDIO_COPY) as expected.

  page dumped because: VM_BUG_ON_PAGE(!PageLocked(page))
  kernel BUG at mm/filemap.c:964!
  invalid opcode: 0000 [#1] SMP
  CPU: 1 PID: 22582 Comm: qemu-system-x86 Not tainted 4.11.11-300.fc26.x86_64 #1
  RIP: unlock_page+0x4a/0x50
  Call Trace:
    hugetlb_mcopy_atomic_pte+0xc0/0x320
    mcopy_atomic+0x96f/0xbe0
    userfaultfd_ioctl+0x218/0xe90
    do_vfs_ioctl+0xa5/0x600
    SyS_ioctl+0x79/0x90
    entry_SYSCALL_64_fastpath+0x1a/0xa9

Link: http://lkml.kernel.org/r/20170802165145.22628-2-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Tested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Alexey Perevalov <a.perevalov@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a1a0ac0ad6f6..31e207cb399b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4062,9 +4062,9 @@ out:
 	return ret;
 out_release_unlock:
 	spin_unlock(ptl);
-out_release_nounlock:
 	if (vm_shared)
 		unlock_page(page);
+out_release_nounlock:
 	put_page(page);
 	goto out;
 }
-- 
cgit v1.2.3-59-g8ed1b


From a4afe8cdec1646c3d258b02d1cfdfb1313b76eb1 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 10 Aug 2017 15:23:40 -0700
Subject: test_kmod: fix spelling mistake: "EMTPY" -> "EMPTY"

Trivial fix to spelling mistake in snprintf text

[mcgrof@kernel.org: massaged commit message]
Link: http://lkml.kernel.org/r/20170802211450.27928-3-mcgrof@kernel.org
Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michal Marek <mmarek@suse.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Binderman <dcb314@hotmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_kmod.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index 6c1d678bcf8b..90c91541fc16 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -485,7 +485,7 @@ static ssize_t config_show(struct device *dev,
 				config->test_driver);
 	else
 		len += snprintf(buf+len, PAGE_SIZE - len,
-				"driver:\tEMTPY\n");
+				"driver:\tEMPTY\n");
 
 	if (config->test_fs)
 		len += snprintf(buf+len, PAGE_SIZE - len,
@@ -493,7 +493,7 @@ static ssize_t config_show(struct device *dev,
 				config->test_fs);
 	else
 		len += snprintf(buf+len, PAGE_SIZE - len,
-				"fs:\tEMTPY\n");
+				"fs:\tEMPTY\n");
 
 	mutex_unlock(&test_dev->config_mutex);
 
-- 
cgit v1.2.3-59-g8ed1b


From 434b06ae23bab4f4111a674f9b64409c87fa8df9 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Thu, 10 Aug 2017 15:23:44 -0700
Subject: test_kmod: fix bug which allows negative values on two config options

Parsing with kstrtol() enables values to be negative, and we failed to
check for negative values when parsing with test_dev_config_update_uint_sync()
or test_dev_config_update_uint_range().

test_dev_config_update_uint_range() has a minimum check though so an
issue is not present there.  test_dev_config_update_uint_sync() is only
used for the number of threads to use (config_num_threads_store()), and
indeed this would fail with an attempt for a large allocation.

Although the issue is only present in practice with the first fix both
by using kstrtoul() instead of kstrtol().

Link: http://lkml.kernel.org/r/20170802211450.27928-4-mcgrof@kernel.org
Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader")
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: David Binderman <dcb314@hotmail.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michal Marek <mmarek@suse.com>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_kmod.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index 90c91541fc16..8fc0a7a19c83 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -880,10 +880,10 @@ static int test_dev_config_update_uint_sync(struct kmod_test_device *test_dev,
 					    int (*test_sync)(struct kmod_test_device *test_dev))
 {
 	int ret;
-	long new;
+	unsigned long new;
 	unsigned int old_val;
 
-	ret = kstrtol(buf, 10, &new);
+	ret = kstrtoul(buf, 10, &new);
 	if (ret)
 		return ret;
 
@@ -918,9 +918,9 @@ static int test_dev_config_update_uint_range(struct kmod_test_device *test_dev,
 					     unsigned int max)
 {
 	int ret;
-	long new;
+	unsigned long new;
 
-	ret = kstrtol(buf, 10, &new);
+	ret = kstrtoul(buf, 10, &new);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3-59-g8ed1b


From 9c56771316ef50992ada284b4c01b03842b2660d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 10 Aug 2017 15:23:47 -0700
Subject: test_kmod: fix the lock in register_test_dev_kmod()

We accidentally just drop the lock twice instead of taking it and then
releasing it.  This isn't a big issue unless you are adding more than
one device to test on, and the kmod.sh doesn't do that yet, however this
obviously is the correct thing to do.

[mcgrof@kernel.org: massaged subject, explain what happens]
Link: http://lkml.kernel.org/r/20170802211450.27928-5-mcgrof@kernel.org
Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: David Binderman <dcb314@hotmail.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michal Marek <mmarek@suse.com>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_kmod.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index 8fc0a7a19c83..1bc06bbfc97a 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -1146,7 +1146,7 @@ static struct kmod_test_device *register_test_dev_kmod(void)
 	struct kmod_test_device *test_dev = NULL;
 	int ret;
 
-	mutex_unlock(&reg_dev_mutex);
+	mutex_lock(&reg_dev_mutex);
 
 	/* int should suffice for number of devices, test for wrap */
 	if (unlikely(num_test_devs + 1) < 0) {
-- 
cgit v1.2.3-59-g8ed1b


From 4e98ebe5f435fbe5bca91c24bc5d5a2b33025e08 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 10 Aug 2017 15:23:50 -0700
Subject: test_kmod: fix small memory leak on filesystem tests

The break was in the wrong place so file system tests don't work as
intended, leaking memory at each test switch.

[mcgrof@kernel.org: massaged commit subject, noted memory leak issue without the fix]
Link: http://lkml.kernel.org/r/20170802211450.27928-6-mcgrof@kernel.org
Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Reported-by: David Binderman <dcb314@hotmail.com>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michal Marek <mmarek@suse.com>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_kmod.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index 1bc06bbfc97a..ff9148969b92 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -746,11 +746,11 @@ static int trigger_config_run_type(struct kmod_test_device *test_dev,
 						      strlen(test_str));
 		break;
 	case TEST_KMOD_FS_TYPE:
-		break;
 		kfree_const(config->test_fs);
 		config->test_driver = NULL;
 		copied = config_copy_test_fs(config, test_str,
 					     strlen(test_str));
+		break;
 	default:
 		mutex_unlock(&test_dev->config_mutex);
 		return -EINVAL;
-- 
cgit v1.2.3-59-g8ed1b


From 9eeb52ae712e72141c4c1d173048a606ba8f42f6 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Thu, 10 Aug 2017 15:23:53 -0700
Subject: fault-inject: fix wrong should_fail() decision in task context

Commit 1203c8e6fb0a ("fault-inject: simplify access check for fail-nth")
unintentionally broke a conditional statement in should_fail().  Any
faults are not injected in the task context by the change when the
systematic fault injection is not used.

This change restores to the previous correct behaviour.

Link: http://lkml.kernel.org/r/1501633700-3488-1-git-send-email-akinobu.mita@gmail.com
Fixes: 1203c8e6fb0a ("fault-inject: simplify access check for fail-nth")
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Reported-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Tested-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/fault-inject.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 7d315fdb9f13..cf7b129b0b2b 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -110,10 +110,12 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
 	if (in_task()) {
 		unsigned int fail_nth = READ_ONCE(current->fail_nth);
 
-		if (fail_nth && !WRITE_ONCE(current->fail_nth, fail_nth - 1))
-			goto fail;
+		if (fail_nth) {
+			if (!WRITE_ONCE(current->fail_nth, fail_nth - 1))
+				goto fail;
 
-		return false;
+			return false;
+		}
 	}
 
 	/* No need to check any other properties if the probability is 0 */
-- 
cgit v1.2.3-59-g8ed1b


From 16af97dc5a8975371a83d9e30a64038b48f40a2d Mon Sep 17 00:00:00 2001
From: Nadav Amit <nadav.amit@gmail.com>
Date: Thu, 10 Aug 2017 15:23:56 -0700
Subject: mm: migrate: prevent racy access to tlb_flush_pending

Patch series "fixes of TLB batching races", v6.

It turns out that Linux TLB batching mechanism suffers from various
races.  Races that are caused due to batching during reclamation were
recently handled by Mel and this patch-set deals with others.  The more
fundamental issue is that concurrent updates of the page-tables allow
for TLB flushes to be batched on one core, while another core changes
the page-tables.  This other core may assume a PTE change does not
require a flush based on the updated PTE value, while it is unaware that
TLB flushes are still pending.

This behavior affects KSM (which may result in memory corruption) and
MADV_FREE and MADV_DONTNEED (which may result in incorrect behavior).  A
proof-of-concept can easily produce the wrong behavior of MADV_DONTNEED.
Memory corruption in KSM is harder to produce in practice, but was
observed by hacking the kernel and adding a delay before flushing and
replacing the KSM page.

Finally, there is also one memory barrier missing, which may affect
architectures with weak memory model.

This patch (of 7):

Setting and clearing mm->tlb_flush_pending can be performed by multiple
threads, since mmap_sem may only be acquired for read in
task_numa_work().  If this happens, tlb_flush_pending might be cleared
while one of the threads still changes PTEs and batches TLB flushes.

This can lead to the same race between migration and
change_protection_range() that led to the introduction of
tlb_flush_pending.  The result of this race was data corruption, which
means that this patch also addresses a theoretically possible data
corruption.

An actual data corruption was not observed, yet the race was was
confirmed by adding assertion to check tlb_flush_pending is not set by
two threads, adding artificial latency in change_protection_range() and
using sysctl to reduce kernel.numa_balancing_scan_delay_ms.

Link: http://lkml.kernel.org/r/20170802000818.4760-2-namit@vmware.com
Fixes: 20841405940e ("mm: fix TLB flush race between migration, and
change_protection_range")
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 31 ++++++++++++++++++++++---------
 kernel/fork.c            |  2 +-
 mm/debug.c               |  2 +-
 mm/mprotect.c            |  4 ++--
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f384bb62d8e..f58f76ee1dfa 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -493,7 +493,7 @@ struct mm_struct {
 	 * can move process memory needs to flush the TLB when moving a
 	 * PROT_NONE or PROT_NUMA mapped page.
 	 */
-	bool tlb_flush_pending;
+	atomic_t tlb_flush_pending;
 #endif
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	/* See flush_tlb_batched_pending() */
@@ -532,33 +532,46 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 {
 	barrier();
-	return mm->tlb_flush_pending;
+	return atomic_read(&mm->tlb_flush_pending) > 0;
 }
-static inline void set_tlb_flush_pending(struct mm_struct *mm)
+
+static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
-	mm->tlb_flush_pending = true;
+	atomic_set(&mm->tlb_flush_pending, 0);
+}
+
+static inline void inc_tlb_flush_pending(struct mm_struct *mm)
+{
+	atomic_inc(&mm->tlb_flush_pending);
 
 	/*
-	 * Guarantee that the tlb_flush_pending store does not leak into the
+	 * Guarantee that the tlb_flush_pending increase does not leak into the
 	 * critical section updating the page tables
 	 */
 	smp_mb__before_spinlock();
 }
+
 /* Clearing is done after a TLB flush, which also provides a barrier. */
-static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
 	barrier();
-	mm->tlb_flush_pending = false;
+	atomic_dec(&mm->tlb_flush_pending);
 }
 #else
 static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 {
 	return false;
 }
-static inline void set_tlb_flush_pending(struct mm_struct *mm)
+
+static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
 }
-static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+
+static inline void inc_tlb_flush_pending(struct mm_struct *mm)
+{
+}
+
+static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
 }
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 17921b0390b4..e075b7780421 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -807,7 +807,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
 	mmu_notifier_mm_init(mm);
-	clear_tlb_flush_pending(mm);
+	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	mm->pmd_huge_pte = NULL;
 #endif
diff --git a/mm/debug.c b/mm/debug.c
index db1cd26d8752..d70103bb4731 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -159,7 +159,7 @@ void dump_mm(const struct mm_struct *mm)
 		mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq,
 #endif
 #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
-		mm->tlb_flush_pending,
+		atomic_read(&mm->tlb_flush_pending),
 #endif
 		mm->def_flags, &mm->def_flags
 	);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 4180ad8cc9c5..bd0f409922cb 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -244,7 +244,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
 	flush_cache_range(vma, addr, end);
-	set_tlb_flush_pending(mm);
+	inc_tlb_flush_pending(mm);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
@@ -256,7 +256,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	/* Only flush the TLB if we actually modified any entries: */
 	if (pages)
 		flush_tlb_range(vma, start, end);
-	clear_tlb_flush_pending(mm);
+	dec_tlb_flush_pending(mm);
 
 	return pages;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 0a2c40487f3e4215c6ab46e7f837036badfb542b Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Thu, 10 Aug 2017 15:23:59 -0700
Subject: mm: migrate: fix barriers around tlb_flush_pending

Reading tlb_flush_pending while the page-table lock is taken does not
require a barrier, since the lock/unlock already acts as a barrier.
Removing the barrier in mm_tlb_flush_pending() to address this issue.

However, migrate_misplaced_transhuge_page() calls mm_tlb_flush_pending()
while the page-table lock is already released, which may present a
problem on architectures with weak memory model (PPC).  To deal with
this case, a new parameter is added to mm_tlb_flush_pending() to
indicate if it is read without the page-table lock taken, and calling
smp_mb__after_unlock_lock() in this case.

Link: http://lkml.kernel.org/r/20170802000818.4760-3-namit@vmware.com
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f58f76ee1dfa..0e478ebd2706 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -526,12 +526,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 /*
  * Memory barriers to keep this state in sync are graciously provided by
  * the page table locks, outside of which no page table modifications happen.
- * The barriers below prevent the compiler from re-ordering the instructions
- * around the memory barriers that are already present in the code.
+ * The barriers are used to ensure the order between tlb_flush_pending updates,
+ * which happen while the lock is not taken, and the PTE updates, which happen
+ * while the lock is taken, are serialized.
  */
 static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 {
-	barrier();
 	return atomic_read(&mm->tlb_flush_pending) > 0;
 }
 
@@ -554,7 +554,13 @@ static inline void inc_tlb_flush_pending(struct mm_struct *mm)
 /* Clearing is done after a TLB flush, which also provides a barrier. */
 static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
-	barrier();
+	/*
+	 * Guarantee that the tlb_flush_pending does not not leak into the
+	 * critical section, since we must order the PTE change and changes to
+	 * the pending TLB flush indication. We could have relied on TLB flush
+	 * as a memory barrier, but this behavior is not clearly documented.
+	 */
+	smp_mb__before_atomic();
 	atomic_dec(&mm->tlb_flush_pending);
 }
 #else
-- 
cgit v1.2.3-59-g8ed1b


From a9b802500ebbff1544519a2969323b719dac21f0 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Thu, 10 Aug 2017 15:24:02 -0700
Subject: Revert "mm: numa: defer TLB flush for THP migration as long as
 possible"

While deferring TLB flushes is a good practice, the reverted patch
caused pending TLB flushes to be checked while the page-table lock is
not taken.  As a result, in architectures with weak memory model (PPC),
Linux may miss a memory-barrier, miss the fact TLB flushes are pending,
and cause (in theory) a memory corruption.

Since the alternative of using smp_mb__after_unlock_lock() was
considered a bit open-coded, and the performance impact is expected to
be small, the previous patch is reverted.

This reverts b0943d61b8fa ("mm: numa: defer TLB flush for THP migration
as long as possible").

Link: http://lkml.kernel.org/r/20170802000818.4760-4-namit@vmware.com
Signed-off-by: Nadav Amit <namit@vmware.com>
Suggested-by: Mel Gorman <mgorman@suse.de>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 7 +++++++
 mm/migrate.c     | 6 ------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 86975dec0ba1..216114f6ef0b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1495,6 +1495,13 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 		goto clear_pmdnuma;
 	}
 
+	/*
+	 * The page_table_lock above provides a memory barrier
+	 * with change_protection_range.
+	 */
+	if (mm_tlb_flush_pending(vma->vm_mm))
+		flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+
 	/*
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and access rights restored.
diff --git a/mm/migrate.c b/mm/migrate.c
index 627671551873..d68a41da6abb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1937,12 +1937,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		put_page(new_page);
 		goto out_fail;
 	}
-	/*
-	 * We are not sure a pending tlb flush here is for a huge page
-	 * mapping or not. Hence use the tlb range variant
-	 */
-	if (mm_tlb_flush_pending(mm))
-		flush_tlb_range(vma, mmun_start, mmun_end);
 
 	/* Prepare a page as a migration target */
 	__SetPageLocked(new_page);
-- 
cgit v1.2.3-59-g8ed1b


From 56236a59556cfd3bae7bffb7e5f438b5ef0af880 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 10 Aug 2017 15:24:05 -0700
Subject: mm: refactor TLB gathering API

This patch is a preparatory patch for solving race problems caused by
TLB batch.  For that, we will increase/decrease TLB flush pending count
of mm_struct whenever tlb_[gather|finish]_mmu is called.

Before making it simple, this patch separates architecture specific part
and rename it to arch_tlb_[gather|finish]_mmu and generic part just
calls it.

It shouldn't change any behavior.

Link: http://lkml.kernel.org/r/20170802000818.4760-5-namit@vmware.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/tlb.h  |  6 ++++--
 arch/ia64/include/asm/tlb.h |  6 ++++--
 arch/s390/include/asm/tlb.h | 12 ++++++------
 arch/sh/include/asm/tlb.h   |  6 ++++--
 arch/um/include/asm/tlb.h   |  8 +++++---
 include/asm-generic/tlb.h   |  7 ++++---
 include/linux/mm_types.h    |  6 ++++++
 mm/memory.c                 | 28 +++++++++++++++++++++-------
 8 files changed, 54 insertions(+), 25 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 3f2eb76243e3..7f5b2a2d3861 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -148,7 +148,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->fullmm = !(start | (end+1));
@@ -166,7 +167,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
 }
 
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			unsigned long start, unsigned long end)
 {
 	tlb_flush_mmu(tlb);
 
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index fced197b9626..93cadc04ac62 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -168,7 +168,8 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->max = ARRAY_SIZE(tlb->local);
@@ -185,7 +186,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
  * collected.
  */
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			unsigned long start, unsigned long end)
 {
 	/*
 	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 7317b3108a88..d574d0820dc8 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -47,10 +47,9 @@ struct mmu_table_batch {
 extern void tlb_table_flush(struct mmu_gather *tlb);
 extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 
-static inline void tlb_gather_mmu(struct mmu_gather *tlb,
-				  struct mm_struct *mm,
-				  unsigned long start,
-				  unsigned long end)
+static inline void
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -76,8 +75,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 	tlb_flush_mmu_free(tlb);
 }
 
-static inline void tlb_finish_mmu(struct mmu_gather *tlb,
-				  unsigned long start, unsigned long end)
+static inline void
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	tlb_flush_mmu(tlb);
 }
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 46e0d635e36f..89786560dbd4 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -36,7 +36,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -47,7 +48,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
 }
 
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	if (tlb->fullmm)
 		flush_tlb_mm(tlb->mm);
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 600a2e9bfee2..2a901eca7145 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -45,7 +45,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -80,12 +81,13 @@ tlb_flush_mmu(struct mmu_gather *tlb)
 	tlb_flush_mmu_free(tlb);
 }
 
-/* tlb_finish_mmu
+/* arch_tlb_finish_mmu
  *	Called at the end of the shootdown operation to free up any resources
  *	that were required.
  */
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	tlb_flush_mmu(tlb);
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 8afa4335e5b2..8f71521e7a44 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -112,10 +112,11 @@ struct mmu_gather {
 
 #define HAVE_GENERIC_MMU_GATHER
 
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end);
+void arch_tlb_gather_mmu(struct mmu_gather *tlb,
+	struct mm_struct *mm, unsigned long start, unsigned long end);
 void tlb_flush_mmu(struct mmu_gather *tlb);
-void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
-							unsigned long end);
+void arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			 unsigned long start, unsigned long end);
 extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 				   int page_size);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0e478ebd2706..c605f2a3a68e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -522,6 +522,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 	return mm->cpu_vm_mask_var;
 }
 
+struct mmu_gather;
+extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+				unsigned long start, unsigned long end);
+extern void tlb_finish_mmu(struct mmu_gather *tlb,
+				unsigned long start, unsigned long end);
+
 #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 /*
  * Memory barriers to keep this state in sync are graciously provided by
diff --git a/mm/memory.c b/mm/memory.c
index f65beaad319b..34cba5113e06 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -215,12 +215,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
 	return true;
 }
 
-/* tlb_gather_mmu
- *	Called to initialize an (on-stack) mmu_gather structure for page-table
- *	tear-down from @mm. The @fullmm argument is used when @mm is without
- *	users and we're going to destroy the full address space (exit/execve).
- */
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+				unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 
@@ -275,7 +271,8 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
  *	Called at the end of the shootdown operation to free up any resources
  *	that were required.
  */
-void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+void arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	struct mmu_gather_batch *batch, *next;
 
@@ -398,6 +395,23 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 
 #endif /* CONFIG_HAVE_RCU_TABLE_FREE */
 
+/* tlb_gather_mmu
+ *	Called to initialize an (on-stack) mmu_gather structure for page-table
+ *	tear-down from @mm. The @fullmm argument is used when @mm is without
+ *	users and we're going to destroy the full address space (exit/execve).
+ */
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
+{
+	arch_tlb_gather_mmu(tlb, mm, start, end);
+}
+
+void tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
+{
+	arch_tlb_finish_mmu(tlb, start, end);
+}
+
 /*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
-- 
cgit v1.2.3-59-g8ed1b


From 0a2dd266dd6b7a31503b5bbe63af05961a6b446d Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 10 Aug 2017 15:24:09 -0700
Subject: mm: make tlb_flush_pending global

Currently, tlb_flush_pending is used only for CONFIG_[NUMA_BALANCING|
COMPACTION] but upcoming patches to solve subtle TLB flush batching
problem will use it regardless of compaction/NUMA so this patch doesn't
remove the dependency.

[akpm@linux-foundation.org: remove more ifdefs from world's ugliest printk statement]
Link: http://lkml.kernel.org/r/20170802000818.4760-6-namit@vmware.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 21 ---------------------
 mm/debug.c               |  4 ----
 2 files changed, 25 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c605f2a3a68e..892a7b0196fd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -487,14 +487,12 @@ struct mm_struct {
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 	/*
 	 * An operation with batched TLB flushing is going on. Anything that
 	 * can move process memory needs to flush the TLB when moving a
 	 * PROT_NONE or PROT_NUMA mapped page.
 	 */
 	atomic_t tlb_flush_pending;
-#endif
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	/* See flush_tlb_batched_pending() */
 	bool tlb_flush_batched;
@@ -528,7 +526,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 extern void tlb_finish_mmu(struct mmu_gather *tlb,
 				unsigned long start, unsigned long end);
 
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 /*
  * Memory barriers to keep this state in sync are graciously provided by
  * the page table locks, outside of which no page table modifications happen.
@@ -569,24 +566,6 @@ static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 	smp_mb__before_atomic();
 	atomic_dec(&mm->tlb_flush_pending);
 }
-#else
-static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
-{
-	return false;
-}
-
-static inline void init_tlb_flush_pending(struct mm_struct *mm)
-{
-}
-
-static inline void inc_tlb_flush_pending(struct mm_struct *mm)
-{
-}
-
-static inline void dec_tlb_flush_pending(struct mm_struct *mm)
-{
-}
-#endif
 
 struct vm_fault;
 
diff --git a/mm/debug.c b/mm/debug.c
index d70103bb4731..5715448ab0b5 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -124,9 +124,7 @@ void dump_mm(const struct mm_struct *mm)
 #ifdef CONFIG_NUMA_BALANCING
 		"numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n"
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 		"tlb_flush_pending %d\n"
-#endif
 		"def_flags: %#lx(%pGv)\n",
 
 		mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
@@ -158,9 +156,7 @@ void dump_mm(const struct mm_struct *mm)
 #ifdef CONFIG_NUMA_BALANCING
 		mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq,
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 		atomic_read(&mm->tlb_flush_pending),
-#endif
 		mm->def_flags, &mm->def_flags
 	);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 99baac21e4585f4258f919502c6e23f1e5edc98c Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 10 Aug 2017 15:24:12 -0700
Subject: mm: fix MADV_[FREE|DONTNEED] TLB flush miss problem

Nadav reported parallel MADV_DONTNEED on same range has a stale TLB
problem and Mel fixed it[1] and found same problem on MADV_FREE[2].

Quote from Mel Gorman:
 "The race in question is CPU 0 running madv_free and updating some PTEs
  while CPU 1 is also running madv_free and looking at the same PTEs.
  CPU 1 may have writable TLB entries for a page but fail the pte_dirty
  check (because CPU 0 has updated it already) and potentially fail to
  flush.

  Hence, when madv_free on CPU 1 returns, there are still potentially
  writable TLB entries and the underlying PTE is still present so that a
  subsequent write does not necessarily propagate the dirty bit to the
  underlying PTE any more. Reclaim at some unknown time at the future
  may then see that the PTE is still clean and discard the page even
  though a write has happened in the meantime. I think this is possible
  but I could have missed some protection in madv_free that prevents it
  happening."

This patch aims for solving both problems all at once and is ready for
other problem with KSM, MADV_FREE and soft-dirty story[3].

TLB batch API(tlb_[gather|finish]_mmu] uses [inc|dec]_tlb_flush_pending
and mmu_tlb_flush_pending so that when tlb_finish_mmu is called, we can
catch there are parallel threads going on.  In that case, forcefully,
flush TLB to prevent for user to access memory via stale TLB entry
although it fail to gather page table entry.

I confirmed this patch works with [4] test program Nadav gave so this
patch supersedes "mm: Always flush VMA ranges affected by zap_page_range
v2" in current mmotm.

NOTE:

This patch modifies arch-specific TLB gathering interface(x86, ia64,
s390, sh, um).  It seems most of architecture are straightforward but
s390 need to be careful because tlb_flush_mmu works only if
mm->context.flush_mm is set to non-zero which happens only a pte entry
really is cleared by ptep_get_and_clear and friends.  However, this
problem never changes the pte entries but need to flush to prevent
memory access from stale tlb.

[1] http://lkml.kernel.org/r/20170725101230.5v7gvnjmcnkzzql3@techsingularity.net
[2] http://lkml.kernel.org/r/20170725100722.2dxnmgypmwnrfawp@suse.de
[3] http://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com
[4] https://patchwork.kernel.org/patch/9861621/

[minchan@kernel.org: decrease tlb flush pending count in tlb_finish_mmu]
  Link: http://lkml.kernel.org/r/20170808080821.GA31730@bbox
Link: http://lkml.kernel.org/r/20170802000818.4760-7-namit@vmware.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Reported-by: Nadav Amit <namit@vmware.com>
Reported-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/tlb.h  |  7 ++++++-
 arch/ia64/include/asm/tlb.h |  4 +++-
 arch/s390/include/asm/tlb.h |  7 ++++++-
 arch/sh/include/asm/tlb.h   |  4 ++--
 arch/um/include/asm/tlb.h   |  7 ++++++-
 include/asm-generic/tlb.h   |  2 +-
 include/linux/mm_types.h    |  8 ++++++++
 mm/memory.c                 | 18 ++++++++++++++++--
 8 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 7f5b2a2d3861..d5562f9ce600 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -168,8 +168,13 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-			unsigned long start, unsigned long end)
+			unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->range_start = start;
+		tlb->range_end = end;
+	}
+
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 93cadc04ac62..cbe5ac3699bf 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -187,8 +187,10 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
  */
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-			unsigned long start, unsigned long end)
+			unsigned long start, unsigned long end, bool force)
 {
+	if (force)
+		tlb->need_flush = 1;
 	/*
 	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
 	 * tlb->end_addr.
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index d574d0820dc8..2eb8ff0d6fca 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -77,8 +77,13 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->start = start;
+		tlb->end = end;
+	}
+
 	tlb_flush_mmu(tlb);
 }
 
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 89786560dbd4..51a8bc967e75 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -49,9 +49,9 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
-	if (tlb->fullmm)
+	if (tlb->fullmm || force)
 		flush_tlb_mm(tlb->mm);
 
 	/* keep the page table cache within bounds */
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 2a901eca7145..344d95619d03 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -87,8 +87,13 @@ tlb_flush_mmu(struct mmu_gather *tlb)
  */
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->start = start;
+		tlb->end = end;
+		tlb->need_flush = 1;
+	}
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 8f71521e7a44..faddde44de8c 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -116,7 +116,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb,
 	struct mm_struct *mm, unsigned long start, unsigned long end);
 void tlb_flush_mmu(struct mmu_gather *tlb);
 void arch_tlb_finish_mmu(struct mmu_gather *tlb,
-			 unsigned long start, unsigned long end);
+			 unsigned long start, unsigned long end, bool force);
 extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 				   int page_size);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 892a7b0196fd..3cadee0a3508 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -538,6 +538,14 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 	return atomic_read(&mm->tlb_flush_pending) > 0;
 }
 
+/*
+ * Returns true if there are two above TLB batching threads in parallel.
+ */
+static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
+{
+	return atomic_read(&mm->tlb_flush_pending) > 1;
+}
+
 static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
 	atomic_set(&mm->tlb_flush_pending, 0);
diff --git a/mm/memory.c b/mm/memory.c
index 34cba5113e06..e158f7ac6730 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -272,10 +272,13 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
  *	that were required.
  */
 void arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
 	struct mmu_gather_batch *batch, *next;
 
+	if (force)
+		__tlb_adjust_range(tlb, start, end - start);
+
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
@@ -404,12 +407,23 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 			unsigned long start, unsigned long end)
 {
 	arch_tlb_gather_mmu(tlb, mm, start, end);
+	inc_tlb_flush_pending(tlb->mm);
 }
 
 void tlb_finish_mmu(struct mmu_gather *tlb,
 		unsigned long start, unsigned long end)
 {
-	arch_tlb_finish_mmu(tlb, start, end);
+	/*
+	 * If there are parallel threads are doing PTE changes on same range
+	 * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB
+	 * flush by batching, a thread has stable TLB entry can fail to flush
+	 * the TLB by observing pte_none|!pte_dirty, for example so flush TLB
+	 * forcefully if we detect parallel PTE batching threads.
+	 */
+	bool force = mm_tlb_flush_nested(tlb->mm);
+
+	arch_tlb_finish_mmu(tlb, start, end, force);
+	dec_tlb_flush_pending(tlb->mm);
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From b3a81d0841a954a3d3e782059960f53e63b37066 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 10 Aug 2017 15:24:15 -0700
Subject: mm: fix KSM data corruption

Nadav reported KSM can corrupt the user data by the TLB batching
race[1].  That means data user written can be lost.

Quote from Nadav Amit:
 "For this race we need 4 CPUs:

  CPU0: Caches a writable and dirty PTE entry, and uses the stale value
  for write later.

  CPU1: Runs madvise_free on the range that includes the PTE. It would
  clear the dirty-bit. It batches TLB flushes.

  CPU2: Writes 4 to /proc/PID/clear_refs , clearing the PTEs soft-dirty.
  We care about the fact that it clears the PTE write-bit, and of
  course, batches TLB flushes.

  CPU3: Runs KSM. Our purpose is to pass the following test in
  write_protect_page():

	if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)))

  Since it will avoid TLB flush. And we want to do it while the PTE is
  stale. Later, and before replacing the page, we would be able to
  change the page.

  Note that all the operations the CPU1-3 perform canhappen in parallel
  since they only acquire mmap_sem for read.

  We start with two identical pages. Everything below regards the same
  page/PTE.

  CPU0        CPU1        CPU2        CPU3
  ----        ----        ----        ----
  Write the same
  value on page

  [cache PTE as
   dirty in TLB]

              MADV_FREE
              pte_mkclean()

                          4 > clear_refs
                          pte_wrprotect()

                                      write_protect_page()
                                      [ success, no flush ]

                                      pages_indentical()
                                      [ ok ]

  Write to page
  different value

  [Ok, using stale
   PTE]

                                      replace_page()

  Later, CPU1, CPU2 and CPU3 would flush the TLB, but that is too late.
  CPU0 already wrote on the page, but KSM ignored this write, and it got
  lost"

In above scenario, MADV_FREE is fixed by changing TLB batching API
including [set|clear]_tlb_flush_pending.  Remained thing is soft-dirty
part.

This patch changes soft-dirty uses TLB batching API instead of
flush_tlb_mm and KSM checks pending TLB flush by using
mm_tlb_flush_pending so that it will flush TLB to avoid data lost if
there are other parallel threads pending TLB flush.

[1] http://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com

Link: http://lkml.kernel.org/r/20170802000818.4760-8-namit@vmware.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Reported-by: Nadav Amit <namit@vmware.com>
Tested-by: Nadav Amit <namit@vmware.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 7 +++++--
 mm/ksm.c           | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index b836fd61ed87..fe8f3265e877 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -16,9 +16,10 @@
 #include <linux/mmu_notifier.h>
 #include <linux/page_idle.h>
 #include <linux/shmem_fs.h>
+#include <linux/uaccess.h>
 
 #include <asm/elf.h>
-#include <linux/uaccess.h>
+#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
 
@@ -1008,6 +1009,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	enum clear_refs_types type;
+	struct mmu_gather tlb;
 	int itype;
 	int rv;
 
@@ -1054,6 +1056,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		}
 
 		down_read(&mm->mmap_sem);
+		tlb_gather_mmu(&tlb, mm, 0, -1);
 		if (type == CLEAR_REFS_SOFT_DIRTY) {
 			for (vma = mm->mmap; vma; vma = vma->vm_next) {
 				if (!(vma->vm_flags & VM_SOFTDIRTY))
@@ -1075,7 +1078,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
 		if (type == CLEAR_REFS_SOFT_DIRTY)
 			mmu_notifier_invalidate_range_end(mm, 0, -1);
-		flush_tlb_mm(mm);
+		tlb_finish_mmu(&tlb, 0, -1);
 		up_read(&mm->mmap_sem);
 out_mm:
 		mmput(mm);
diff --git a/mm/ksm.c b/mm/ksm.c
index 4dc92f138786..db20f8436bc3 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1038,7 +1038,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 		goto out_unlock;
 
 	if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
-	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte))) {
+	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) ||
+						mm_tlb_flush_pending(mm)) {
 		pte_t entry;
 
 		swapped = PageSwapCache(page);
-- 
cgit v1.2.3-59-g8ed1b


From c0a6a5ae6b5d976592a83bdafc1c2f49b0718c65 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 10 Aug 2017 15:24:18 -0700
Subject: MAINTAINERS: copy virtio on balloon_compaction.c

Changes to mm/balloon_compaction.c can easily break virtio, and virtio
is the only user of that interface.  Add a line to MAINTAINERS so
whoever changes that file remembers to copy us.

Link: http://lkml.kernel.org/r/1501764010-24456-1-git-send-email-mst@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
Acked-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 84d6a8277cbd..6f7721d1634c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14004,6 +14004,7 @@ F:	drivers/block/virtio_blk.c
 F:	include/linux/virtio*.h
 F:	include/uapi/linux/virtio_*.h
 F:	drivers/crypto/virtio/
+F:	mm/balloon_compaction.c
 
 VIRTIO CRYPTO DRIVER
 M:	Gonglei <arei.gonglei@huawei.com>
-- 
cgit v1.2.3-59-g8ed1b


From af54aed94bf3a1cf0b847bbbf00f9a58e278b338 Mon Sep 17 00:00:00 2001
From: Wei Wang <wei.w.wang@intel.com>
Date: Thu, 10 Aug 2017 15:24:21 -0700
Subject: mm/balloon_compaction.c: don't zero ballooned pages

Revert commit bb01b64cfab7 ("mm/balloon_compaction.c: enqueue zero page
to balloon device")'

Zeroing ballon pages is rather time consuming, especially when a lot of
pages are in flight. E.g. 7GB worth of ballooned memory takes 2.8s with
__GFP_ZERO while it takes ~491ms without it.

The original commit argued that zeroing will help ksmd to merge these
pages on the host but this argument is assuming that the host actually
marks balloon pages for ksm which is not universally true.  So we pay
performance penalty for something that even might not be used in the end
which is wrong.  The host can zero out pages on its own when there is a
need.

[mhocko@kernel.org: new changelog text]
Link: http://lkml.kernel.org/r/1501761557-9758-1-git-send-email-wei.w.wang@intel.com
Fixes: bb01b64cfab7 ("mm/balloon_compaction.c: enqueue zero page to balloon device")
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: zhenwei.pi <zhenwei.pi@youruncloud.com>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/balloon_compaction.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 9075aa54e955..b06d9fe23a28 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -24,7 +24,7 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
 {
 	unsigned long flags;
 	struct page *page = alloc_page(balloon_mapping_gfp_mask() |
-				__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_ZERO);
+				       __GFP_NOMEMALLOC | __GFP_NORETRY);
 	if (!page)
 		return NULL;
 
-- 
cgit v1.2.3-59-g8ed1b


From d041353dc98a6339182cd6f628b4c8f111278cb3 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 10 Aug 2017 15:24:24 -0700
Subject: mm: fix list corruptions on shmem shrinklist

We saw many list corruption warnings on shmem shrinklist:

  WARNING: CPU: 18 PID: 177 at lib/list_debug.c:59 __list_del_entry+0x9e/0xc0
  list_del corruption. prev->next should be ffff9ae5694b82d8, but was ffff9ae5699ba960
  Modules linked in: intel_rapl sb_edac edac_core x86_pkg_temp_thermal coretemp iTCO_wdt iTCO_vendor_support crct10dif_pclmul crc32_pclmul ghash_clmulni_intel raid0 dcdbas shpchp wmi hed i2c_i801 ioatdma lpc_ich i2c_smbus acpi_cpufreq tcp_diag inet_diag sch_fq_codel ipmi_si ipmi_devintf ipmi_msghandler igb ptp crc32c_intel pps_core i2c_algo_bit i2c_core dca ipv6 crc_ccitt
  CPU: 18 PID: 177 Comm: kswapd1 Not tainted 4.9.34-t3.el7.twitter.x86_64 #1
  Hardware name: Dell Inc. PowerEdge C6220/0W6W6G, BIOS 2.2.3 11/07/2013
  Call Trace:
    dump_stack+0x4d/0x66
    __warn+0xcb/0xf0
    warn_slowpath_fmt+0x4f/0x60
    __list_del_entry+0x9e/0xc0
    shmem_unused_huge_shrink+0xfa/0x2e0
    shmem_unused_huge_scan+0x20/0x30
    super_cache_scan+0x193/0x1a0
    shrink_slab.part.41+0x1e3/0x3f0
    shrink_slab+0x29/0x30
    shrink_node+0xf9/0x2f0
    kswapd+0x2d8/0x6c0
    kthread+0xd7/0xf0
    ret_from_fork+0x22/0x30

  WARNING: CPU: 23 PID: 639 at lib/list_debug.c:33 __list_add+0x89/0xb0
  list_add corruption. prev->next should be next (ffff9ae5699ba960), but was ffff9ae5694b82d8. (prev=ffff9ae5694b82d8).
  Modules linked in: intel_rapl sb_edac edac_core x86_pkg_temp_thermal coretemp iTCO_wdt iTCO_vendor_support crct10dif_pclmul crc32_pclmul ghash_clmulni_intel raid0 dcdbas shpchp wmi hed i2c_i801 ioatdma lpc_ich i2c_smbus acpi_cpufreq tcp_diag inet_diag sch_fq_codel ipmi_si ipmi_devintf ipmi_msghandler igb ptp crc32c_intel pps_core i2c_algo_bit i2c_core dca ipv6 crc_ccitt
  CPU: 23 PID: 639 Comm: systemd-udevd Tainted: G        W       4.9.34-t3.el7.twitter.x86_64 #1
  Hardware name: Dell Inc. PowerEdge C6220/0W6W6G, BIOS 2.2.3 11/07/2013
  Call Trace:
    dump_stack+0x4d/0x66
    __warn+0xcb/0xf0
    warn_slowpath_fmt+0x4f/0x60
    __list_add+0x89/0xb0
    shmem_setattr+0x204/0x230
    notify_change+0x2ef/0x440
    do_truncate+0x5d/0x90
    path_openat+0x331/0x1190
    do_filp_open+0x7e/0xe0
    do_sys_open+0x123/0x200
    SyS_open+0x1e/0x20
    do_syscall_64+0x61/0x170
    entry_SYSCALL64_slow_path+0x25/0x25

The problem is that shmem_unused_huge_shrink() moves entries from the
global sbinfo->shrinklist to its local lists and then releases the
spinlock.  However, a parallel shmem_setattr() could access one of these
entries directly and add it back to the global shrinklist if it is
removed, with the spinlock held.

The logic itself looks solid since an entry could be either in a local
list or the global list, otherwise it is removed from one of them by
list_del_init().  So probably the race condition is that, one CPU is in
the middle of INIT_LIST_HEAD() but the other CPU calls list_empty()
which returns true too early then the following list_add_tail() sees a
corrupted entry.

list_empty_careful() is designed to fix this situation.

[akpm@linux-foundation.org: add comments]
Link: http://lkml.kernel.org/r/20170803054630.18775-1-xiyou.wangcong@gmail.com
Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure")
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index b0aa6075d164..6540e5982444 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1022,7 +1022,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 			 */
 			if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
 				spin_lock(&sbinfo->shrinklist_lock);
-				if (list_empty(&info->shrinklist)) {
+				/*
+				 * _careful to defend against unlocked access to
+				 * ->shrink_list in shmem_unused_huge_shrink()
+				 */
+				if (list_empty_careful(&info->shrinklist)) {
 					list_add_tail(&info->shrinklist,
 							&sbinfo->shrinklist);
 					sbinfo->shrinklist_len++;
@@ -1817,7 +1821,11 @@ alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
 			 * to shrink under memory pressure.
 			 */
 			spin_lock(&sbinfo->shrinklist_lock);
-			if (list_empty(&info->shrinklist)) {
+			/*
+			 * _careful to defend against unlocked access to
+			 * ->shrink_list in shmem_unused_huge_shrink()
+			 */
+			if (list_empty_careful(&info->shrinklist)) {
 				list_add_tail(&info->shrinklist,
 						&sbinfo->shrinklist);
 				sbinfo->shrinklist_len++;
-- 
cgit v1.2.3-59-g8ed1b


From aac2fea94f7a3df8ad1eeb477eb2643f81fd5393 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 10 Aug 2017 15:24:27 -0700
Subject: rmap: do not call mmu_notifier_invalidate_page() under ptl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MMU notifiers can sleep, but in page_mkclean_one() we call
mmu_notifier_invalidate_page() under page table lock.

Let's instead use mmu_notifier_invalidate_range() outside
page_vma_mapped_walk() loop.

[jglisse@redhat.com: try_to_unmap_one() do not call mmu_notifier under ptl]
  Link: http://lkml.kernel.org/r/20170809204333.27485-1-jglisse@redhat.com
Link: http://lkml.kernel.org/r/20170804134928.l4klfcnqatni7vsc@black.fi.intel.com
Fixes: c7ab0d2fdc84 ("mm: convert try_to_unmap_one() to use page_vma_mapped_walk()")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Reported-by: axie <axie@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Writer, Tim" <Tim.Writer@amd.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/rmap.c | 52 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index c8993c63eb25..c1286d47aa1f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -888,10 +888,10 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 		.flags = PVMW_SYNC,
 	};
 	int *cleaned = arg;
+	bool invalidation_needed = false;
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		int ret = 0;
-		address = pvmw.address;
 		if (pvmw.pte) {
 			pte_t entry;
 			pte_t *pte = pvmw.pte;
@@ -899,11 +899,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 			if (!pte_dirty(*pte) && !pte_write(*pte))
 				continue;
 
-			flush_cache_page(vma, address, pte_pfn(*pte));
-			entry = ptep_clear_flush(vma, address, pte);
+			flush_cache_page(vma, pvmw.address, pte_pfn(*pte));
+			entry = ptep_clear_flush(vma, pvmw.address, pte);
 			entry = pte_wrprotect(entry);
 			entry = pte_mkclean(entry);
-			set_pte_at(vma->vm_mm, address, pte, entry);
+			set_pte_at(vma->vm_mm, pvmw.address, pte, entry);
 			ret = 1;
 		} else {
 #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
@@ -913,11 +913,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 			if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
 				continue;
 
-			flush_cache_page(vma, address, page_to_pfn(page));
-			entry = pmdp_huge_clear_flush(vma, address, pmd);
+			flush_cache_page(vma, pvmw.address, page_to_pfn(page));
+			entry = pmdp_huge_clear_flush(vma, pvmw.address, pmd);
 			entry = pmd_wrprotect(entry);
 			entry = pmd_mkclean(entry);
-			set_pmd_at(vma->vm_mm, address, pmd, entry);
+			set_pmd_at(vma->vm_mm, pvmw.address, pmd, entry);
 			ret = 1;
 #else
 			/* unexpected pmd-mapped page? */
@@ -926,11 +926,16 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		if (ret) {
-			mmu_notifier_invalidate_page(vma->vm_mm, address);
 			(*cleaned)++;
+			invalidation_needed = true;
 		}
 	}
 
+	if (invalidation_needed) {
+		mmu_notifier_invalidate_range(vma->vm_mm, address,
+				address + (1UL << compound_order(page)));
+	}
+
 	return true;
 }
 
@@ -1323,7 +1328,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	};
 	pte_t pteval;
 	struct page *subpage;
-	bool ret = true;
+	bool ret = true, invalidation_needed = false;
 	enum ttu_flags flags = (enum ttu_flags)arg;
 
 	/* munlock has nothing to gain from examining un-locked vmas */
@@ -1363,11 +1368,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		VM_BUG_ON_PAGE(!pvmw.pte, page);
 
 		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
-		address = pvmw.address;
-
 
 		if (!(flags & TTU_IGNORE_ACCESS)) {
-			if (ptep_clear_flush_young_notify(vma, address,
+			if (ptep_clear_flush_young_notify(vma, pvmw.address,
 						pvmw.pte)) {
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
@@ -1376,7 +1379,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		/* Nuke the page table entry. */
-		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+		flush_cache_page(vma, pvmw.address, pte_pfn(*pvmw.pte));
 		if (should_defer_flush(mm, flags)) {
 			/*
 			 * We clear the PTE but do not flush so potentially
@@ -1386,11 +1389,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			 * transition on a cached TLB entry is written through
 			 * and traps if the PTE is unmapped.
 			 */
-			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+			pteval = ptep_get_and_clear(mm, pvmw.address,
+						    pvmw.pte);
 
 			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
 		} else {
-			pteval = ptep_clear_flush(vma, address, pvmw.pte);
+			pteval = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
 		}
 
 		/* Move the dirty bit to the page. Now the pte is gone. */
@@ -1405,12 +1409,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			if (PageHuge(page)) {
 				int nr = 1 << compound_order(page);
 				hugetlb_count_sub(nr, mm);
-				set_huge_swap_pte_at(mm, address,
+				set_huge_swap_pte_at(mm, pvmw.address,
 						     pvmw.pte, pteval,
 						     vma_mmu_pagesize(vma));
 			} else {
 				dec_mm_counter(mm, mm_counter(page));
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 			}
 
 		} else if (pte_unused(pteval)) {
@@ -1434,7 +1438,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
+			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 		} else if (PageAnon(page)) {
 			swp_entry_t entry = { .val = page_private(subpage) };
 			pte_t swp_pte;
@@ -1460,7 +1464,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				 * If the page was redirtied, it cannot be
 				 * discarded. Remap the page to page table.
 				 */
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 				SetPageSwapBacked(page);
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
@@ -1468,7 +1472,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			}
 
 			if (swap_duplicate(entry) < 0) {
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
 				break;
@@ -1484,14 +1488,18 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
+			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 		} else
 			dec_mm_counter(mm, mm_counter_file(page));
 discard:
 		page_remove_rmap(subpage, PageHuge(page));
 		put_page(page);
-		mmu_notifier_invalidate_page(mm, address);
+		invalidation_needed = true;
 	}
+
+	if (invalidation_needed)
+		mmu_notifier_invalidate_range(mm, address,
+				address + (1UL << compound_order(page)));
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f357e345eef7863da037e0243f2d3df4ba6df986 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Thu, 10 Aug 2017 15:24:29 -0700
Subject: zram: rework copy of compressor name in comp_algorithm_store()

comp_algorithm_store() passes the size of the source buffer to strlcpy()
instead of the destination buffer size.  Make it explicit that the two
buffers have the same size and use strcpy() instead of strlcpy().  The
latter can be done safely since the function ensures that the string in
the source buffer is terminated.

Link: http://lkml.kernel.org/r/20170803163350.45245-1-mka@chromium.org
Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 856d5dc02451..3b1b6340ba13 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -308,7 +308,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
 	struct zram *zram = dev_to_zram(dev);
-	char compressor[CRYPTO_MAX_ALG_NAME];
+	char compressor[ARRAY_SIZE(zram->compressor)];
 	size_t sz;
 
 	strlcpy(compressor, buf, sizeof(compressor));
@@ -327,7 +327,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
 		return -EBUSY;
 	}
 
-	strlcpy(zram->compressor, compressor, sizeof(compressor));
+	strcpy(zram->compressor, compressor);
 	up_write(&zram->init_lock);
 	return len;
 }
-- 
cgit v1.2.3-59-g8ed1b


From e86b298bebf7e799e4b7232e9135799f1947552e Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Thu, 10 Aug 2017 15:24:32 -0700
Subject: userfaultfd: replace ENOSPC with ESRCH in case mm has gone during
 copy/zeropage

When the process exit races with outstanding mcopy_atomic, it would be
better to return ESRCH error.  When such race occurs the process and
it's mm are going away and returning "no such process" to the uffd
monitor seems better fit than ENOSPC.

Link: http://lkml.kernel.org/r/1502111545-32305-1-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 06ea26b8c996..b0d5897bc4e6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1600,7 +1600,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 				   uffdio_copy.len);
 		mmput(ctx->mm);
 	} else {
-		return -ENOSPC;
+		return -ESRCH;
 	}
 	if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
 		return -EFAULT;
@@ -1647,7 +1647,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 				     uffdio_zeropage.range.len);
 		mmput(ctx->mm);
 	} else {
-		return -ENOSPC;
+		return -ESRCH;
 	}
 	if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
 		return -EFAULT;
-- 
cgit v1.2.3-59-g8ed1b