From a9764869779081e8bf24da07ac040e8f3efcf13a Mon Sep 17 00:00:00 2001
From: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
Date: Fri, 8 Mar 2019 13:05:53 +0800
Subject: ASoC: dpcm: prevent snd_soc_dpcm use after free

The dpcm get from fe_clients/be_clients
may be free before use

Add a spin lock at snd_soc_card level,
to protect the dpcm instance.
The lock may be used in atomic context, so use spin lock.

Use irq spin lock version,
since the lock may be used in interrupts.

possible race condition between
void dpcm_be_disconnect(
	...
	list_del(&dpcm->list_be);
	list_del(&dpcm->list_fe);
	kfree(dpcm);
	...

and
	for_each_dpcm_fe()
	for_each_dpcm_be*()

race condition example
Thread 1:
    snd_soc_dapm_mixer_update_power()
        -> soc_dpcm_runtime_update()
            -> dpcm_be_disconnect()
                -> kfree(dpcm);
Thread 2:
    dpcm_fe_dai_trigger()
        -> dpcm_be_dai_trigger()
            -> snd_soc_dpcm_can_be_free_stop()
                -> if (dpcm->fe == fe)

Excpetion Scenario:
	two FE link to same BE
	FE1 -> BE
	FE2 ->

	Thread 1: switch of mixer between FE2 -> BE
	Thread 2: pcm_stop FE1

Exception:

Unable to handle kernel paging request at virtual address dead0000000000e0

pc=<> [<ffffff8960e2cd10>] dpcm_be_dai_trigger+0x29c/0x47c
	sound/soc/soc-pcm.c:3226
		if (dpcm->fe == fe)
lr=<> [<ffffff8960e2f694>] dpcm_fe_dai_do_trigger+0x94/0x26c

Backtrace:
[<ffffff89602dba80>] notify_die+0x68/0xb8
[<ffffff896028c7dc>] die+0x118/0x2a8
[<ffffff89602a2f84>] __do_kernel_fault+0x13c/0x14c
[<ffffff89602a27f4>] do_translation_fault+0x64/0xa0
[<ffffff8960280cf8>] do_mem_abort+0x4c/0xd0
[<ffffff8960282ad0>] el1_da+0x24/0x40
[<ffffff8960e2cd10>] dpcm_be_dai_trigger+0x29c/0x47c
[<ffffff8960e2f694>] dpcm_fe_dai_do_trigger+0x94/0x26c
[<ffffff8960e2edec>] dpcm_fe_dai_trigger+0x3c/0x44
[<ffffff8960de5588>] snd_pcm_do_stop+0x50/0x5c
[<ffffff8960dded24>] snd_pcm_action+0xb4/0x13c
[<ffffff8960ddfdb4>] snd_pcm_drop+0xa0/0x128
[<ffffff8960de69bc>] snd_pcm_common_ioctl+0x9d8/0x30f0
[<ffffff8960de1cac>] snd_pcm_ioctl_compat+0x29c/0x2f14
[<ffffff89604c9d60>] compat_SyS_ioctl+0x128/0x244
[<ffffff8960283740>] el0_svc_naked+0x34/0x38
[<ffffffffffffffff>] 0xffffffffffffffff

Signed-off-by: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')
diff --git a/include/sound/soc.h b/include/sound/soc.h
index eb7db605955b..1e2be35ed36f 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1083,6 +1083,8 @@ struct snd_soc_card {
 	struct mutex mutex;
 	struct mutex dapm_mutex;
 
+	spinlock_t dpcm_lock;
+
 	bool instantiated;
 	bool topology_shortname_created;
 
-- 
cgit v1.2.3-59-g8ed1b


From 5b989c18dab2e82bac8a5564a174794bf84b20e6 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 15 Mar 2019 11:03:35 +0100
Subject: mac80211: rework locking for txq scheduling / airtime fairness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Holding the lock around the entire duration of tx scheduling can create
some nasty lock contention, especially when processing airtime information
from the tx status or the rx path.
Improve locking by only holding the active_txq_lock for lookups / scheduling
list modifications.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 49 +++++++++++++++++++------------------------------
 net/mac80211/tx.c      | 44 ++++++++++++++++----------------------------
 2 files changed, 35 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ac2ed8ec662b..616998252dc7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6231,8 +6231,6 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
  * @hw: pointer as obtained from ieee80211_alloc_hw()
  * @ac: AC number to return packets from.
  *
- * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
  * Returns the next txq if successful, %NULL if no queue is eligible. If a txq
  * is returned, it should be returned with ieee80211_return_txq() after the
  * driver has finished scheduling it.
@@ -6240,51 +6238,42 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac);
 
 /**
- * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
- *
- * @hw: pointer as obtained from ieee80211_alloc_hw()
- * @txq: pointer obtained from station or virtual interface
- *
- * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
- */
-void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
-
-/**
- * ieee80211_txq_schedule_start - acquire locks for safe scheduling of an AC
+ * ieee80211_txq_schedule_start - start new scheduling round for TXQs
  *
  * @hw: pointer as obtained from ieee80211_alloc_hw()
  * @ac: AC number to acquire locks for
  *
- * Acquire locks needed to schedule TXQs from the given AC. Should be called
- * before ieee80211_next_txq() or ieee80211_return_txq().
+ * Should be called before ieee80211_next_txq() or ieee80211_return_txq().
+ * The driver must not call multiple TXQ scheduling rounds concurrently.
  */
-void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
-	__acquires(txq_lock);
+void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac);
+
+/* (deprecated) */
+static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
+{
+}
 
 /**
- * ieee80211_txq_schedule_end - release locks for safe scheduling of an AC
+ * ieee80211_schedule_txq - schedule a TXQ for transmission
  *
  * @hw: pointer as obtained from ieee80211_alloc_hw()
- * @ac: AC number to acquire locks for
+ * @txq: pointer obtained from station or virtual interface
  *
- * Release locks previously acquired by ieee80211_txq_schedule_end().
+ * Schedules a TXQ for transmission if it is not already scheduled.
  */
-void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
-	__releases(txq_lock);
+void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
 
 /**
- * ieee80211_schedule_txq - schedule a TXQ for transmission
+ * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
  *
  * @hw: pointer as obtained from ieee80211_alloc_hw()
  * @txq: pointer obtained from station or virtual interface
- *
- * Schedules a TXQ for transmission if it is not already scheduled. Takes a
- * lock, which means it must *not* be called between
- * ieee80211_txq_schedule_start() and ieee80211_txq_schedule_end()
  */
-void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
-	__acquires(txq_lock) __releases(txq_lock);
+static inline void
+ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
+{
+	ieee80211_schedule_txq(hw, txq);
+}
 
 /**
  * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5f546de10d96..134a3da147c6 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3649,16 +3649,17 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue);
 struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_txq *ret = NULL;
 	struct txq_info *txqi = NULL;
 
-	lockdep_assert_held(&local->active_txq_lock[ac]);
+	spin_lock_bh(&local->active_txq_lock[ac]);
 
  begin:
 	txqi = list_first_entry_or_null(&local->active_txqs[ac],
 					struct txq_info,
 					schedule_order);
 	if (!txqi)
-		return NULL;
+		goto out;
 
 	if (txqi->txq.sta) {
 		struct sta_info *sta = container_of(txqi->txq.sta,
@@ -3675,21 +3676,25 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
 
 
 	if (txqi->schedule_round == local->schedule_round[ac])
-		return NULL;
+		goto out;
 
 	list_del_init(&txqi->schedule_order);
 	txqi->schedule_round = local->schedule_round[ac];
-	return &txqi->txq;
+	ret = &txqi->txq;
+
+out:
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+	return ret;
 }
 EXPORT_SYMBOL(ieee80211_next_txq);
 
-void ieee80211_return_txq(struct ieee80211_hw *hw,
-			  struct ieee80211_txq *txq)
+void ieee80211_schedule_txq(struct ieee80211_hw *hw,
+			    struct ieee80211_txq *txq)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct txq_info *txqi = to_txq_info(txq);
 
-	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
+	spin_lock_bh(&local->active_txq_lock[txq->ac]);
 
 	if (list_empty(&txqi->schedule_order) &&
 	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
@@ -3709,17 +3714,7 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
 			list_add_tail(&txqi->schedule_order,
 				      &local->active_txqs[txq->ac]);
 	}
-}
-EXPORT_SYMBOL(ieee80211_return_txq);
 
-void ieee80211_schedule_txq(struct ieee80211_hw *hw,
-			    struct ieee80211_txq *txq)
-	__acquires(txq_lock) __releases(txq_lock)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-
-	spin_lock_bh(&local->active_txq_lock[txq->ac]);
-	ieee80211_return_txq(hw, txq);
 	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
 }
 EXPORT_SYMBOL(ieee80211_schedule_txq);
@@ -3732,7 +3727,7 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 	struct sta_info *sta;
 	u8 ac = txq->ac;
 
-	lockdep_assert_held(&local->active_txq_lock[ac]);
+	spin_lock_bh(&local->active_txq_lock[ac]);
 
 	if (!txqi->txq.sta)
 		goto out;
@@ -3762,34 +3757,27 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 
 	sta->airtime[ac].deficit += sta->airtime_weight;
 	list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
+	spin_unlock_bh(&local->active_txq_lock[ac]);
 
 	return false;
 out:
 	if (!list_empty(&txqi->schedule_order))
 		list_del_init(&txqi->schedule_order);
+	spin_unlock_bh(&local->active_txq_lock[ac]);
 
 	return true;
 }
 EXPORT_SYMBOL(ieee80211_txq_may_transmit);
 
 void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
-	__acquires(txq_lock)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	spin_lock_bh(&local->active_txq_lock[ac]);
 	local->schedule_round[ac]++;
-}
-EXPORT_SYMBOL(ieee80211_txq_schedule_start);
-
-void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
-	__releases(txq_lock)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-
 	spin_unlock_bh(&local->active_txq_lock[ac]);
 }
-EXPORT_SYMBOL(ieee80211_txq_schedule_end);
+EXPORT_SYMBOL(ieee80211_txq_schedule_start);
 
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
-- 
cgit v1.2.3-59-g8ed1b


From fe61692886669bbcc260f980903eacb4ddebaf59 Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira <rodrigosiqueiramelo@gmail.com>
Date: Thu, 14 Mar 2019 15:48:45 -0300
Subject: drm/atomic-helper: Make atomic_enable/disable crtc callbacks optional

Allow atomic_enable and atomic_disable operations from
drm_crtc_helper_funcs struct optional. With this, the target display
drivers don't need to define a dummy function if they don't need one.

Changes since v2:
* Don't make funcs optional
* Update kerneldoc for atomic_enable/disable
* Replace "if (funcs->atomic_enable)" by "if (funcs->commit)"
* Improve commit message

Signed-off-by: Rodrigo Siqueira <rodrigosiqueiramelo@gmail.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20190314184845.gjmvkamobj4dilyp@smtp.gmail.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 drivers/gpu/drm/drm_atomic_helper.c      | 5 ++---
 include/drm/drm_modeset_helper_vtables.h | 4 ++++
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 40ac19848034..fbb76332cc9f 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1034,7 +1034,7 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 			funcs->atomic_disable(crtc, old_crtc_state);
 		else if (funcs->disable)
 			funcs->disable(crtc);
-		else
+		else if (funcs->dpms)
 			funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
 
 		if (!(dev->irq_enabled && dev->num_crtcs))
@@ -1277,10 +1277,9 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev,
 		if (new_crtc_state->enable) {
 			DRM_DEBUG_ATOMIC("enabling [CRTC:%d:%s]\n",
 					 crtc->base.id, crtc->name);
-
 			if (funcs->atomic_enable)
 				funcs->atomic_enable(crtc, old_crtc_state);
-			else
+			else if (funcs->commit)
 				funcs->commit(crtc);
 		}
 	}
diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index cfb7be40bed7..ce4de6b1e444 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -418,6 +418,8 @@ struct drm_crtc_helper_funcs {
 	 * Drivers can use the @old_crtc_state input parameter if the operations
 	 * needed to enable the CRTC don't depend solely on the new state but
 	 * also on the transition between the old state and the new state.
+	 *
+	 * This function is optional.
 	 */
 	void (*atomic_enable)(struct drm_crtc *crtc,
 			      struct drm_crtc_state *old_crtc_state);
@@ -441,6 +443,8 @@ struct drm_crtc_helper_funcs {
 	 * parameter @old_crtc_state which could be used to access the old
 	 * state. Atomic drivers should consider to use this one instead
 	 * of @disable.
+	 *
+	 * This function is optional.
 	 */
 	void (*atomic_disable)(struct drm_crtc *crtc,
 			       struct drm_crtc_state *old_crtc_state);
-- 
cgit v1.2.3-59-g8ed1b


From b5bdbb6ccd1117896bf4fba6bff75336ca423e8c Mon Sep 17 00:00:00 2001
From: Daniel Mentz <danielmentz@google.com>
Date: Fri, 29 Mar 2019 15:48:54 -0700
Subject: ALSA: uapi: #include <time.h> in asound.h

The uapi header asound.h defines types based on struct timespec. We need
to #include <time.h> to get access to the definition of this struct.

Previously, we encountered the following error message when building
applications with a clang/bionic toolchain:

kernel-headers/sound/asound.h:350:19: error: field has incomplete type 'struct timespec'
  struct timespec trigger_tstamp;
                  ^

The absence of the time.h #include statement does not cause build errors
with glibc, because its version of stdlib.h indirectly includes time.h.

Signed-off-by: Daniel Mentz <danielmentz@google.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/uapi/sound/asound.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 404d4b9ffe76..df1153cea0b7 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -32,6 +32,7 @@
 
 #ifndef __KERNEL__
 #include <stdlib.h>
+#include <time.h>
 #endif
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From a0fe2c6479aab5723239b315ef1b552673f434a3 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 29 Mar 2019 22:46:49 +0100
Subject: linux/kernel.h: Use parentheses around argument in u64_to_user_ptr()

Use parentheses around uses of the argument in u64_to_user_ptr() to
ensure that the cast doesn't apply to part of the argument.

There are existing uses of the macro of the form

  u64_to_user_ptr(A + B)

which expands to

  (void __user *)(uintptr_t)A + B

(the cast applies to the first operand of the addition, the addition
is a pointer addition). This happens to still work as intended, the
semantic difference doesn't cause a difference in behavior.

But I want to use u64_to_user_ptr() with a ternary operator in the
argument, like so:

  u64_to_user_ptr(A ? B : C)

This currently doesn't work as intended.

Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Mukesh Ojha <mojha@codeaurora.org>
Cc: Andrei Vagin <avagin@openvz.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: NeilBrown <neilb@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qiaowei Ren <qiaowei.ren@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190329214652.258477-1-jannh@google.com
---
 include/linux/kernel.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 34a5036debd3..2d14e21c16c0 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -47,8 +47,8 @@
 
 #define u64_to_user_ptr(x) (		\
 {					\
-	typecheck(u64, x);		\
-	(void __user *)(uintptr_t)x;	\
+	typecheck(u64, (x));		\
+	(void __user *)(uintptr_t)(x);	\
 }					\
 )
 
-- 
cgit v1.2.3-59-g8ed1b


From b4ed6b51f356224c6c71540ed94087f7f09b84af Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Fri, 5 Apr 2019 09:57:08 -0700
Subject: ASoC: core: conditionally increase module refcount on component open

Recently, for Intel platforms the "ignore_module_refcount" field
was introduced for the component driver. In order to avoid a
deadlock preventing the PCI modules from being removed
even when the card was idle, the refcounts were not incremented
for the device driver module during component probe.

However, this change introduced a nasty side effect:
the device driver module can be unloaded while a pcm stream is open.

This patch proposes to change the field to be renamed as
"module_get_upon_open". When this field is set, the module
refcount should be incremented on pcm open amd decremented
upon pcm close. This will enable modules to be removed
when no PCM playback/capture happens and prevent removal
when the component is actually in use.

Also, align with the skylake component driver with the new name.

Fixes: b450b878('ASoC: core: don't increase component module refcount
                 unconditionally'
Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h               | 9 +++++++--
 sound/soc/intel/skylake/skl-pcm.c | 2 +-
 sound/soc/soc-core.c              | 4 ++--
 3 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 1e2be35ed36f..482b4ea87c3c 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -802,8 +802,13 @@ struct snd_soc_component_driver {
 	int probe_order;
 	int remove_order;
 
-	/* signal if the module handling the component cannot be removed */
-	unsigned int ignore_module_refcount:1;
+	/*
+	 * signal if the module handling the component should not be removed
+	 * if a pcm is open. Setting this would prevent the module
+	 * refcount being incremented in probe() but allow it be incremented
+	 * when a pcm is opened and decremented when it is closed.
+	 */
+	unsigned int module_get_upon_open:1;
 
 	/* bits */
 	unsigned int idle_bias_on:1;
diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c
index 57031b6d4d45..9735e2412251 100644
--- a/sound/soc/intel/skylake/skl-pcm.c
+++ b/sound/soc/intel/skylake/skl-pcm.c
@@ -1475,7 +1475,7 @@ static const struct snd_soc_component_driver skl_component  = {
 	.ops		= &skl_platform_ops,
 	.pcm_new	= skl_pcm_new,
 	.pcm_free	= skl_pcm_free,
-	.ignore_module_refcount = 1, /* do not increase the refcount in core */
+	.module_get_upon_open = 1, /* increment refcount when a pcm is opened */
 };
 
 int skl_platform_register(struct device *dev)
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index d88757659729..46e3ab0fced4 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -947,7 +947,7 @@ static void soc_cleanup_component(struct snd_soc_component *component)
 	snd_soc_dapm_free(snd_soc_component_get_dapm(component));
 	soc_cleanup_component_debugfs(component);
 	component->card = NULL;
-	if (!component->driver->ignore_module_refcount)
+	if (!component->driver->module_get_upon_open)
 		module_put(component->dev->driver->owner);
 }
 
@@ -1381,7 +1381,7 @@ static int soc_probe_component(struct snd_soc_card *card,
 		return 0;
 	}
 
-	if (!component->driver->ignore_module_refcount &&
+	if (!component->driver->module_get_upon_open &&
 	    !try_module_get(component->dev->driver->owner))
 		return -ENODEV;
 
-- 
cgit v1.2.3-59-g8ed1b


From a58d7525b8014115d57fd30186a84f6d30783f2c Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Tue, 12 Mar 2019 10:51:40 +0100
Subject: cfg80211: add ratelimited variants of err and warn

wiphy_{err,warn}_ratelimited will be used by rt2x00

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index bb307a11ee63..13bfeb712d36 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7183,6 +7183,11 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev,
 #define wiphy_info(wiphy, format, args...)			\
 	dev_info(&(wiphy)->dev, format, ##args)
 
+#define wiphy_err_ratelimited(wiphy, format, args...)		\
+	dev_err_ratelimited(&(wiphy)->dev, format, ##args)
+#define wiphy_warn_ratelimited(wiphy, format, args...)		\
+	dev_warn_ratelimited(&(wiphy)->dev, format, ##args)
+
 #define wiphy_debug(wiphy, format, args...)			\
 	wiphy_printk(KERN_DEBUG, wiphy, format, ##args)
 
-- 
cgit v1.2.3-59-g8ed1b


From 2b4a66980217332d91ab1785e1750857d6d52bc8 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 18 Mar 2019 12:00:58 +0100
Subject: mac80211: make ieee80211_schedule_txq schedule empty TXQs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently there is no way for the driver to signal to mac80211 that it should
schedule a TXQ even if there are no packets on the mac80211 part of that queue.
This is problematic if the driver has an internal retry queue to deal with
software A-MPDU retry.

This patch changes the behavior of ieee80211_schedule_txq to always schedule
the queue, as its only user (ath9k) seems to expect such behavior already:
it calls this function on tx status and on powersave wakeup whenever its
internal retry queue is not empty.

Also add an extra argument to ieee80211_return_txq to get the same behavior.

This fixes an issue on ath9k where tx queues with packets to retry (and no
new packets in mac80211) would not get serviced.

Fixes: 89cea7493a346 ("ath9k: Switch to mac80211 TXQ scheduling and airtime APIs")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c |  2 +-
 drivers/net/wireless/ath/ath10k/mac.c    |  4 ++--
 drivers/net/wireless/ath/ath9k/xmit.c    |  5 ++++-
 include/net/mac80211.h                   | 24 ++++++++++++++++++++----
 net/mac80211/tx.c                        | 10 ++++++----
 5 files changed, 33 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index a20ea270d519..1acc622d2183 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -2728,7 +2728,7 @@ static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb)
 			num_msdus++;
 			num_bytes += ret;
 		}
-		ieee80211_return_txq(hw, txq);
+		ieee80211_return_txq(hw, txq, false);
 		ieee80211_txq_schedule_end(hw, txq->ac);
 
 		record->num_msdus = cpu_to_le16(num_msdus);
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index b73c23d4ce86..41e89db244d2 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -4089,7 +4089,7 @@ static int ath10k_mac_schedule_txq(struct ieee80211_hw *hw, u32 ac)
 			if (ret < 0)
 				break;
 		}
-		ieee80211_return_txq(hw, txq);
+		ieee80211_return_txq(hw, txq, false);
 		ath10k_htt_tx_txq_update(hw, txq);
 		if (ret == -EBUSY)
 			break;
@@ -4374,7 +4374,7 @@ static void ath10k_mac_op_wake_tx_queue(struct ieee80211_hw *hw,
 		if (ret < 0)
 			break;
 	}
-	ieee80211_return_txq(hw, txq);
+	ieee80211_return_txq(hw, txq, false);
 	ath10k_htt_tx_txq_update(hw, txq);
 out:
 	ieee80211_txq_schedule_end(hw, ac);
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 773d428ff1b0..b17e1ca40995 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -1938,12 +1938,15 @@ void ath_txq_schedule(struct ath_softc *sc, struct ath_txq *txq)
 		goto out;
 
 	while ((queue = ieee80211_next_txq(hw, txq->mac80211_qnum))) {
+		bool force;
+
 		tid = (struct ath_atx_tid *)queue->drv_priv;
 
 		ret = ath_tx_sched_aggr(sc, txq, tid);
 		ath_dbg(common, QUEUE, "ath_tx_sched_aggr returned %d\n", ret);
 
-		ieee80211_return_txq(hw, queue);
+		force = !skb_queue_empty(&tid->retry_q);
+		ieee80211_return_txq(hw, queue, force);
 	}
 
 out:
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 616998252dc7..112dc18c658f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6253,26 +6253,42 @@ static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
 {
 }
 
+void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
+			      struct ieee80211_txq *txq, bool force);
+
 /**
  * ieee80211_schedule_txq - schedule a TXQ for transmission
  *
  * @hw: pointer as obtained from ieee80211_alloc_hw()
  * @txq: pointer obtained from station or virtual interface
  *
- * Schedules a TXQ for transmission if it is not already scheduled.
+ * Schedules a TXQ for transmission if it is not already scheduled,
+ * even if mac80211 does not have any packets buffered.
+ *
+ * The driver may call this function if it has buffered packets for
+ * this TXQ internally.
  */
-void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
+static inline void
+ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
+{
+	__ieee80211_schedule_txq(hw, txq, true);
+}
 
 /**
  * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
  *
  * @hw: pointer as obtained from ieee80211_alloc_hw()
  * @txq: pointer obtained from station or virtual interface
+ * @force: schedule txq even if mac80211 does not have any buffered packets.
+ *
+ * The driver may set force=true if it has buffered packets for this TXQ
+ * internally.
  */
 static inline void
-ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
+ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq,
+		     bool force)
 {
-	ieee80211_schedule_txq(hw, txq);
+	__ieee80211_schedule_txq(hw, txq, force);
 }
 
 /**
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 134a3da147c6..2e816dd67be7 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3688,8 +3688,9 @@ out:
 }
 EXPORT_SYMBOL(ieee80211_next_txq);
 
-void ieee80211_schedule_txq(struct ieee80211_hw *hw,
-			    struct ieee80211_txq *txq)
+void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
+			      struct ieee80211_txq *txq,
+			      bool force)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct txq_info *txqi = to_txq_info(txq);
@@ -3697,7 +3698,8 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw,
 	spin_lock_bh(&local->active_txq_lock[txq->ac]);
 
 	if (list_empty(&txqi->schedule_order) &&
-	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
+	    (force || !skb_queue_empty(&txqi->frags) ||
+	     txqi->tin.backlog_packets)) {
 		/* If airtime accounting is active, always enqueue STAs at the
 		 * head of the list to ensure that they only get moved to the
 		 * back by the airtime DRR scheduler once they have a negative
@@ -3717,7 +3719,7 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw,
 
 	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
 }
-EXPORT_SYMBOL(ieee80211_schedule_txq);
+EXPORT_SYMBOL(__ieee80211_schedule_txq);
 
 bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 				struct ieee80211_txq *txq)
-- 
cgit v1.2.3-59-g8ed1b


From 1200e07f3ad4b9d976cf2fff3a0c3d9a1faecb3e Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 8 Apr 2019 19:02:38 +0800
Subject: block: don't use for-inside-for in bio_for_each_segment_all

Commit 6dc4f100c175 ("block: allow bio_for_each_segment_all() to
iterate over multi-page bvec") changes bio_for_each_segment_all()
to use for-inside-for.

This way breaks all bio_for_each_segment_all() call with error out
branch via 'break', since now 'break' can only break from the inner
loop.

Fixes this issue by implementing bio_for_each_segment_all() via
single 'for' loop, and now the logic is very similar with normal
bvec iterator.

Cc: Qu Wenruo <quwenruo.btrfs@gmx.com>
Cc: linux-btrfs@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: Omar Sandoval <osandov@fb.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reported-and-Tested-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
Fixes: 6dc4f100c175 ("block: allow bio_for_each_segment_all() to iterate over multi-page bvec")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h  | 20 ++++++++++++--------
 include/linux/bvec.h | 14 ++++++++++----
 2 files changed, 22 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index bb6090aa165d..e584673c1881 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -120,19 +120,23 @@ static inline bool bio_full(struct bio *bio)
 	return bio->bi_vcnt >= bio->bi_max_vecs;
 }
 
-#define mp_bvec_for_each_segment(bv, bvl, i, iter_all)			\
-	for (bv = bvec_init_iter_all(&iter_all);			\
-		(iter_all.done < (bvl)->bv_len) &&			\
-		(mp_bvec_next_segment((bvl), &iter_all), 1);		\
-		iter_all.done += bv->bv_len, i += 1)
+static inline bool bio_next_segment(const struct bio *bio,
+				    struct bvec_iter_all *iter)
+{
+	if (iter->idx >= bio->bi_vcnt)
+		return false;
+
+	bvec_advance(&bio->bi_io_vec[iter->idx], iter);
+	return true;
+}
 
 /*
  * drivers should _never_ use the all version - the bio may have been split
  * before it got to the driver and the driver won't own all of it
  */
-#define bio_for_each_segment_all(bvl, bio, i, iter_all)		\
-	for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++)	\
-		mp_bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
+#define bio_for_each_segment_all(bvl, bio, i, iter)			\
+	for (i = 0, bvl = bvec_init_iter_all(&iter);			\
+	     bio_next_segment((bio), &iter); i++)
 
 static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 				    unsigned bytes)
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index f6275c4da13a..3bc91879e1e2 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -145,18 +145,18 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 
 static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all)
 {
-	iter_all->bv.bv_page = NULL;
 	iter_all->done = 0;
+	iter_all->idx = 0;
 
 	return &iter_all->bv;
 }
 
-static inline void mp_bvec_next_segment(const struct bio_vec *bvec,
-					struct bvec_iter_all *iter_all)
+static inline void bvec_advance(const struct bio_vec *bvec,
+				struct bvec_iter_all *iter_all)
 {
 	struct bio_vec *bv = &iter_all->bv;
 
-	if (bv->bv_page) {
+	if (iter_all->done) {
 		bv->bv_page = nth_page(bv->bv_page, 1);
 		bv->bv_offset = 0;
 	} else {
@@ -165,6 +165,12 @@ static inline void mp_bvec_next_segment(const struct bio_vec *bvec,
 	}
 	bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset,
 			   bvec->bv_len - iter_all->done);
+	iter_all->done += bv->bv_len;
+
+	if (iter_all->done == bvec->bv_len) {
+		iter_all->idx++;
+		iter_all->done = 0;
+	}
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From cf94db21905333e610e479688add629397a4b384 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Mon, 8 Apr 2019 14:33:22 +0200
Subject: virtio: Honour 'may_reduce_num' in vring_create_virtqueue

vring_create_virtqueue() allows the caller to specify via the
may_reduce_num parameter whether the vring code is allowed to
allocate a smaller ring than specified.

However, the split ring allocation code tries to allocate a
smaller ring on allocation failure regardless of what the
caller specified. This may cause trouble for e.g. virtio-pci
in legacy mode, which does not support ring resizing. (The
packed ring code does not resize in any case.)

Let's fix this by bailing out immediately in the split ring code
if the requested size cannot be allocated and may_reduce_num has
not been specified.

While at it, fix a typo in the usage instructions.

Fixes: 2a2d1382fe9d ("virtio: Add improved queue allocation API")
Cc: stable@vger.kernel.org # v4.6+
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
Reviewed-by: Jens Freimann <jfreimann@redhat.com>
---
 drivers/virtio/virtio_ring.c | 2 ++
 include/linux/virtio_ring.h  | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 18846afb39da..5df92c308286 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -882,6 +882,8 @@ static struct virtqueue *vring_create_virtqueue_split(
 					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
 		if (queue)
 			break;
+		if (!may_reduce_num)
+			return NULL;
 	}
 
 	if (!num)
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index fab02133a919..3dc70adfe5f5 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -63,7 +63,7 @@ struct virtqueue;
 /*
  * Creates a virtqueue and allocates the descriptor ring.  If
  * may_reduce_num is set, then this may allocate a smaller ring than
- * expected.  The caller should query virtqueue_get_ring_size to learn
+ * expected.  The caller should query virtqueue_get_vring_size to learn
  * the actual size of the ring.
  */
 struct virtqueue *vring_create_virtqueue(unsigned int index,
-- 
cgit v1.2.3-59-g8ed1b


From 6ec4bae178d8a1e9814eb3bfdd321b0475de0468 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul.walmsley@sifive.com>
Date: Wed, 3 Apr 2019 17:38:20 -0700
Subject: dt-bindings: clock: sifive: add FU540-C000 PRCI clock constants

Add preprocessor macros for the important PRCI output clocks
that are needed by both the FU540 PRCI driver and DT data.
Details are available in the FU540 manual in Chapter 7 of

    https://static.dev.sifive.com/FU540-C000-v1.0.pdf

Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 include/dt-bindings/clock/sifive-fu540-prci.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 include/dt-bindings/clock/sifive-fu540-prci.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/sifive-fu540-prci.h b/include/dt-bindings/clock/sifive-fu540-prci.h
new file mode 100644
index 000000000000..6a0b70a37d78
--- /dev/null
+++ b/include/dt-bindings/clock/sifive-fu540-prci.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018-2019 SiFive, Inc.
+ * Wesley Terpstra
+ * Paul Walmsley
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_SIFIVE_FU540_PRCI_H
+#define __DT_BINDINGS_CLOCK_SIFIVE_FU540_PRCI_H
+
+/* Clock indexes for use by Device Tree data and the PRCI driver */
+
+#define PRCI_CLK_COREPLL	       0
+#define PRCI_CLK_DDRPLL		       1
+#define PRCI_CLK_GEMGXLPLL	       2
+#define PRCI_CLK_TLCLK		       3
+
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From 1b8f21b74c3c9c82fce5a751d7aefb7cc0b8d33d Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 9 Apr 2019 06:31:21 +0800
Subject: blk-mq: introduce blk_mq_complete_request_sync()

In NVMe's error handler, follows the typical steps of tearing down
hardware for recovering controller:

1) stop blk_mq hw queues
2) stop the real hw queues
3) cancel in-flight requests via
	blk_mq_tagset_busy_iter(tags, cancel_request, ...)
cancel_request():
	mark the request as abort
	blk_mq_complete_request(req);
4) destroy real hw queues

However, there may be race between #3 and #4, because blk_mq_complete_request()
may run q->mq_ops->complete(rq) remotelly and asynchronously, and
->complete(rq) may be run after #4.

This patch introduces blk_mq_complete_request_sync() for fixing the
above race.

Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: James Smart <james.smart@broadcom.com>
Cc: linux-nvme@lists.infradead.org
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 7 +++++++
 include/linux/blk-mq.h | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a9354835cf51..9516304a38ee 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -654,6 +654,13 @@ bool blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
+void blk_mq_complete_request_sync(struct request *rq)
+{
+	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
+	rq->q->mq_ops->complete(rq);
+}
+EXPORT_SYMBOL_GPL(blk_mq_complete_request_sync);
+
 int blk_mq_request_started(struct request *rq)
 {
 	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index cb2aa7ecafff..db29928de467 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -302,6 +302,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 bool blk_mq_complete_request(struct request *rq);
+void blk_mq_complete_request_sync(struct request *rq);
 bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
 			   struct bio *bio);
 bool blk_mq_queue_stopped(struct request_queue *q);
-- 
cgit v1.2.3-59-g8ed1b


From 35b71a34ada62c9573847a324bf06a133fe11b11 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 10 Apr 2019 11:04:31 -0700
Subject: net/tls: don't leak partially sent record in device mode

David reports that tls triggers warnings related to
sk->sk_forward_alloc not being zero at destruction time:

WARNING: CPU: 5 PID: 6831 at net/core/stream.c:206 sk_stream_kill_queues+0x103/0x110
WARNING: CPU: 5 PID: 6831 at net/ipv4/af_inet.c:160 inet_sock_destruct+0x15b/0x170

When sender fills up the write buffer and dies from
SIGPIPE.  This is due to the device implementation
not cleaning up the partially_sent_record.

This is because commit a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
moved the partial record cleanup to the SW-only path.

Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
Reported-by: David Beckett <david.beckett@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h    |  2 ++
 net/tls/tls_device.c |  7 +++++++
 net/tls/tls_main.c   | 22 ++++++++++++++++++++++
 net/tls/tls_sw.c     | 15 +--------------
 4 files changed, 32 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index a5a938583295..c7f7dc344e73 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -307,6 +307,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_device_sendpage(struct sock *sk, struct page *page,
 			int offset, size_t size, int flags);
 void tls_device_sk_destruct(struct sock *sk);
+void tls_device_free_resources_tx(struct sock *sk);
 void tls_device_init(void);
 void tls_device_cleanup(void);
 int tls_tx_records(struct sock *sk, int flags);
@@ -330,6 +331,7 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx,
 		int flags);
 int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
 			    int flags);
+bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
 
 static inline struct tls_msg *tls_msg(struct sk_buff *skb)
 {
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 38b3b2a9835a..9f3bdbc1e593 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -219,6 +219,13 @@ void tls_device_sk_destruct(struct sock *sk)
 }
 EXPORT_SYMBOL(tls_device_sk_destruct);
 
+void tls_device_free_resources_tx(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+
+	tls_free_partial_record(sk, tls_ctx);
+}
+
 static void tls_append_frag(struct tls_record_info *record,
 			    struct page_frag *pfrag,
 			    int size)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index df921a2904b9..a3cca1ef0098 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -208,6 +208,26 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
 	return tls_push_sg(sk, ctx, sg, offset, flags);
 }
 
+bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx)
+{
+	struct scatterlist *sg;
+
+	sg = ctx->partially_sent_record;
+	if (!sg)
+		return false;
+
+	while (1) {
+		put_page(sg_page(sg));
+		sk_mem_uncharge(sk, sg->length);
+
+		if (sg_is_last(sg))
+			break;
+		sg++;
+	}
+	ctx->partially_sent_record = NULL;
+	return true;
+}
+
 static void tls_write_space(struct sock *sk)
 {
 	struct tls_context *ctx = tls_get_ctx(sk);
@@ -267,6 +287,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 		kfree(ctx->tx.rec_seq);
 		kfree(ctx->tx.iv);
 		tls_sw_free_resources_tx(sk);
+	} else if (ctx->tx_conf == TLS_HW) {
+		tls_device_free_resources_tx(sk);
 	}
 
 	if (ctx->rx_conf == TLS_SW) {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 20b191227969..b50ced862f6f 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2052,20 +2052,7 @@ void tls_sw_free_resources_tx(struct sock *sk)
 	/* Free up un-sent records in tx_list. First, free
 	 * the partially sent record if any at head of tx_list.
 	 */
-	if (tls_ctx->partially_sent_record) {
-		struct scatterlist *sg = tls_ctx->partially_sent_record;
-
-		while (1) {
-			put_page(sg_page(sg));
-			sk_mem_uncharge(sk, sg->length);
-
-			if (sg_is_last(sg))
-				break;
-			sg++;
-		}
-
-		tls_ctx->partially_sent_record = NULL;
-
+	if (tls_free_partial_record(sk, tls_ctx)) {
 		rec = list_first_entry(&ctx->tx_list,
 				       struct tls_rec, list);
 		list_del(&rec->list);
-- 
cgit v1.2.3-59-g8ed1b


From 7c2e07130090ae001a97a6b65597830d6815e93e Mon Sep 17 00:00:00 2001
From: David Müller <dave.mueller@gmx.ch>
Date: Mon, 8 Apr 2019 15:33:54 +0200
Subject: clk: x86: Add system specific quirk to mark clocks as critical
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 648e921888ad ("clk: x86: Stop marking clocks as
CLK_IS_CRITICAL"), the pmc_plt_clocks of the Bay Trail SoC are
unconditionally gated off. Unfortunately this will break systems where these
clocks are used for external purposes beyond the kernel's knowledge. Fix it
by implementing a system specific quirk to mark the necessary pmc_plt_clks as
critical.

Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL")
Signed-off-by: David Müller <dave.mueller@gmx.ch>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/x86/clk-pmc-atom.c                 | 14 +++++++++++---
 drivers/platform/x86/pmc_atom.c                | 21 +++++++++++++++++++++
 include/linux/platform_data/x86/clk-pmc-atom.h |  3 +++
 3 files changed, 35 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c
index d977193842df..19174835693b 100644
--- a/drivers/clk/x86/clk-pmc-atom.c
+++ b/drivers/clk/x86/clk-pmc-atom.c
@@ -165,7 +165,7 @@ static const struct clk_ops plt_clk_ops = {
 };
 
 static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id,
-					void __iomem *base,
+					const struct pmc_clk_data *pmc_data,
 					const char **parent_names,
 					int num_parents)
 {
@@ -184,9 +184,17 @@ static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id,
 	init.num_parents = num_parents;
 
 	pclk->hw.init = &init;
-	pclk->reg = base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE;
+	pclk->reg = pmc_data->base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE;
 	spin_lock_init(&pclk->lock);
 
+	/*
+	 * On some systems, the pmc_plt_clocks already enabled by the
+	 * firmware are being marked as critical to avoid them being
+	 * gated by the clock framework.
+	 */
+	if (pmc_data->critical && plt_clk_is_enabled(&pclk->hw))
+		init.flags |= CLK_IS_CRITICAL;
+
 	ret = devm_clk_hw_register(&pdev->dev, &pclk->hw);
 	if (ret) {
 		pclk = ERR_PTR(ret);
@@ -332,7 +340,7 @@ static int plt_clk_probe(struct platform_device *pdev)
 		return PTR_ERR(parent_names);
 
 	for (i = 0; i < PMC_CLK_NUM; i++) {
-		data->clks[i] = plt_clk_register(pdev, i, pmc_data->base,
+		data->clks[i] = plt_clk_register(pdev, i, pmc_data,
 						 parent_names, data->nparents);
 		if (IS_ERR(data->clks[i])) {
 			err = PTR_ERR(data->clks[i]);
diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c
index 8f018b3f3cd4..eaec2d306481 100644
--- a/drivers/platform/x86/pmc_atom.c
+++ b/drivers/platform/x86/pmc_atom.c
@@ -17,6 +17,7 @@
 
 #include <linux/debugfs.h>
 #include <linux/device.h>
+#include <linux/dmi.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/platform_data/x86/clk-pmc-atom.h>
@@ -391,11 +392,27 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc)
 }
 #endif /* CONFIG_DEBUG_FS */
 
+/*
+ * Some systems need one or more of their pmc_plt_clks to be
+ * marked as critical.
+ */
+static const struct dmi_system_id critclk_systems[] __initconst = {
+	{
+		.ident = "MPL CEC1x",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "MPL AG"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "CEC10 Family"),
+		},
+	},
+	{ /*sentinel*/ }
+};
+
 static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap,
 			  const struct pmc_data *pmc_data)
 {
 	struct platform_device *clkdev;
 	struct pmc_clk_data *clk_data;
+	const struct dmi_system_id *d = dmi_first_match(critclk_systems);
 
 	clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
 	if (!clk_data)
@@ -403,6 +420,10 @@ static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap,
 
 	clk_data->base = pmc_regmap; /* offset is added by client */
 	clk_data->clks = pmc_data->clks;
+	if (d) {
+		clk_data->critical = true;
+		pr_info("%s critclks quirk enabled\n", d->ident);
+	}
 
 	clkdev = platform_device_register_data(&pdev->dev, "clk-pmc-atom",
 					       PLATFORM_DEVID_NONE,
diff --git a/include/linux/platform_data/x86/clk-pmc-atom.h b/include/linux/platform_data/x86/clk-pmc-atom.h
index 3ab892208343..7a37ac27d0fb 100644
--- a/include/linux/platform_data/x86/clk-pmc-atom.h
+++ b/include/linux/platform_data/x86/clk-pmc-atom.h
@@ -35,10 +35,13 @@ struct pmc_clk {
  *
  * @base:	PMC clock register base offset
  * @clks:	pointer to set of registered clocks, typically 0..5
+ * @critical:	flag to indicate if firmware enabled pmc_plt_clks
+ *		should be marked as critial or not
  */
 struct pmc_clk_data {
 	void __iomem *base;
 	const struct pmc_clk *clks;
+	bool critical;
 };
 
 #endif /* __PLATFORM_DATA_X86_CLK_PMC_ATOM_H */
-- 
cgit v1.2.3-59-g8ed1b


From 8065a779f17e94536a1c4dcee4f9d88011672f97 Mon Sep 17 00:00:00 2001
From: Si-Wei Liu <si-wei.liu@oracle.com>
Date: Mon, 8 Apr 2019 19:45:27 -0400
Subject: failover: allow name change on IFF_UP slave interfaces

When a netdev appears through hot plug then gets enslaved by a failover
master that is already up and running, the slave will be opened
right away after getting enslaved. Today there's a race that userspace
(udev) may fail to rename the slave if the kernel (net_failover)
opens the slave earlier than when the userspace rename happens.
Unlike bond or team, the primary slave of failover can't be renamed by
userspace ahead of time, since the kernel initiated auto-enslavement is
unable to, or rather, is never meant to be synchronized with the rename
request from userspace.

As the failover slave interfaces are not designed to be operated
directly by userspace apps: IP configuration, filter rules with
regard to network traffic passing and etc., should all be done on master
interface. In general, userspace apps only care about the
name of master interface, while slave names are less important as long
as admin users can see reliable names that may carry
other information describing the netdev. For e.g., they can infer that
"ens3nsby" is a standby slave of "ens3", while for a
name like "eth0" they can't tell which master it belongs to.

Historically the name of IFF_UP interface can't be changed because
there might be admin script or management software that is already
relying on such behavior and assumes that the slave name can't be
changed once UP. But failover is special: with the in-kernel
auto-enslavement mechanism, the userspace expectation for device
enumeration and bring-up order is already broken. Previously initramfs
and various userspace config tools were modified to bypass failover
slaves because of auto-enslavement and duplicate MAC address. Similarly,
in case that users care about seeing reliable slave name, the new type
of failover slaves needs to be taken care of specifically in userspace
anyway.

It's less risky to lift up the rename restriction on failover slave
which is already UP. Although it's possible this change may potentially
break userspace component (most likely configuration scripts or
management software) that assumes slave name can't be changed while
UP, it's relatively a limited and controllable set among all userspace
components, which can be fixed specifically to listen for the rename
events on failover slaves. Userspace component interacting with slaves
is expected to be changed to operate on failover master interface
instead, as the failover slave is dynamic in nature which may come and
go at any point.  The goal is to make the role of failover slaves less
relevant, and userspace components should only deal with failover master
in the long run.

Fixes: 30c8bd5aa8b2 ("net: Introduce generic failover module")
Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
Reviewed-by: Liran Alon <liran.alon@oracle.com>
Acked-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +++
 net/core/dev.c            | 16 +++++++++++++++-
 net/core/failover.c       |  6 +++---
 3 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 26f69cf763f4..324e872c91d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1500,6 +1500,7 @@ struct net_device_ops {
  * @IFF_FAILOVER: device is a failover master device
  * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
  * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
+ * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
  */
 enum netdev_priv_flags {
 	IFF_802_1Q_VLAN			= 1<<0,
@@ -1532,6 +1533,7 @@ enum netdev_priv_flags {
 	IFF_FAILOVER			= 1<<27,
 	IFF_FAILOVER_SLAVE		= 1<<28,
 	IFF_L3MDEV_RX_HANDLER		= 1<<29,
+	IFF_LIVE_RENAME_OK		= 1<<30,
 };
 
 #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
@@ -1563,6 +1565,7 @@ enum netdev_priv_flags {
 #define IFF_FAILOVER			IFF_FAILOVER
 #define IFF_FAILOVER_SLAVE		IFF_FAILOVER_SLAVE
 #define IFF_L3MDEV_RX_HANDLER		IFF_L3MDEV_RX_HANDLER
+#define IFF_LIVE_RENAME_OK		IFF_LIVE_RENAME_OK
 
 /**
  *	struct net_device - The DEVICE structure.
diff --git a/net/core/dev.c b/net/core/dev.c
index fdcff29df915..f409406254dd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1184,7 +1184,21 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	BUG_ON(!dev_net(dev));
 
 	net = dev_net(dev);
-	if (dev->flags & IFF_UP)
+
+	/* Some auto-enslaved devices e.g. failover slaves are
+	 * special, as userspace might rename the device after
+	 * the interface had been brought up and running since
+	 * the point kernel initiated auto-enslavement. Allow
+	 * live name change even when these slave devices are
+	 * up and running.
+	 *
+	 * Typically, users of these auto-enslaving devices
+	 * don't actually care about slave name change, as
+	 * they are supposed to operate on master interface
+	 * directly.
+	 */
+	if (dev->flags & IFF_UP &&
+	    likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
 		return -EBUSY;
 
 	write_seqcount_begin(&devnet_rename_seq);
diff --git a/net/core/failover.c b/net/core/failover.c
index 4a92a98ccce9..b5cd3c727285 100644
--- a/net/core/failover.c
+++ b/net/core/failover.c
@@ -80,14 +80,14 @@ static int failover_slave_register(struct net_device *slave_dev)
 		goto err_upper_link;
 	}
 
-	slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
+	slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
 
 	if (fops && fops->slave_register &&
 	    !fops->slave_register(slave_dev, failover_dev))
 		return NOTIFY_OK;
 
 	netdev_upper_dev_unlink(slave_dev, failover_dev);
-	slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+	slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
 err_upper_link:
 	netdev_rx_handler_unregister(slave_dev);
 done:
@@ -121,7 +121,7 @@ int failover_slave_unregister(struct net_device *slave_dev)
 
 	netdev_rx_handler_unregister(slave_dev);
 	netdev_upper_dev_unlink(slave_dev, failover_dev);
-	slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+	slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
 
 	if (fops && fops->slave_unregister &&
 	    !fops->slave_unregister(slave_dev, failover_dev))
-- 
cgit v1.2.3-59-g8ed1b


From b4f47f3848eb70986f75d06112af7b48b7f5f462 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 8 Apr 2019 17:59:50 -0700
Subject: net/tls: prevent bad memory access in tls_is_sk_tx_device_offloaded()

Unlike '&&' operator, the '&' does not have short-circuit
evaluation semantics.  IOW both sides of the operator always
get evaluated.  Fix the wrong operator in
tls_is_sk_tx_device_offloaded(), which would lead to
out-of-bounds access for for non-full sockets.

Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index c7f7dc344e73..5934246b2c6f 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -381,7 +381,7 @@ tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
 static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
 {
 #ifdef CONFIG_SOCK_VALIDATE_XMIT
-	return sk_fullsock(sk) &
+	return sk_fullsock(sk) &&
 	       (smp_load_acquire(&sk->sk_validate_xmit_skb) ==
 	       &tls_validate_xmit_skb);
 #else
-- 
cgit v1.2.3-59-g8ed1b


From d808b7f759b50acf0784ce6230ffa63e12ef465d Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 9 Apr 2019 10:03:59 -0600
Subject: nvmet: fix discover log page when offsets are used

The nvme target hadn't been taking the Get Log Page offset parameter
into consideration, and so has been returning corrupted log pages when
offsets are used. Since many tools, including nvme-cli, split the log
request to 4k, we've been breaking discovery log responses when more
than 3 subsystems exist.

Fix the returned data by internally generating the entire discovery
log page and copying only the requested bytes into the user buffer. The
command log page offset type has been modified to a native __le64 to
make it easier to extract the value from a command.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Tested-by: Minwoo Im <minwoo.im@samsung.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c |  5 +++
 drivers/nvme/target/discovery.c | 68 +++++++++++++++++++++++++++--------------
 drivers/nvme/target/nvmet.h     |  1 +
 include/linux/nvme.h            |  9 ++++--
 4 files changed, 58 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 76250181fee0..9f72d515fc4b 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -24,6 +24,11 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd)
 	return len;
 }
 
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
+{
+	return le64_to_cpu(cmd->get_log_page.lpo);
+}
+
 static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
 {
 	nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index c872b47a88f3..33ed95e72d6b 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -131,54 +131,76 @@ static void nvmet_set_disc_traddr(struct nvmet_req *req, struct nvmet_port *port
 		memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
 }
 
+static size_t discovery_log_entries(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvmet_subsys_link *p;
+	struct nvmet_port *r;
+	size_t entries = 0;
+
+	list_for_each_entry(p, &req->port->subsystems, entry) {
+		if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
+			continue;
+		entries++;
+	}
+	list_for_each_entry(r, &req->port->referrals, entry)
+		entries++;
+	return entries;
+}
+
 static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
 {
 	const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvmf_disc_rsp_page_hdr *hdr;
+	u64 offset = nvmet_get_log_page_offset(req->cmd);
 	size_t data_len = nvmet_get_log_page_len(req->cmd);
-	size_t alloc_len = max(data_len, sizeof(*hdr));
-	int residual_len = data_len - sizeof(*hdr);
+	size_t alloc_len;
 	struct nvmet_subsys_link *p;
 	struct nvmet_port *r;
 	u32 numrec = 0;
 	u16 status = 0;
+	void *buffer;
+
+	/* Spec requires dword aligned offsets */
+	if (offset & 0x3) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		goto out;
+	}
 
 	/*
 	 * Make sure we're passing at least a buffer of response header size.
 	 * If host provided data len is less than the header size, only the
 	 * number of bytes requested by host will be sent to host.
 	 */
-	hdr = kzalloc(alloc_len, GFP_KERNEL);
-	if (!hdr) {
+	down_read(&nvmet_config_sem);
+	alloc_len = sizeof(*hdr) + entry_size * discovery_log_entries(req);
+	buffer = kzalloc(alloc_len, GFP_KERNEL);
+	if (!buffer) {
+		up_read(&nvmet_config_sem);
 		status = NVME_SC_INTERNAL;
 		goto out;
 	}
 
-	down_read(&nvmet_config_sem);
+	hdr = buffer;
 	list_for_each_entry(p, &req->port->subsystems, entry) {
+		char traddr[NVMF_TRADDR_SIZE];
+
 		if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
 			continue;
-		if (residual_len >= entry_size) {
-			char traddr[NVMF_TRADDR_SIZE];
-
-			nvmet_set_disc_traddr(req, req->port, traddr);
-			nvmet_format_discovery_entry(hdr, req->port,
-					p->subsys->subsysnqn, traddr,
-					NVME_NQN_NVME, numrec);
-			residual_len -= entry_size;
-		}
+
+		nvmet_set_disc_traddr(req, req->port, traddr);
+		nvmet_format_discovery_entry(hdr, req->port,
+				p->subsys->subsysnqn, traddr,
+				NVME_NQN_NVME, numrec);
 		numrec++;
 	}
 
 	list_for_each_entry(r, &req->port->referrals, entry) {
-		if (residual_len >= entry_size) {
-			nvmet_format_discovery_entry(hdr, r,
-					NVME_DISC_SUBSYS_NAME,
-					r->disc_addr.traddr,
-					NVME_NQN_DISC, numrec);
-			residual_len -= entry_size;
-		}
+		nvmet_format_discovery_entry(hdr, r,
+				NVME_DISC_SUBSYS_NAME,
+				r->disc_addr.traddr,
+				NVME_NQN_DISC, numrec);
 		numrec++;
 	}
 
@@ -190,8 +212,8 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
 
 	up_read(&nvmet_config_sem);
 
-	status = nvmet_copy_to_sgl(req, 0, hdr, data_len);
-	kfree(hdr);
+	status = nvmet_copy_to_sgl(req, 0, buffer + offset, data_len);
+	kfree(buffer);
 out:
 	nvmet_req_complete(req, status);
 }
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 51e49efd7849..1653d19b187f 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -428,6 +428,7 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf,
 u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);
 
 u32 nvmet_get_log_page_len(struct nvme_command *cmd);
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd);
 
 extern struct list_head *nvmet_ports;
 void nvmet_port_disc_changed(struct nvmet_port *port,
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index baa49e6a23cc..c40720cb59ac 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -967,8 +967,13 @@ struct nvme_get_log_page_command {
 	__le16			numdl;
 	__le16			numdu;
 	__u16			rsvd11;
-	__le32			lpol;
-	__le32			lpou;
+	union {
+		struct {
+			__le32 lpol;
+			__le32 lpou;
+		};
+		__le64 lpo;
+	};
 	__u32			rsvd14[2];
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 07603b230895a74ebb1e2a1231ac45c29c2a8cd3 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Thu, 11 Apr 2019 11:17:32 +0200
Subject: net/smc: propagate file from SMC to TCP socket

fcntl(fd, F_SETOWN, getpid()) selects the recipient of SIGURG signals
that are delivered when out-of-band data arrives on socket fd.
If an SMC socket program makes use of such an fcntl() call, it fails
in case of fallback to TCP-mode. In case of fallback the traffic is
processed with the internal TCP socket. Propagating field "file" from the
SMC socket to the internal TCP socket fixes the issue.

Reviewed-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  6 ------
 net/smc/af_smc.c   | 38 ++++++++++++++++++++++++++++----------
 2 files changed, 28 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 8de5ee258b93..341f8bafa0cf 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2084,12 +2084,6 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
  * @p:              poll_table
  *
  * See the comments in the wq_has_sleeper function.
- *
- * Do not derive sock from filp->private_data here. An SMC socket establishes
- * an internal TCP socket that is used in the fallback case. All socket
- * operations on the SMC socket are then forwarded to the TCP socket. In case of
- * poll, the filp->private_data pointer references the SMC socket because the
- * TCP socket has no file assigned.
  */
 static inline void sock_poll_wait(struct file *filp, struct socket *sock,
 				  poll_table *p)
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 9bdaed2f2e35..d2a0d15f809c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -445,10 +445,19 @@ static void smc_link_save_peer_info(struct smc_link *link,
 	link->peer_mtu = clc->qp_mtu;
 }
 
+static void smc_switch_to_fallback(struct smc_sock *smc)
+{
+	smc->use_fallback = true;
+	if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
+		smc->clcsock->file = smc->sk.sk_socket->file;
+		smc->clcsock->file->private_data = smc->clcsock;
+	}
+}
+
 /* fall back during connect */
 static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 {
-	smc->use_fallback = true;
+	smc_switch_to_fallback(smc);
 	smc->fallback_rsn = reason_code;
 	smc_copy_sock_settings_to_clc(smc);
 	if (smc->sk.sk_state == SMC_INIT)
@@ -774,10 +783,14 @@ static void smc_connect_work(struct work_struct *work)
 		smc->sk.sk_err = -rc;
 
 out:
-	if (smc->sk.sk_err)
-		smc->sk.sk_state_change(&smc->sk);
-	else
-		smc->sk.sk_write_space(&smc->sk);
+	if (!sock_flag(&smc->sk, SOCK_DEAD)) {
+		if (smc->sk.sk_err) {
+			smc->sk.sk_state_change(&smc->sk);
+		} else { /* allow polling before and after fallback decision */
+			smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
+			smc->sk.sk_write_space(&smc->sk);
+		}
+	}
 	kfree(smc->connect_info);
 	smc->connect_info = NULL;
 	release_sock(&smc->sk);
@@ -934,8 +947,13 @@ struct sock *smc_accept_dequeue(struct sock *parent,
 			sock_put(new_sk); /* final */
 			continue;
 		}
-		if (new_sock)
+		if (new_sock) {
 			sock_graft(new_sk, new_sock);
+			if (isk->use_fallback) {
+				smc_sk(new_sk)->clcsock->file = new_sock->file;
+				isk->clcsock->file->private_data = isk->clcsock;
+			}
+		}
 		return new_sk;
 	}
 	return NULL;
@@ -1086,7 +1104,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
 		return;
 	}
 	smc_conn_free(&new_smc->conn);
-	new_smc->use_fallback = true;
+	smc_switch_to_fallback(new_smc);
 	new_smc->fallback_rsn = reason_code;
 	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
 		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
@@ -1246,7 +1264,7 @@ static void smc_listen_work(struct work_struct *work)
 
 	/* check if peer is smc capable */
 	if (!tcp_sk(newclcsock->sk)->syn_smc) {
-		new_smc->use_fallback = true;
+		smc_switch_to_fallback(new_smc);
 		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
 		smc_listen_out_connected(new_smc);
 		return;
@@ -1503,7 +1521,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 
 	if (msg->msg_flags & MSG_FASTOPEN) {
 		if (sk->sk_state == SMC_INIT) {
-			smc->use_fallback = true;
+			smc_switch_to_fallback(smc);
 			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
 		} else {
 			rc = -EINVAL;
@@ -1705,7 +1723,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 	case TCP_FASTOPEN_NO_COOKIE:
 		/* option not supported by SMC */
 		if (sk->sk_state == SMC_INIT) {
-			smc->use_fallback = true;
+			smc_switch_to_fallback(smc);
 			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
 		} else {
 			if (!smc->use_fallback)
-- 
cgit v1.2.3-59-g8ed1b


From af6b61d7ef58099c82d854395a0e002be6bd036c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Thu, 11 Apr 2019 15:16:52 -0400
Subject: Revert "SUNRPC: Micro-optimise when the task is known not to be
 sleeping"

This reverts commit 009a82f6437490c262584d65a14094a818bcb747.

The ability to optimise here relies on compiler being able to optimise
away tail calls to avoid stack overflows. Unfortunately, we are seeing
reports of problems, so let's just revert.

Reported-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/sched.h |  8 --------
 net/sunrpc/clnt.c            | 45 ++++++++------------------------------------
 2 files changed, 8 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index ec861cd0cfe8..52d41d0c1ae1 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -304,12 +304,4 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
 }
 #endif /* CONFIG_SUNRPC_SWAP */
 
-static inline bool
-rpc_task_need_resched(const struct rpc_task *task)
-{
-	if (RPC_IS_QUEUED(task) || task->tk_callback)
-		return true;
-	return false;
-}
-
 #endif /* _LINUX_SUNRPC_SCHED_H_ */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 187d10443a15..1d0395ef62c9 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1540,7 +1540,6 @@ call_start(struct rpc_task *task)
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
 	rpc_task_set_transport(task, clnt);
-	call_reserve(task);
 }
 
 /*
@@ -1554,9 +1553,6 @@ call_reserve(struct rpc_task *task)
 	task->tk_status  = 0;
 	task->tk_action  = call_reserveresult;
 	xprt_reserve(task);
-	if (rpc_task_need_resched(task))
-		return;
-	 call_reserveresult(task);
 }
 
 static void call_retry_reserve(struct rpc_task *task);
@@ -1579,7 +1575,6 @@ call_reserveresult(struct rpc_task *task)
 	if (status >= 0) {
 		if (task->tk_rqstp) {
 			task->tk_action = call_refresh;
-			call_refresh(task);
 			return;
 		}
 
@@ -1605,7 +1600,6 @@ call_reserveresult(struct rpc_task *task)
 		/* fall through */
 	case -EAGAIN:	/* woken up; retry */
 		task->tk_action = call_retry_reserve;
-		call_retry_reserve(task);
 		return;
 	case -EIO:	/* probably a shutdown */
 		break;
@@ -1628,9 +1622,6 @@ call_retry_reserve(struct rpc_task *task)
 	task->tk_status  = 0;
 	task->tk_action  = call_reserveresult;
 	xprt_retry_reserve(task);
-	if (rpc_task_need_resched(task))
-		return;
-	call_reserveresult(task);
 }
 
 /*
@@ -1645,9 +1636,6 @@ call_refresh(struct rpc_task *task)
 	task->tk_status = 0;
 	task->tk_client->cl_stats->rpcauthrefresh++;
 	rpcauth_refreshcred(task);
-	if (rpc_task_need_resched(task))
-		return;
-	call_refreshresult(task);
 }
 
 /*
@@ -1666,7 +1654,6 @@ call_refreshresult(struct rpc_task *task)
 	case 0:
 		if (rpcauth_uptodatecred(task)) {
 			task->tk_action = call_allocate;
-			call_allocate(task);
 			return;
 		}
 		/* Use rate-limiting and a max number of retries if refresh
@@ -1685,7 +1672,6 @@ call_refreshresult(struct rpc_task *task)
 		task->tk_cred_retry--;
 		dprintk("RPC: %5u %s: retry refresh creds\n",
 				task->tk_pid, __func__);
-		call_refresh(task);
 		return;
 	}
 	dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
@@ -1711,10 +1697,8 @@ call_allocate(struct rpc_task *task)
 	task->tk_status = 0;
 	task->tk_action = call_encode;
 
-	if (req->rq_buffer) {
-		call_encode(task);
+	if (req->rq_buffer)
 		return;
-	}
 
 	if (proc->p_proc != 0) {
 		BUG_ON(proc->p_arglen == 0);
@@ -1740,12 +1724,8 @@ call_allocate(struct rpc_task *task)
 
 	status = xprt->ops->buf_alloc(task);
 	xprt_inject_disconnect(xprt);
-	if (status == 0) {
-		if (rpc_task_need_resched(task))
-			return;
-		call_encode(task);
+	if (status == 0)
 		return;
-	}
 	if (status != -ENOMEM) {
 		rpc_exit(task, status);
 		return;
@@ -1828,8 +1808,12 @@ call_encode(struct rpc_task *task)
 		xprt_request_enqueue_receive(task);
 	xprt_request_enqueue_transmit(task);
 out:
-	task->tk_action = call_bind;
-	call_bind(task);
+	task->tk_action = call_transmit;
+	/* Check that the connection is OK */
+	if (!xprt_bound(task->tk_xprt))
+		task->tk_action = call_bind;
+	else if (!xprt_connected(task->tk_xprt))
+		task->tk_action = call_connect;
 }
 
 /*
@@ -1847,7 +1831,6 @@ rpc_task_handle_transmitted(struct rpc_task *task)
 {
 	xprt_end_transmit(task);
 	task->tk_action = call_transmit_status;
-	call_transmit_status(task);
 }
 
 /*
@@ -1865,7 +1848,6 @@ call_bind(struct rpc_task *task)
 
 	if (xprt_bound(xprt)) {
 		task->tk_action = call_connect;
-		call_connect(task);
 		return;
 	}
 
@@ -1896,7 +1878,6 @@ call_bind_status(struct rpc_task *task)
 		dprint_status(task);
 		task->tk_status = 0;
 		task->tk_action = call_connect;
-		call_connect(task);
 		return;
 	}
 
@@ -1981,7 +1962,6 @@ call_connect(struct rpc_task *task)
 
 	if (xprt_connected(xprt)) {
 		task->tk_action = call_transmit;
-		call_transmit(task);
 		return;
 	}
 
@@ -2051,7 +2031,6 @@ call_connect_status(struct rpc_task *task)
 	case 0:
 		clnt->cl_stats->netreconn++;
 		task->tk_action = call_transmit;
-		call_transmit(task);
 		return;
 	}
 	rpc_exit(task, status);
@@ -2087,9 +2066,6 @@ call_transmit(struct rpc_task *task)
 		xprt_transmit(task);
 	}
 	xprt_end_transmit(task);
-	if (rpc_task_need_resched(task))
-		return;
-	call_transmit_status(task);
 }
 
 /*
@@ -2107,9 +2083,6 @@ call_transmit_status(struct rpc_task *task)
 	if (rpc_task_transmitted(task)) {
 		if (task->tk_status == 0)
 			xprt_request_wait_receive(task);
-		if (rpc_task_need_resched(task))
-			return;
-		call_status(task);
 		return;
 	}
 
@@ -2170,7 +2143,6 @@ call_bc_encode(struct rpc_task *task)
 {
 	xprt_request_enqueue_transmit(task);
 	task->tk_action = call_bc_transmit;
-	call_bc_transmit(task);
 }
 
 /*
@@ -2261,7 +2233,6 @@ call_status(struct rpc_task *task)
 	status = task->tk_status;
 	if (status >= 0) {
 		task->tk_action = call_decode;
-		call_decode(task);
 		return;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From d3706566ae3d92677b932dd156157fd6c72534b1 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 9 Apr 2019 19:53:55 +0800
Subject: net: netrom: Fix error cleanup path of nr_proto_init

Syzkaller report this:

BUG: unable to handle kernel paging request at fffffbfff830524b
PGD 237fe8067 P4D 237fe8067 PUD 237e64067 PMD 1c9716067 PTE 0
Oops: 0000 [#1] SMP KASAN PTI
CPU: 1 PID: 4465 Comm: syz-executor.0 Not tainted 5.0.0+ #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
RIP: 0010:__list_add_valid+0x21/0xe0 lib/list_debug.c:23
Code: 8b 0c 24 e9 17 fd ff ff 90 55 48 89 fd 48 8d 7a 08 53 48 89 d3 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 48 83 ec 08 <80> 3c 02 00 0f 85 8b 00 00 00 48 8b 53 08 48 39 f2 75 35 48 89 f2
RSP: 0018:ffff8881ea2278d0 EFLAGS: 00010282
RAX: dffffc0000000000 RBX: ffffffffc1829250 RCX: 1ffff1103d444ef4
RDX: 1ffffffff830524b RSI: ffffffff85659300 RDI: ffffffffc1829258
RBP: ffffffffc1879250 R08: fffffbfff0acb269 R09: fffffbfff0acb269
R10: ffff8881ea2278f0 R11: fffffbfff0acb268 R12: ffffffffc1829250
R13: dffffc0000000000 R14: 0000000000000008 R15: ffffffffc187c830
FS:  00007fe0361df700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: fffffbfff830524b CR3: 00000001eb39a001 CR4: 00000000007606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
 __list_add include/linux/list.h:60 [inline]
 list_add include/linux/list.h:79 [inline]
 proto_register+0x444/0x8f0 net/core/sock.c:3375
 nr_proto_init+0x73/0x4b3 [netrom]
 ? 0xffffffffc1628000
 ? 0xffffffffc1628000
 do_one_initcall+0xbc/0x47d init/main.c:887
 do_init_module+0x1b5/0x547 kernel/module.c:3456
 load_module+0x6405/0x8c10 kernel/module.c:3804
 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898
 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x462e99
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fe0361dec58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99
RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003
RBP: 00007fe0361dec70 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe0361df6bc
R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004
Modules linked in: netrom(+) ax25 fcrypt pcbc af_alg arizona_ldo1 v4l2_common videodev media v4l2_dv_timings hdlc ide_cd_mod snd_soc_sigmadsp_regmap snd_soc_sigmadsp intel_spi_platform intel_spi mtd spi_nor snd_usbmidi_lib usbcore lcd ti_ads7950 hi6421_regulator snd_soc_kbl_rt5663_max98927 snd_soc_hdac_hdmi snd_hda_ext_core snd_hda_core snd_soc_rt5663 snd_soc_core snd_pcm_dmaengine snd_compress snd_soc_rl6231 mac80211 rtc_rc5t583 spi_slave_time leds_pwm hid_gt683r hid industrialio_triggered_buffer kfifo_buf industrialio ir_kbd_i2c rc_core led_class_flash dwc_xlgmac snd_ymfpci gameport snd_mpu401_uart snd_rawmidi snd_ac97_codec snd_pcm ac97_bus snd_opl3_lib snd_timer snd_seq_device snd_hwdep snd soundcore iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan
 bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev tpm kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ide_pci_generic piix aesni_intel aes_x86_64 crypto_simd cryptd glue_helper ide_core psmouse input_leds i2c_piix4 serio_raw intel_agp intel_gtt ata_generic agpgart pata_acpi parport_pc rtc_cmos parport floppy sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: rxrpc]
Dumping ftrace buffer:
   (ftrace buffer empty)
CR2: fffffbfff830524b
---[ end trace 039ab24b305c4b19 ]---

If nr_proto_init failed, it may forget to call proto_unregister,
tiggering this issue.This patch rearrange code of nr_proto_init
to avoid such issues.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netrom.h           |  2 +-
 net/netrom/af_netrom.c         | 76 ++++++++++++++++++++++++++++++------------
 net/netrom/nr_loopback.c       |  2 +-
 net/netrom/nr_route.c          |  2 +-
 net/netrom/sysctl_net_netrom.c |  5 ++-
 5 files changed, 61 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/net/netrom.h b/include/net/netrom.h
index 5a0714ff500f..80f15b1c1a48 100644
--- a/include/net/netrom.h
+++ b/include/net/netrom.h
@@ -266,7 +266,7 @@ void nr_stop_idletimer(struct sock *);
 int nr_t1timer_running(struct sock *);
 
 /* sysctl_net_netrom.c */
-void nr_register_sysctl(void);
+int nr_register_sysctl(void);
 void nr_unregister_sysctl(void);
 
 #endif
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 1d3144d19903..71ffd1a6dc7c 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1392,18 +1392,22 @@ static int __init nr_proto_init(void)
 	int i;
 	int rc = proto_register(&nr_proto, 0);
 
-	if (rc != 0)
-		goto out;
+	if (rc)
+		return rc;
 
 	if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) {
-		printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n");
-		return -1;
+		pr_err("NET/ROM: %s - nr_ndevs parameter too large\n",
+		       __func__);
+		rc = -EINVAL;
+		goto unregister_proto;
 	}
 
 	dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL);
-	if (dev_nr == NULL) {
-		printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n");
-		return -1;
+	if (!dev_nr) {
+		pr_err("NET/ROM: %s - unable to allocate device array\n",
+		       __func__);
+		rc = -ENOMEM;
+		goto unregister_proto;
 	}
 
 	for (i = 0; i < nr_ndevs; i++) {
@@ -1413,13 +1417,13 @@ static int __init nr_proto_init(void)
 		sprintf(name, "nr%d", i);
 		dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup);
 		if (!dev) {
-			printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n");
+			rc = -ENOMEM;
 			goto fail;
 		}
 
 		dev->base_addr = i;
-		if (register_netdev(dev)) {
-			printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n");
+		rc = register_netdev(dev);
+		if (rc) {
 			free_netdev(dev);
 			goto fail;
 		}
@@ -1427,36 +1431,64 @@ static int __init nr_proto_init(void)
 		dev_nr[i] = dev;
 	}
 
-	if (sock_register(&nr_family_ops)) {
-		printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n");
+	rc = sock_register(&nr_family_ops);
+	if (rc)
 		goto fail;
-	}
 
-	register_netdevice_notifier(&nr_dev_notifier);
+	rc = register_netdevice_notifier(&nr_dev_notifier);
+	if (rc)
+		goto out_sock;
 
 	ax25_register_pid(&nr_pid);
 	ax25_linkfail_register(&nr_linkfail_notifier);
 
 #ifdef CONFIG_SYSCTL
-	nr_register_sysctl();
+	rc = nr_register_sysctl();
+	if (rc)
+		goto out_sysctl;
 #endif
 
 	nr_loopback_init();
 
-	proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops);
-	proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops);
-	proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops);
-out:
-	return rc;
+	rc = -ENOMEM;
+	if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops))
+		goto proc_remove1;
+	if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net,
+			     &nr_neigh_seqops))
+		goto proc_remove2;
+	if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net,
+			     &nr_node_seqops))
+		goto proc_remove3;
+
+	return 0;
+
+proc_remove3:
+	remove_proc_entry("nr_neigh", init_net.proc_net);
+proc_remove2:
+	remove_proc_entry("nr", init_net.proc_net);
+proc_remove1:
+
+	nr_loopback_clear();
+	nr_rt_free();
+
+#ifdef CONFIG_SYSCTL
+	nr_unregister_sysctl();
+out_sysctl:
+#endif
+	ax25_linkfail_release(&nr_linkfail_notifier);
+	ax25_protocol_release(AX25_P_NETROM);
+	unregister_netdevice_notifier(&nr_dev_notifier);
+out_sock:
+	sock_unregister(PF_NETROM);
 fail:
 	while (--i >= 0) {
 		unregister_netdev(dev_nr[i]);
 		free_netdev(dev_nr[i]);
 	}
 	kfree(dev_nr);
+unregister_proto:
 	proto_unregister(&nr_proto);
-	rc = -1;
-	goto out;
+	return rc;
 }
 
 module_init(nr_proto_init);
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index 215ad22a9647..93d13f019981 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -70,7 +70,7 @@ static void nr_loopback_timer(struct timer_list *unused)
 	}
 }
 
-void __exit nr_loopback_clear(void)
+void nr_loopback_clear(void)
 {
 	del_timer_sync(&loopback_timer);
 	skb_queue_purge(&loopback_queue);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 6485f593e2f0..b76aa668a94b 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -953,7 +953,7 @@ const struct seq_operations nr_neigh_seqops = {
 /*
  *	Free all memory associated with the nodes and routes lists.
  */
-void __exit nr_rt_free(void)
+void nr_rt_free(void)
 {
 	struct nr_neigh *s = NULL;
 	struct nr_node  *t = NULL;
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index ba1c368b3f18..771011b84270 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -146,9 +146,12 @@ static struct ctl_table nr_table[] = {
 	{ }
 };
 
-void __init nr_register_sysctl(void)
+int __init nr_register_sysctl(void)
 {
 	nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table);
+	if (!nr_table_header)
+		return -ENOMEM;
+	return 0;
 }
 
 void nr_unregister_sysctl(void)
-- 
cgit v1.2.3-59-g8ed1b


From 4611da30d679a4b0a2c2b5d4d7b3fbbafc922df7 Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Fri, 12 Apr 2019 16:33:47 +0100
Subject: rxrpc: Make rxrpc_kernel_check_life() indicate if call completed

Make rxrpc_kernel_check_life() pass back the life counter through the
argument list and return true if the call has not yet completed.

Suggested-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/rxrpc.txt | 16 +++++++++-------
 fs/afs/rxrpc.c                     |  4 ++--
 include/net/af_rxrpc.h             |  4 +++-
 net/rxrpc/af_rxrpc.c               | 14 +++++++++-----
 4 files changed, 23 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 2df5894353d6..cd7303d7fa25 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -1009,16 +1009,18 @@ The kernel interface functions are as follows:
 
  (*) Check call still alive.
 
-	u32 rxrpc_kernel_check_life(struct socket *sock,
-				    struct rxrpc_call *call);
+	bool rxrpc_kernel_check_life(struct socket *sock,
+				     struct rxrpc_call *call,
+				     u32 *_life);
 	void rxrpc_kernel_probe_life(struct socket *sock,
 				     struct rxrpc_call *call);
 
-     The first function returns a number that is updated when ACKs are received
-     from the peer (notably including PING RESPONSE ACKs which we can elicit by
-     sending PING ACKs to see if the call still exists on the server).  The
-     caller should compare the numbers of two calls to see if the call is still
-     alive after waiting for a suitable interval.
+     The first function passes back in *_life a number that is updated when
+     ACKs are received from the peer (notably including PING RESPONSE ACKs
+     which we can elicit by sending PING ACKs to see if the call still exists
+     on the server).  The caller should compare the numbers of two calls to see
+     if the call is still alive after waiting for a suitable interval.  It also
+     returns true as long as the call hasn't yet reached the completed state.
 
      This allows the caller to work out if the server is still contactable and
      if the call is still alive on the server while waiting for the server to
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 2c588f9bbbda..5cb11aff9298 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -621,7 +621,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 		rtt2 = 2;
 
 	timeout = rtt2;
-	last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
+	rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life);
 
 	add_wait_queue(&call->waitq, &myself);
 	for (;;) {
@@ -639,7 +639,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 		if (afs_check_call_state(call, AFS_CALL_COMPLETE))
 			break;
 
-		life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
+		rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life);
 		if (timeout == 0 &&
 		    life == last_life && signal_pending(current)) {
 			if (stalled)
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 2bfb87eb98ce..78c856cba4f5 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -61,10 +61,12 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
 			       rxrpc_user_attach_call_t, unsigned long, gfp_t,
 			       unsigned int);
 void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
-u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
+bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *,
+			     u32 *);
 void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *);
 u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
 bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
 				 ktime_t *);
+bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
 
 #endif /* _NET_RXRPC_H */
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index c54dce3ca0dd..ae8c5d7f3bf1 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -371,18 +371,22 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
  * rxrpc_kernel_check_life - Check to see whether a call is still alive
  * @sock: The socket the call is on
  * @call: The call to check
+ * @_life: Where to store the life value
  *
  * Allow a kernel service to find out whether a call is still alive - ie. we're
- * getting ACKs from the server.  Returns a number representing the life state
- * which can be compared to that returned by a previous call.
+ * getting ACKs from the server.  Passes back in *_life a number representing
+ * the life state which can be compared to that returned by a previous call and
+ * return true if the call is still alive.
  *
  * If the life state stalls, rxrpc_kernel_probe_life() should be called and
  * then 2RTT waited.
  */
-u32 rxrpc_kernel_check_life(const struct socket *sock,
-			    const struct rxrpc_call *call)
+bool rxrpc_kernel_check_life(const struct socket *sock,
+			     const struct rxrpc_call *call,
+			     u32 *_life)
 {
-	return call->acks_latest;
+	*_life = call->acks_latest;
+	return call->state != RXRPC_CALL_COMPLETE;
 }
 EXPORT_SYMBOL(rxrpc_kernel_check_life);
 
-- 
cgit v1.2.3-59-g8ed1b


From f958d7b528b1b40c44cfda5eabe2d82760d868c3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 11 Apr 2019 10:06:20 -0700
Subject: mm: make page ref count overflow check tighter and more explicit

We have a VM_BUG_ON() to check that the page reference count doesn't
underflow (or get close to overflow) by checking the sign of the count.

That's all fine, but we actually want to allow people to use a "get page
ref unless it's already very high" helper function, and we want that one
to use the sign of the page ref (without triggering this VM_BUG_ON).

Change the VM_BUG_ON to only check for small underflows (or _very_ close
to overflowing), and ignore overflows which have strayed into negative
territory.

Acked-by: Matthew Wilcox <willy@infradead.org>
Cc: Jann Horn <jannh@google.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80bb6408fe73..541d99b86aea 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -965,6 +965,10 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
 }
 #endif /* CONFIG_DEV_PAGEMAP_OPS */
 
+/* 127: arbitrary random number, small enough to assemble well */
+#define page_ref_zero_or_close_to_overflow(page) \
+	((unsigned int) page_ref_count(page) + 127u <= 127u)
+
 static inline void get_page(struct page *page)
 {
 	page = compound_head(page);
@@ -972,7 +976,7 @@ static inline void get_page(struct page *page)
 	 * Getting a normal page or the head of a compound page
 	 * requires to already have an elevated page->_refcount.
 	 */
-	VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
+	VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
 	page_ref_inc(page);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 88b1a17dfc3ed7728316478fae0f5ad508f50397 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 11 Apr 2019 10:14:59 -0700
Subject: mm: add 'try_get_page()' helper function

This is the same as the traditional 'get_page()' function, but instead
of unconditionally incrementing the reference count of the page, it only
does so if the count was "safe".  It returns whether the reference count
was incremented (and is marked __must_check, since the caller obviously
has to be aware of it).

Also like 'get_page()', you can't use this function unless you already
had a reference to the page.  The intent is that you can use this
exactly like get_page(), but in situations where you want to limit the
maximum reference count.

The code currently does an unconditional WARN_ON_ONCE() if we ever hit
the reference count issues (either zero or negative), as a notification
that the conditional non-increment actually happened.

NOTE! The count access for the "safety" check is inherently racy, but
that doesn't matter since the buffer we use is basically half the range
of the reference count (ie we look at the sign of the count).

Acked-by: Matthew Wilcox <willy@infradead.org>
Cc: Jann Horn <jannh@google.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 541d99b86aea..7000ddd807e0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -980,6 +980,15 @@ static inline void get_page(struct page *page)
 	page_ref_inc(page);
 }
 
+static inline __must_check bool try_get_page(struct page *page)
+{
+	page = compound_head(page);
+	if (WARN_ON_ONCE(page_ref_count(page) <= 0))
+		return false;
+	page_ref_inc(page);
+	return true;
+}
+
 static inline void put_page(struct page *page)
 {
 	page = compound_head(page);
-- 
cgit v1.2.3-59-g8ed1b


From 15fab63e1e57be9fdb5eec1bbc5916e9825e9acb Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 5 Apr 2019 14:02:10 -0700
Subject: fs: prevent page refcount overflow in pipe_buf_get

Change pipe_buf_get() to return a bool indicating whether it succeeded
in raising the refcount of the page (if the thing in the pipe is a page).
This removes another mechanism for overflowing the page refcount.  All
callers converted to handle a failure.

Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/dev.c             | 12 ++++++------
 fs/pipe.c                 |  4 ++--
 fs/splice.c               | 12 ++++++++++--
 include/linux/pipe_fs_i.h | 10 ++++++----
 kernel/trace/trace.c      |  6 +++++-
 5 files changed, 29 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 809c0f2f9942..64f4de983468 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2034,10 +2034,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 		rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
 
 	ret = -EINVAL;
-	if (rem < len) {
-		pipe_unlock(pipe);
-		goto out;
-	}
+	if (rem < len)
+		goto out_free;
 
 	rem = len;
 	while (rem) {
@@ -2055,7 +2053,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
 			pipe->nrbufs--;
 		} else {
-			pipe_buf_get(pipe, ibuf);
+			if (!pipe_buf_get(pipe, ibuf))
+				goto out_free;
+
 			*obuf = *ibuf;
 			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
 			obuf->len = rem;
@@ -2078,11 +2078,11 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 	ret = fuse_dev_do_write(fud, &cs, len);
 
 	pipe_lock(pipe);
+out_free:
 	for (idx = 0; idx < nbuf; idx++)
 		pipe_buf_release(pipe, &bufs[idx]);
 	pipe_unlock(pipe);
 
-out:
 	kvfree(bufs);
 	return ret;
 }
diff --git a/fs/pipe.c b/fs/pipe.c
index bdc5d3c0977d..b1543b85c14a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -189,9 +189,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal);
  *	in the tee() system call, when we duplicate the buffers in one
  *	pipe into another.
  */
-void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
 {
-	get_page(buf->page);
+	return try_get_page(buf->page);
 }
 EXPORT_SYMBOL(generic_pipe_buf_get);
 
diff --git a/fs/splice.c b/fs/splice.c
index de2ede048473..f30af82b850d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1588,7 +1588,11 @@ retry:
 			 * Get a reference to this pipe buffer,
 			 * so we can copy the contents over.
 			 */
-			pipe_buf_get(ipipe, ibuf);
+			if (!pipe_buf_get(ipipe, ibuf)) {
+				if (ret == 0)
+					ret = -EFAULT;
+				break;
+			}
 			*obuf = *ibuf;
 
 			/*
@@ -1660,7 +1664,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		 * Get a reference to this pipe buffer,
 		 * so we can copy the contents over.
 		 */
-		pipe_buf_get(ipipe, ibuf);
+		if (!pipe_buf_get(ipipe, ibuf)) {
+			if (ret == 0)
+				ret = -EFAULT;
+			break;
+		}
 
 		obuf = opipe->bufs + nbuf;
 		*obuf = *ibuf;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 5a3bb3b7c9ad..3f2a42c11e20 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -108,18 +108,20 @@ struct pipe_buf_operations {
 	/*
 	 * Get a reference to the pipe buffer.
 	 */
-	void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
+	bool (*get)(struct pipe_inode_info *, struct pipe_buffer *);
 };
 
 /**
  * pipe_buf_get - get a reference to a pipe_buffer
  * @pipe:	the pipe that the buffer belongs to
  * @buf:	the buffer to get a reference to
+ *
+ * Return: %true if the reference was successfully obtained.
  */
-static inline void pipe_buf_get(struct pipe_inode_info *pipe,
+static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe,
 				struct pipe_buffer *buf)
 {
-	buf->ops->get(pipe, buf);
+	return buf->ops->get(pipe, buf);
 }
 
 /**
@@ -178,7 +180,7 @@ struct pipe_inode_info *alloc_pipe_info(void);
 void free_pipe_info(struct pipe_inode_info *);
 
 /* Generic pipe buffer ops functions */
-void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
+bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
 void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c4238b441624..0f300d488c9f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6835,12 +6835,16 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
 	buf->private = 0;
 }
 
-static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
+static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
 				struct pipe_buffer *buf)
 {
 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
 
+	if (ref->ref > INT_MAX/2)
+		return false;
+
 	ref->ref++;
+	return true;
 }
 
 /* Pipe buffer operations for a buffer. */
-- 
cgit v1.2.3-59-g8ed1b


From 1d487e9bf8ba66a7174c56a0029c54b1eca8f99c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 11 Apr 2019 11:16:47 +0200
Subject: KVM: fix spectrev1 gadgets

These were found with smatch, and then generalized when applicable.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c     |  4 +++-
 include/linux/kvm_host.h | 10 ++++++----
 virt/kvm/irqchip.c       |  5 +++--
 virt/kvm/kvm_main.c      |  6 ++++--
 4 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 991fdf7fc17f..9bf70cf84564 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -138,6 +138,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
 		if (offset <= max_apic_id) {
 			u8 cluster_size = min(max_apic_id - offset + 1, 16U);
 
+			offset = array_index_nospec(offset, map->max_apic_id + 1);
 			*cluster = &map->phys_map[offset];
 			*mask = dest_id & (0xffff >> (16 - cluster_size));
 		} else {
@@ -901,7 +902,8 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
 		if (irq->dest_id > map->max_apic_id) {
 			*bitmap = 0;
 		} else {
-			*dst = &map->phys_map[irq->dest_id];
+			u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
+			*dst = &map->phys_map[dest_id];
 			*bitmap = 1;
 		}
 		return true;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9d55c63db09b..640a03642766 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -28,6 +28,7 @@
 #include <linux/irqbypass.h>
 #include <linux/swait.h>
 #include <linux/refcount.h>
+#include <linux/nospec.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -513,10 +514,10 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
 
 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 {
-	/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
-	 * the caller has read kvm->online_vcpus before (as is the case
-	 * for kvm_for_each_vcpu, for example).
-	 */
+	int num_vcpus = atomic_read(&kvm->online_vcpus);
+	i = array_index_nospec(i, num_vcpus);
+
+	/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu.  */
 	smp_rmb();
 	return kvm->vcpus[i];
 }
@@ -600,6 +601,7 @@ void kvm_put_kvm(struct kvm *kvm);
 
 static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
 {
+	as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM);
 	return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
 			lockdep_is_held(&kvm->slots_lock) ||
 			!refcount_read(&kvm->users_count));
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 3547b0d8c91e..79e59e4fa3dc 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -144,18 +144,19 @@ static int setup_routing_entry(struct kvm *kvm,
 {
 	struct kvm_kernel_irq_routing_entry *ei;
 	int r;
+	u32 gsi = array_index_nospec(ue->gsi, KVM_MAX_IRQ_ROUTES);
 
 	/*
 	 * Do not allow GSI to be mapped to the same irqchip more than once.
 	 * Allow only one to one mapping between GSI and non-irqchip routing.
 	 */
-	hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
+	hlist_for_each_entry(ei, &rt->map[gsi], link)
 		if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
 		    ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
 		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
 			return -EINVAL;
 
-	e->gsi = ue->gsi;
+	e->gsi = gsi;
 	e->type = ue->type;
 	r = kvm_set_routing_entry(kvm, e, ue);
 	if (r)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 55fe8e20d8fd..dc8edc97ba85 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2977,12 +2977,14 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	struct kvm_device_ops *ops = NULL;
 	struct kvm_device *dev;
 	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+	int type;
 	int ret;
 
 	if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
 		return -ENODEV;
 
-	ops = kvm_device_ops_table[cd->type];
+	type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table));
+	ops = kvm_device_ops_table[type];
 	if (ops == NULL)
 		return -ENODEV;
 
@@ -2997,7 +2999,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	dev->kvm = kvm;
 
 	mutex_lock(&kvm->lock);
-	ret = ops->create(dev, cd->type);
+	ret = ops->create(dev, type);
 	if (ret < 0) {
 		mutex_unlock(&kvm->lock);
 		kfree(dev);
-- 
cgit v1.2.3-59-g8ed1b