From 04a258c162a85c0f4ae56be67634dc43c9a4fa9b Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 4 Nov 2014 13:40:11 +0100
Subject: Drivers: hv: vmbus: Fix a race condition when unregistering a device

When build with Debug the following crash is sometimes observed:
Call Trace:
 [<ffffffff812b9600>] string+0x40/0x100
 [<ffffffff812bb038>] vsnprintf+0x218/0x5e0
 [<ffffffff810baf7d>] ? trace_hardirqs_off+0xd/0x10
 [<ffffffff812bb4c1>] vscnprintf+0x11/0x30
 [<ffffffff8107a2f0>] vprintk+0xd0/0x5c0
 [<ffffffffa0051ea0>] ? vmbus_process_rescind_offer+0x0/0x110 [hv_vmbus]
 [<ffffffff8155c71c>] printk+0x41/0x45
 [<ffffffffa004ebac>] vmbus_device_unregister+0x2c/0x40 [hv_vmbus]
 [<ffffffffa0051ecb>] vmbus_process_rescind_offer+0x2b/0x110 [hv_vmbus]
...

This happens due to the following race: between 'if (channel->device_obj)' check
in vmbus_process_rescind_offer() and pr_debug() in vmbus_device_unregister() the
device can disappear. Fix the issue by taking an additional reference to the
device before proceeding to vmbus_device_unregister().

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel_mgmt.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'drivers/hv')
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index a2d1a9612c86..d36ce6835fb7 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -216,9 +216,16 @@ static void vmbus_process_rescind_offer(struct work_struct *work)
 	unsigned long flags;
 	struct vmbus_channel *primary_channel;
 	struct vmbus_channel_relid_released msg;
+	struct device *dev;
+
+	if (channel->device_obj) {
+		dev = get_device(&channel->device_obj->device);
+		if (dev) {
+			vmbus_device_unregister(channel->device_obj);
+			put_device(dev);
+		}
+	}
 
-	if (channel->device_obj)
-		vmbus_device_unregister(channel->device_obj);
 	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
 	msg.child_relid = channel->offermsg.child_relid;
 	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
-- 
cgit v1.2.3-59-g8ed1b


From 649142074d86afebe0505431a93957505d244dd6 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 6 Nov 2014 18:21:24 +0100
Subject: Drivers: hv: vss: Introduce timeout for communication with userspace

In contrast with KVP there is no timeout when communicating with
userspace VSS daemon. In case it gets stuck performing freeze/thaw
operation no message will be sent to the host so it will take very
long (around 10 minutes) before backup fails. Introduce 10 second
timeout using schedule_delayed_work().

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_snapshot.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 34f14fddb666..21e51be74e6c 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -28,7 +28,7 @@
 #define VSS_MINOR  0
 #define VSS_VERSION    (VSS_MAJOR << 16 | VSS_MINOR)
 
-
+#define VSS_USERSPACE_TIMEOUT (msecs_to_jiffies(10 * 1000))
 
 /*
  * Global state maintained for transaction that is being processed.
@@ -55,12 +55,24 @@ static const char vss_name[] = "vss_kernel_module";
 static __u8 *recv_buffer;
 
 static void vss_send_op(struct work_struct *dummy);
+static void vss_timeout_func(struct work_struct *dummy);
+
+static DECLARE_DELAYED_WORK(vss_timeout_work, vss_timeout_func);
 static DECLARE_WORK(vss_send_op_work, vss_send_op);
 
 /*
  * Callback when data is received from user mode.
  */
 
+static void vss_timeout_func(struct work_struct *dummy)
+{
+	/*
+	 * Timeout waiting for userspace component to reply happened.
+	 */
+	pr_warn("VSS: timeout waiting for daemon to reply\n");
+	vss_respond_to_host(HV_E_FAIL);
+}
+
 static void
 vss_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 {
@@ -76,7 +88,8 @@ vss_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 		return;
 
 	}
-	vss_respond_to_host(vss_msg->error);
+	if (cancel_delayed_work_sync(&vss_timeout_work))
+		vss_respond_to_host(vss_msg->error);
 }
 
 
@@ -223,6 +236,8 @@ void hv_vss_onchannelcallback(void *context)
 			case VSS_OP_FREEZE:
 			case VSS_OP_THAW:
 				schedule_work(&vss_send_op_work);
+				schedule_delayed_work(&vss_timeout_work,
+						      VSS_USERSPACE_TIMEOUT);
 				return;
 
 			case VSS_OP_HOT_BACKUP:
@@ -277,5 +292,6 @@ hv_vss_init(struct hv_util_service *srv)
 void hv_vss_deinit(void)
 {
 	cn_del_callback(&vss_id);
+	cancel_delayed_work_sync(&vss_timeout_work);
 	cancel_work_sync(&vss_send_op_work);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 8d9560ebcc6448472b3afe8f36f37d6b0de8f5a4 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 6 Nov 2014 18:21:25 +0100
Subject: Drivers: hv: kvp,vss: Fast propagation of userspace communication
 failure

If we fail to send a message to userspace daemon with cn_netlink_send()
there is no need to wait for userspace to reply as it is not going to
happen. This happens when kvp or vss daemon is stopped after a successful
handshake. Report HV_E_FAIL immediately and cancel the timeout job so
host won't receive two failures.
Use pr_warn() for VSS and pr_debug() for KVP deliberately as VSS request
are rare and result in a failed backup. KVP requests are much more frequent
after a successful handshake so avoid flooding logs. It would be nice to
have an ability to de-negotiate with the host in case userspace daemon gets
disconnected so we won't receive new requests. But I'm not sure it is
possible.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_kvp.c      | 9 ++++++++-
 drivers/hv/hv_snapshot.c | 8 +++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 521c14625b3a..beb8105c0e7b 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -350,6 +350,7 @@ kvp_send_key(struct work_struct *dummy)
 	__u8 pool = kvp_transaction.kvp_msg->kvp_hdr.pool;
 	__u32 val32;
 	__u64 val64;
+	int rc;
 
 	msg = kzalloc(sizeof(*msg) + sizeof(struct hv_kvp_msg) , GFP_ATOMIC);
 	if (!msg)
@@ -446,7 +447,13 @@ kvp_send_key(struct work_struct *dummy)
 	}
 
 	msg->len = sizeof(struct hv_kvp_msg);
-	cn_netlink_send(msg, 0, 0, GFP_ATOMIC);
+	rc = cn_netlink_send(msg, 0, 0, GFP_ATOMIC);
+	if (rc) {
+		pr_debug("KVP: failed to communicate to the daemon: %d\n", rc);
+		if (cancel_delayed_work_sync(&kvp_work))
+			kvp_respond_to_host(message, HV_E_FAIL);
+	}
+
 	kfree(msg);
 
 	return;
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 21e51be74e6c..9d5e0d1efdb5 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -96,6 +96,7 @@ vss_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 static void vss_send_op(struct work_struct *dummy)
 {
 	int op = vss_transaction.msg->vss_hdr.operation;
+	int rc;
 	struct cn_msg *msg;
 	struct hv_vss_msg *vss_msg;
 
@@ -111,7 +112,12 @@ static void vss_send_op(struct work_struct *dummy)
 	vss_msg->vss_hdr.operation = op;
 	msg->len = sizeof(struct hv_vss_msg);
 
-	cn_netlink_send(msg, 0, 0, GFP_ATOMIC);
+	rc = cn_netlink_send(msg, 0, 0, GFP_ATOMIC);
+	if (rc) {
+		pr_warn("VSS: failed to communicate to the daemon: %d\n", rc);
+		if (cancel_delayed_work_sync(&vss_timeout_work))
+			vss_respond_to_host(HV_E_FAIL);
+	}
 	kfree(msg);
 
 	return;
-- 
cgit v1.2.3-59-g8ed1b


From f6712238471a8afdbfcea482483fc121281292d8 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Mon, 24 Nov 2014 20:32:43 -0800
Subject: hv: hv_balloon: avoid memory leak on alloc_error of 2MB memory block

If num_ballooned is not 0, we shouldn't neglect the
already-partially-allocated 2MB memory block(s).

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_balloon.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 5e90c5d771a7..b958ded8ac7e 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1087,10 +1087,12 @@ static void balloon_up(struct work_struct *dummy)
 	struct dm_balloon_response *bl_resp;
 	int alloc_unit;
 	int ret;
-	bool alloc_error = false;
+	bool alloc_error;
 	bool done = false;
 	int i;
 
+	/* The host balloons pages in 2M granularity. */
+	WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0);
 
 	/*
 	 * We will attempt 2M allocations. However, if we fail to
@@ -1107,16 +1109,18 @@ static void balloon_up(struct work_struct *dummy)
 
 
 		num_pages -= num_ballooned;
+		alloc_error = false;
 		num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
 						bl_resp, alloc_unit,
 						 &alloc_error);
 
-		if ((alloc_error) && (alloc_unit != 1)) {
+		if (alloc_unit != 1 && num_ballooned == 0) {
 			alloc_unit = 1;
 			continue;
 		}
 
-		if ((alloc_error) || (num_ballooned == num_pages)) {
+		if ((alloc_unit == 1 && alloc_error) ||
+			(num_ballooned == num_pages)) {
 			bl_resp->more_pages = 0;
 			done = true;
 			dm_device.state = DM_INITIALIZED;
-- 
cgit v1.2.3-59-g8ed1b