From 04a258c162a85c0f4ae56be67634dc43c9a4fa9b Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 4 Nov 2014 13:40:11 +0100 Subject: Drivers: hv: vmbus: Fix a race condition when unregistering a device When build with Debug the following crash is sometimes observed: Call Trace: [] string+0x40/0x100 [] vsnprintf+0x218/0x5e0 [] ? trace_hardirqs_off+0xd/0x10 [] vscnprintf+0x11/0x30 [] vprintk+0xd0/0x5c0 [] ? vmbus_process_rescind_offer+0x0/0x110 [hv_vmbus] [] printk+0x41/0x45 [] vmbus_device_unregister+0x2c/0x40 [hv_vmbus] [] vmbus_process_rescind_offer+0x2b/0x110 [hv_vmbus] ... This happens due to the following race: between 'if (channel->device_obj)' check in vmbus_process_rescind_offer() and pr_debug() in vmbus_device_unregister() the device can disappear. Fix the issue by taking an additional reference to the device before proceeding to vmbus_device_unregister(). Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index a2d1a9612c86..d36ce6835fb7 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -216,9 +216,16 @@ static void vmbus_process_rescind_offer(struct work_struct *work) unsigned long flags; struct vmbus_channel *primary_channel; struct vmbus_channel_relid_released msg; + struct device *dev; + + if (channel->device_obj) { + dev = get_device(&channel->device_obj->device); + if (dev) { + vmbus_device_unregister(channel->device_obj); + put_device(dev); + } + } - if (channel->device_obj) - vmbus_device_unregister(channel->device_obj); memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); msg.child_relid = channel->offermsg.child_relid; msg.header.msgtype = CHANNELMSG_RELID_RELEASED; -- cgit v1.2.3-59-g8ed1b From 649142074d86afebe0505431a93957505d244dd6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 6 Nov 2014 18:21:24 +0100 Subject: Drivers: hv: vss: Introduce timeout for communication with userspace In contrast with KVP there is no timeout when communicating with userspace VSS daemon. In case it gets stuck performing freeze/thaw operation no message will be sent to the host so it will take very long (around 10 minutes) before backup fails. Introduce 10 second timeout using schedule_delayed_work(). Signed-off-by: Vitaly Kuznetsov Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_snapshot.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index 34f14fddb666..21e51be74e6c 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -28,7 +28,7 @@ #define VSS_MINOR 0 #define VSS_VERSION (VSS_MAJOR << 16 | VSS_MINOR) - +#define VSS_USERSPACE_TIMEOUT (msecs_to_jiffies(10 * 1000)) /* * Global state maintained for transaction that is being processed. @@ -55,12 +55,24 @@ static const char vss_name[] = "vss_kernel_module"; static __u8 *recv_buffer; static void vss_send_op(struct work_struct *dummy); +static void vss_timeout_func(struct work_struct *dummy); + +static DECLARE_DELAYED_WORK(vss_timeout_work, vss_timeout_func); static DECLARE_WORK(vss_send_op_work, vss_send_op); /* * Callback when data is received from user mode. */ +static void vss_timeout_func(struct work_struct *dummy) +{ + /* + * Timeout waiting for userspace component to reply happened. + */ + pr_warn("VSS: timeout waiting for daemon to reply\n"); + vss_respond_to_host(HV_E_FAIL); +} + static void vss_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) { @@ -76,7 +88,8 @@ vss_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) return; } - vss_respond_to_host(vss_msg->error); + if (cancel_delayed_work_sync(&vss_timeout_work)) + vss_respond_to_host(vss_msg->error); } @@ -223,6 +236,8 @@ void hv_vss_onchannelcallback(void *context) case VSS_OP_FREEZE: case VSS_OP_THAW: schedule_work(&vss_send_op_work); + schedule_delayed_work(&vss_timeout_work, + VSS_USERSPACE_TIMEOUT); return; case VSS_OP_HOT_BACKUP: @@ -277,5 +292,6 @@ hv_vss_init(struct hv_util_service *srv) void hv_vss_deinit(void) { cn_del_callback(&vss_id); + cancel_delayed_work_sync(&vss_timeout_work); cancel_work_sync(&vss_send_op_work); } -- cgit v1.2.3-59-g8ed1b From 8d9560ebcc6448472b3afe8f36f37d6b0de8f5a4 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 6 Nov 2014 18:21:25 +0100 Subject: Drivers: hv: kvp,vss: Fast propagation of userspace communication failure If we fail to send a message to userspace daemon with cn_netlink_send() there is no need to wait for userspace to reply as it is not going to happen. This happens when kvp or vss daemon is stopped after a successful handshake. Report HV_E_FAIL immediately and cancel the timeout job so host won't receive two failures. Use pr_warn() for VSS and pr_debug() for KVP deliberately as VSS request are rare and result in a failed backup. KVP requests are much more frequent after a successful handshake so avoid flooding logs. It would be nice to have an ability to de-negotiate with the host in case userspace daemon gets disconnected so we won't receive new requests. But I'm not sure it is possible. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_kvp.c | 9 ++++++++- drivers/hv/hv_snapshot.c | 8 +++++++- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 521c14625b3a..beb8105c0e7b 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -350,6 +350,7 @@ kvp_send_key(struct work_struct *dummy) __u8 pool = kvp_transaction.kvp_msg->kvp_hdr.pool; __u32 val32; __u64 val64; + int rc; msg = kzalloc(sizeof(*msg) + sizeof(struct hv_kvp_msg) , GFP_ATOMIC); if (!msg) @@ -446,7 +447,13 @@ kvp_send_key(struct work_struct *dummy) } msg->len = sizeof(struct hv_kvp_msg); - cn_netlink_send(msg, 0, 0, GFP_ATOMIC); + rc = cn_netlink_send(msg, 0, 0, GFP_ATOMIC); + if (rc) { + pr_debug("KVP: failed to communicate to the daemon: %d\n", rc); + if (cancel_delayed_work_sync(&kvp_work)) + kvp_respond_to_host(message, HV_E_FAIL); + } + kfree(msg); return; diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index 21e51be74e6c..9d5e0d1efdb5 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -96,6 +96,7 @@ vss_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) static void vss_send_op(struct work_struct *dummy) { int op = vss_transaction.msg->vss_hdr.operation; + int rc; struct cn_msg *msg; struct hv_vss_msg *vss_msg; @@ -111,7 +112,12 @@ static void vss_send_op(struct work_struct *dummy) vss_msg->vss_hdr.operation = op; msg->len = sizeof(struct hv_vss_msg); - cn_netlink_send(msg, 0, 0, GFP_ATOMIC); + rc = cn_netlink_send(msg, 0, 0, GFP_ATOMIC); + if (rc) { + pr_warn("VSS: failed to communicate to the daemon: %d\n", rc); + if (cancel_delayed_work_sync(&vss_timeout_work)) + vss_respond_to_host(HV_E_FAIL); + } kfree(msg); return; -- cgit v1.2.3-59-g8ed1b From f6712238471a8afdbfcea482483fc121281292d8 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 24 Nov 2014 20:32:43 -0800 Subject: hv: hv_balloon: avoid memory leak on alloc_error of 2MB memory block If num_ballooned is not 0, we shouldn't neglect the already-partially-allocated 2MB memory block(s). Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Acked-by: Jason Wang Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 5e90c5d771a7..b958ded8ac7e 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1087,10 +1087,12 @@ static void balloon_up(struct work_struct *dummy) struct dm_balloon_response *bl_resp; int alloc_unit; int ret; - bool alloc_error = false; + bool alloc_error; bool done = false; int i; + /* The host balloons pages in 2M granularity. */ + WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0); /* * We will attempt 2M allocations. However, if we fail to @@ -1107,16 +1109,18 @@ static void balloon_up(struct work_struct *dummy) num_pages -= num_ballooned; + alloc_error = false; num_ballooned = alloc_balloon_pages(&dm_device, num_pages, bl_resp, alloc_unit, &alloc_error); - if ((alloc_error) && (alloc_unit != 1)) { + if (alloc_unit != 1 && num_ballooned == 0) { alloc_unit = 1; continue; } - if ((alloc_error) || (num_ballooned == num_pages)) { + if ((alloc_unit == 1 && alloc_error) || + (num_ballooned == num_pages)) { bl_resp->more_pages = 0; done = true; dm_device.state = DM_INITIALIZED; -- cgit v1.2.3-59-g8ed1b