From a8c3209998afb5c4941b49e35b513cea9050cb4a Mon Sep 17 00:00:00 2001 From: Andres Beltran Date: Tue, 8 Dec 2020 05:53:11 +0100 Subject: Drivers: hv: vmbus: Copy packets sent by Hyper-V out of the ring buffer Pointers to ring-buffer packets sent by Hyper-V are used within the guest VM. Hyper-V can send packets with erroneous values or modify packet fields after they are processed by the guest. To defend against these scenarios, return a copy of the incoming VMBus packet after validating its length and offset fields in hv_pkt_iter_first(). In this way, the packet can no longer be modified by the host. Signed-off-by: Andres Beltran Co-developed-by: Andrea Parri (Microsoft) Signed-off-by: Andrea Parri (Microsoft) Cc: "David S. Miller" Cc: Jakub Kicinski Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: netdev@vger.kernel.org Cc: linux-scsi@vger.kernel.org Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20201208045311.10244-1-parri.andrea@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 48 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 5ddb479c4d4c..fbae8406d5d4 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -181,6 +181,10 @@ struct hv_ring_buffer_info { * being freed while the ring buffer is being accessed. */ struct mutex ring_buffer_mutex; + + /* Buffer that holds a copy of an incoming host packet */ + void *pkt_buffer; + u32 pkt_buffer_size; }; @@ -787,6 +791,8 @@ struct vmbus_device { bool perf_device; }; +#define VMBUS_DEFAULT_MAX_PKT_SIZE 4096 + struct vmbus_channel { struct list_head listentry; @@ -1008,6 +1014,9 @@ struct vmbus_channel { /* request/transaction ids for VMBus */ struct vmbus_requestor requestor; u32 rqstor_size; + + /* The max size of a packet on this channel */ + u32 max_pkt_size; }; u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr); @@ -1642,32 +1651,55 @@ static inline u32 hv_pkt_datalen(const struct vmpacket_descriptor *desc) } +struct vmpacket_descriptor * +hv_pkt_iter_first_raw(struct vmbus_channel *channel); + struct vmpacket_descriptor * hv_pkt_iter_first(struct vmbus_channel *channel); struct vmpacket_descriptor * __hv_pkt_iter_next(struct vmbus_channel *channel, - const struct vmpacket_descriptor *pkt); + const struct vmpacket_descriptor *pkt, + bool copy); void hv_pkt_iter_close(struct vmbus_channel *channel); -/* - * Get next packet descriptor from iterator - * If at end of list, return NULL and update host. - */ static inline struct vmpacket_descriptor * -hv_pkt_iter_next(struct vmbus_channel *channel, - const struct vmpacket_descriptor *pkt) +hv_pkt_iter_next_pkt(struct vmbus_channel *channel, + const struct vmpacket_descriptor *pkt, + bool copy) { struct vmpacket_descriptor *nxt; - nxt = __hv_pkt_iter_next(channel, pkt); + nxt = __hv_pkt_iter_next(channel, pkt, copy); if (!nxt) hv_pkt_iter_close(channel); return nxt; } +/* + * Get next packet descriptor without copying it out of the ring buffer + * If at end of list, return NULL and update host. + */ +static inline struct vmpacket_descriptor * +hv_pkt_iter_next_raw(struct vmbus_channel *channel, + const struct vmpacket_descriptor *pkt) +{ + return hv_pkt_iter_next_pkt(channel, pkt, false); +} + +/* + * Get next packet descriptor from iterator + * If at end of list, return NULL and update host. + */ +static inline struct vmpacket_descriptor * +hv_pkt_iter_next(struct vmbus_channel *channel, + const struct vmpacket_descriptor *pkt) +{ + return hv_pkt_iter_next_pkt(channel, pkt, true); +} + #define foreach_vmbus_pkt(pkt, channel) \ for (pkt = hv_pkt_iter_first(channel); pkt; \ pkt = hv_pkt_iter_next(channel, pkt)) -- cgit v1.2.3-59-g8ed1b From 06caa778d8b2fbcb4ac3878751e39d116424ba9b Mon Sep 17 00:00:00 2001 From: Andres Beltran Date: Mon, 9 Nov 2020 11:07:04 +0100 Subject: hv_utils: Add validation for untrusted Hyper-V values For additional robustness in the face of Hyper-V errors or malicious behavior, validate all values that originate from packets that Hyper-V has sent to the guest in the host-to-guest ring buffer. Ensure that invalid values cannot cause indexing off the end of the icversion_data array in vmbus_prep_negotiate_resp(). Signed-off-by: Andres Beltran Co-developed-by: Andrea Parri (Microsoft) Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20201109100704.9152-1-parri.andrea@gmail.com Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 24 +++-- drivers/hv/hv_fcopy.c | 36 ++++++-- drivers/hv/hv_kvp.c | 122 ++++++++++++++----------- drivers/hv/hv_snapshot.c | 89 +++++++++++-------- drivers/hv/hv_util.c | 222 ++++++++++++++++++++++++++++------------------ include/linux/hyperv.h | 9 +- 6 files changed, 314 insertions(+), 188 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 1d44bb635bb8..5bc5eef5da15 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -190,6 +190,7 @@ static u16 hv_get_dev_type(const struct vmbus_channel *channel) * vmbus_prep_negotiate_resp() - Create default response for Negotiate message * @icmsghdrp: Pointer to msg header structure * @buf: Raw buffer channel data + * @buflen: Length of the raw buffer channel data. * @fw_version: The framework versions we can support. * @fw_vercnt: The size of @fw_version. * @srv_version: The service versions we can support. @@ -202,8 +203,8 @@ static u16 hv_get_dev_type(const struct vmbus_channel *channel) * Set up and fill in default negotiate response message. * Mainly used by Hyper-V drivers. */ -bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, - u8 *buf, const int *fw_version, int fw_vercnt, +bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, + u32 buflen, const int *fw_version, int fw_vercnt, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version) { @@ -215,10 +216,14 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, bool found_match = false; struct icmsg_negotiate *negop; + /* Check that there's enough space for icframe_vercnt, icmsg_vercnt */ + if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) { + pr_err_ratelimited("Invalid icmsg negotiate\n"); + return false; + } + icmsghdrp->icmsgsize = 0x10; - negop = (struct icmsg_negotiate *)&buf[ - sizeof(struct vmbuspipe_hdr) + - sizeof(struct icmsg_hdr)]; + negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR]; icframe_major = negop->icframe_vercnt; icframe_minor = 0; @@ -226,6 +231,15 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, icmsg_major = negop->icmsg_vercnt; icmsg_minor = 0; + /* Validate negop packet */ + if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT || + icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT || + ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) { + pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n", + icframe_major, icmsg_major); + goto fw_error; + } + /* * Select the framework version number we will * support. diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index 85784a9e6a36..660036da7449 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -235,15 +235,27 @@ void hv_fcopy_onchannelcallback(void *context) if (fcopy_transaction.state > HVUTIL_READY) return; - vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 2, &recvlen, - &requestid); - if (recvlen <= 0) + if (vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 2, &recvlen, &requestid)) { + pr_err_ratelimited("Fcopy request received. Could not read into recv buf\n"); return; + } + + if (!recvlen) + return; + + /* Ensure recvlen is big enough to read header data */ + if (recvlen < ICMSG_HDR) { + pr_err_ratelimited("Fcopy request received. Packet length too small: %d\n", + recvlen); + return; + } icmsghdr = (struct icmsg_hdr *)&recv_buffer[ sizeof(struct vmbuspipe_hdr)]; + if (icmsghdr->icmsgtype == ICMSGTYPE_NEGOTIATE) { - if (vmbus_prep_negotiate_resp(icmsghdr, recv_buffer, + if (vmbus_prep_negotiate_resp(icmsghdr, + recv_buffer, recvlen, fw_versions, FW_VER_COUNT, fcopy_versions, FCOPY_VER_COUNT, NULL, &fcopy_srv_version)) { @@ -252,10 +264,14 @@ void hv_fcopy_onchannelcallback(void *context) fcopy_srv_version >> 16, fcopy_srv_version & 0xFFFF); } - } else { - fcopy_msg = (struct hv_fcopy_hdr *)&recv_buffer[ - sizeof(struct vmbuspipe_hdr) + - sizeof(struct icmsg_hdr)]; + } else if (icmsghdr->icmsgtype == ICMSGTYPE_FCOPY) { + /* Ensure recvlen is big enough to contain hv_fcopy_hdr */ + if (recvlen < ICMSG_HDR + sizeof(struct hv_fcopy_hdr)) { + pr_err_ratelimited("Invalid Fcopy hdr. Packet length too small: %u\n", + recvlen); + return; + } + fcopy_msg = (struct hv_fcopy_hdr *)&recv_buffer[ICMSG_HDR]; /* * Stash away this global state for completing the @@ -280,6 +296,10 @@ void hv_fcopy_onchannelcallback(void *context) schedule_delayed_work(&fcopy_timeout_work, HV_UTIL_TIMEOUT * HZ); return; + } else { + pr_err_ratelimited("Fcopy request received. Invalid msg type: %d\n", + icmsghdr->icmsgtype); + return; } icmsghdr->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE; vmbus_sendpacket(channel, recv_buffer, recvlen, requestid, diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 7d2a7b918847..c698592b83e4 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -662,71 +662,87 @@ void hv_kvp_onchannelcallback(void *context) if (kvp_transaction.state > HVUTIL_READY) return; - vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 4, &recvlen, - &requestid); - - if (recvlen > 0) { - icmsghdrp = (struct icmsg_hdr *)&recv_buffer[ - sizeof(struct vmbuspipe_hdr)]; - - if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { - if (vmbus_prep_negotiate_resp(icmsghdrp, - recv_buffer, fw_versions, FW_VER_COUNT, - kvp_versions, KVP_VER_COUNT, - NULL, &kvp_srv_version)) { - pr_info("KVP IC version %d.%d\n", - kvp_srv_version >> 16, - kvp_srv_version & 0xFFFF); - } - } else { - kvp_msg = (struct hv_kvp_msg *)&recv_buffer[ - sizeof(struct vmbuspipe_hdr) + - sizeof(struct icmsg_hdr)]; + if (vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 4, &recvlen, &requestid)) { + pr_err_ratelimited("KVP request received. Could not read into recv buf\n"); + return; + } - /* - * Stash away this global state for completing the - * transaction; note transactions are serialized. - */ + if (!recvlen) + return; - kvp_transaction.recv_len = recvlen; - kvp_transaction.recv_req_id = requestid; - kvp_transaction.kvp_msg = kvp_msg; + /* Ensure recvlen is big enough to read header data */ + if (recvlen < ICMSG_HDR) { + pr_err_ratelimited("KVP request received. Packet length too small: %d\n", + recvlen); + return; + } - if (kvp_transaction.state < HVUTIL_READY) { - /* Userspace is not registered yet */ - kvp_respond_to_host(NULL, HV_E_FAIL); - return; - } - kvp_transaction.state = HVUTIL_HOSTMSG_RECEIVED; + icmsghdrp = (struct icmsg_hdr *)&recv_buffer[sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { + if (vmbus_prep_negotiate_resp(icmsghdrp, + recv_buffer, recvlen, + fw_versions, FW_VER_COUNT, + kvp_versions, KVP_VER_COUNT, + NULL, &kvp_srv_version)) { + pr_info("KVP IC version %d.%d\n", + kvp_srv_version >> 16, + kvp_srv_version & 0xFFFF); + } + } else if (icmsghdrp->icmsgtype == ICMSGTYPE_KVPEXCHANGE) { + /* + * recvlen is not checked against sizeof(struct kvp_msg) because kvp_msg contains + * a union of structs and the msg type received is not known. Code using this + * struct should provide validation when accessing its fields. + */ + kvp_msg = (struct hv_kvp_msg *)&recv_buffer[ICMSG_HDR]; - /* - * Get the information from the - * user-mode component. - * component. This transaction will be - * completed when we get the value from - * the user-mode component. - * Set a timeout to deal with - * user-mode not responding. - */ - schedule_work(&kvp_sendkey_work); - schedule_delayed_work(&kvp_timeout_work, - HV_UTIL_TIMEOUT * HZ); + /* + * Stash away this global state for completing the + * transaction; note transactions are serialized. + */ - return; + kvp_transaction.recv_len = recvlen; + kvp_transaction.recv_req_id = requestid; + kvp_transaction.kvp_msg = kvp_msg; + if (kvp_transaction.state < HVUTIL_READY) { + /* Userspace is not registered yet */ + kvp_respond_to_host(NULL, HV_E_FAIL); + return; } + kvp_transaction.state = HVUTIL_HOSTMSG_RECEIVED; - icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION - | ICMSGHDRFLAG_RESPONSE; + /* + * Get the information from the + * user-mode component. + * component. This transaction will be + * completed when we get the value from + * the user-mode component. + * Set a timeout to deal with + * user-mode not responding. + */ + schedule_work(&kvp_sendkey_work); + schedule_delayed_work(&kvp_timeout_work, + HV_UTIL_TIMEOUT * HZ); - vmbus_sendpacket(channel, recv_buffer, - recvlen, requestid, - VM_PKT_DATA_INBAND, 0); + return; - host_negotiatied = NEGO_FINISHED; - hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper); + } else { + pr_err_ratelimited("KVP request received. Invalid msg type: %d\n", + icmsghdrp->icmsgtype); + return; } + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION + | ICMSGHDRFLAG_RESPONSE; + + vmbus_sendpacket(channel, recv_buffer, + recvlen, requestid, + VM_PKT_DATA_INBAND, 0); + + host_negotiatied = NEGO_FINISHED; + hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper); } static void kvp_on_reset(void) diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index 783779e4cc1a..2267bd4c3472 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -298,49 +298,64 @@ void hv_vss_onchannelcallback(void *context) if (vss_transaction.state > HVUTIL_READY) return; - vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 2, &recvlen, - &requestid); - - if (recvlen > 0) { - icmsghdrp = (struct icmsg_hdr *)&recv_buffer[ - sizeof(struct vmbuspipe_hdr)]; - - if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { - if (vmbus_prep_negotiate_resp(icmsghdrp, - recv_buffer, fw_versions, FW_VER_COUNT, - vss_versions, VSS_VER_COUNT, - NULL, &vss_srv_version)) { - - pr_info("VSS IC version %d.%d\n", - vss_srv_version >> 16, - vss_srv_version & 0xFFFF); - } - } else { - vss_msg = (struct hv_vss_msg *)&recv_buffer[ - sizeof(struct vmbuspipe_hdr) + - sizeof(struct icmsg_hdr)]; - - /* - * Stash away this global state for completing the - * transaction; note transactions are serialized. - */ - - vss_transaction.recv_len = recvlen; - vss_transaction.recv_req_id = requestid; - vss_transaction.msg = (struct hv_vss_msg *)vss_msg; - - schedule_work(&vss_handle_request_work); + if (vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 2, &recvlen, &requestid)) { + pr_err_ratelimited("VSS request received. Could not read into recv buf\n"); + return; + } + + if (!recvlen) + return; + + /* Ensure recvlen is big enough to read header data */ + if (recvlen < ICMSG_HDR) { + pr_err_ratelimited("VSS request received. Packet length too small: %d\n", + recvlen); + return; + } + + icmsghdrp = (struct icmsg_hdr *)&recv_buffer[sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { + if (vmbus_prep_negotiate_resp(icmsghdrp, + recv_buffer, recvlen, + fw_versions, FW_VER_COUNT, + vss_versions, VSS_VER_COUNT, + NULL, &vss_srv_version)) { + + pr_info("VSS IC version %d.%d\n", + vss_srv_version >> 16, + vss_srv_version & 0xFFFF); + } + } else if (icmsghdrp->icmsgtype == ICMSGTYPE_VSS) { + /* Ensure recvlen is big enough to contain hv_vss_msg */ + if (recvlen < ICMSG_HDR + sizeof(struct hv_vss_msg)) { + pr_err_ratelimited("Invalid VSS msg. Packet length too small: %u\n", + recvlen); return; } + vss_msg = (struct hv_vss_msg *)&recv_buffer[ICMSG_HDR]; + + /* + * Stash away this global state for completing the + * transaction; note transactions are serialized. + */ - icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION - | ICMSGHDRFLAG_RESPONSE; + vss_transaction.recv_len = recvlen; + vss_transaction.recv_req_id = requestid; + vss_transaction.msg = (struct hv_vss_msg *)vss_msg; - vmbus_sendpacket(channel, recv_buffer, - recvlen, requestid, - VM_PKT_DATA_INBAND, 0); + schedule_work(&vss_handle_request_work); + return; + } else { + pr_err_ratelimited("VSS request received. Invalid msg type: %d\n", + icmsghdrp->icmsgtype); + return; } + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION | + ICMSGHDRFLAG_RESPONSE; + vmbus_sendpacket(channel, recv_buffer, recvlen, requestid, + VM_PKT_DATA_INBAND, 0); } static void vss_on_reset(void) diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 05566ecdbe4b..34f3e789cc9a 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -195,73 +195,88 @@ static void shutdown_onchannelcallback(void *context) struct icmsg_hdr *icmsghdrp; - vmbus_recvpacket(channel, shut_txf_buf, - HV_HYP_PAGE_SIZE, &recvlen, &requestid); + if (vmbus_recvpacket(channel, shut_txf_buf, HV_HYP_PAGE_SIZE, &recvlen, &requestid)) { + pr_err_ratelimited("Shutdown request received. Could not read into shut txf buf\n"); + return; + } - if (recvlen > 0) { - icmsghdrp = (struct icmsg_hdr *)&shut_txf_buf[ - sizeof(struct vmbuspipe_hdr)]; + if (!recvlen) + return; - if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { - if (vmbus_prep_negotiate_resp(icmsghdrp, shut_txf_buf, - fw_versions, FW_VER_COUNT, - sd_versions, SD_VER_COUNT, - NULL, &sd_srv_version)) { - pr_info("Shutdown IC version %d.%d\n", - sd_srv_version >> 16, - sd_srv_version & 0xFFFF); - } - } else { - shutdown_msg = - (struct shutdown_msg_data *)&shut_txf_buf[ - sizeof(struct vmbuspipe_hdr) + - sizeof(struct icmsg_hdr)]; + /* Ensure recvlen is big enough to read header data */ + if (recvlen < ICMSG_HDR) { + pr_err_ratelimited("Shutdown request received. Packet length too small: %d\n", + recvlen); + return; + } - /* - * shutdown_msg->flags can be 0(shut down), 2(reboot), - * or 4(hibernate). It may bitwise-OR 1, which means - * performing the request by force. Linux always tries - * to perform the request by force. - */ - switch (shutdown_msg->flags) { - case 0: - case 1: - icmsghdrp->status = HV_S_OK; - work = &shutdown_work; - pr_info("Shutdown request received -" - " graceful shutdown initiated\n"); - break; - case 2: - case 3: - icmsghdrp->status = HV_S_OK; - work = &restart_work; - pr_info("Restart request received -" - " graceful restart initiated\n"); - break; - case 4: - case 5: - pr_info("Hibernation request received\n"); - icmsghdrp->status = hibernation_supported ? - HV_S_OK : HV_E_FAIL; - if (hibernation_supported) - work = &hibernate_context.work; - break; - default: - icmsghdrp->status = HV_E_FAIL; - pr_info("Shutdown request received -" - " Invalid request\n"); - break; - } + icmsghdrp = (struct icmsg_hdr *)&shut_txf_buf[sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { + if (vmbus_prep_negotiate_resp(icmsghdrp, + shut_txf_buf, recvlen, + fw_versions, FW_VER_COUNT, + sd_versions, SD_VER_COUNT, + NULL, &sd_srv_version)) { + pr_info("Shutdown IC version %d.%d\n", + sd_srv_version >> 16, + sd_srv_version & 0xFFFF); + } + } else if (icmsghdrp->icmsgtype == ICMSGTYPE_SHUTDOWN) { + /* Ensure recvlen is big enough to contain shutdown_msg_data struct */ + if (recvlen < ICMSG_HDR + sizeof(struct shutdown_msg_data)) { + pr_err_ratelimited("Invalid shutdown msg data. Packet length too small: %u\n", + recvlen); + return; } - icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION - | ICMSGHDRFLAG_RESPONSE; - - vmbus_sendpacket(channel, shut_txf_buf, - recvlen, requestid, - VM_PKT_DATA_INBAND, 0); + shutdown_msg = (struct shutdown_msg_data *)&shut_txf_buf[ICMSG_HDR]; + + /* + * shutdown_msg->flags can be 0(shut down), 2(reboot), + * or 4(hibernate). It may bitwise-OR 1, which means + * performing the request by force. Linux always tries + * to perform the request by force. + */ + switch (shutdown_msg->flags) { + case 0: + case 1: + icmsghdrp->status = HV_S_OK; + work = &shutdown_work; + pr_info("Shutdown request received - graceful shutdown initiated\n"); + break; + case 2: + case 3: + icmsghdrp->status = HV_S_OK; + work = &restart_work; + pr_info("Restart request received - graceful restart initiated\n"); + break; + case 4: + case 5: + pr_info("Hibernation request received\n"); + icmsghdrp->status = hibernation_supported ? + HV_S_OK : HV_E_FAIL; + if (hibernation_supported) + work = &hibernate_context.work; + break; + default: + icmsghdrp->status = HV_E_FAIL; + pr_info("Shutdown request received - Invalid request\n"); + break; + } + } else { + icmsghdrp->status = HV_E_FAIL; + pr_err_ratelimited("Shutdown request received. Invalid msg type: %d\n", + icmsghdrp->icmsgtype); } + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION + | ICMSGHDRFLAG_RESPONSE; + + vmbus_sendpacket(channel, shut_txf_buf, + recvlen, requestid, + VM_PKT_DATA_INBAND, 0); + if (work) schedule_work(work); } @@ -396,19 +411,27 @@ static void timesync_onchannelcallback(void *context) HV_HYP_PAGE_SIZE, &recvlen, &requestid); if (ret) { - pr_warn_once("TimeSync IC pkt recv failed (Err: %d)\n", - ret); + pr_err_ratelimited("TimeSync IC pkt recv failed (Err: %d)\n", + ret); break; } if (!recvlen) break; + /* Ensure recvlen is big enough to read header data */ + if (recvlen < ICMSG_HDR) { + pr_err_ratelimited("Timesync request received. Packet length too small: %d\n", + recvlen); + break; + } + icmsghdrp = (struct icmsg_hdr *)&time_txf_buf[ sizeof(struct vmbuspipe_hdr)]; if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { - if (vmbus_prep_negotiate_resp(icmsghdrp, time_txf_buf, + if (vmbus_prep_negotiate_resp(icmsghdrp, + time_txf_buf, recvlen, fw_versions, FW_VER_COUNT, ts_versions, TS_VER_COUNT, NULL, &ts_srv_version)) { @@ -416,33 +439,44 @@ static void timesync_onchannelcallback(void *context) ts_srv_version >> 16, ts_srv_version & 0xFFFF); } - } else { + } else if (icmsghdrp->icmsgtype == ICMSGTYPE_TIMESYNC) { if (ts_srv_version > TS_VERSION_3) { - refdata = (struct ictimesync_ref_data *) - &time_txf_buf[ - sizeof(struct vmbuspipe_hdr) + - sizeof(struct icmsg_hdr)]; + /* Ensure recvlen is big enough to read ictimesync_ref_data */ + if (recvlen < ICMSG_HDR + sizeof(struct ictimesync_ref_data)) { + pr_err_ratelimited("Invalid ictimesync ref data. Length too small: %u\n", + recvlen); + break; + } + refdata = (struct ictimesync_ref_data *)&time_txf_buf[ICMSG_HDR]; adj_guesttime(refdata->parenttime, refdata->vmreferencetime, refdata->flags); } else { - timedatap = (struct ictimesync_data *) - &time_txf_buf[ - sizeof(struct vmbuspipe_hdr) + - sizeof(struct icmsg_hdr)]; + /* Ensure recvlen is big enough to read ictimesync_data */ + if (recvlen < ICMSG_HDR + sizeof(struct ictimesync_data)) { + pr_err_ratelimited("Invalid ictimesync data. Length too small: %u\n", + recvlen); + break; + } + timedatap = (struct ictimesync_data *)&time_txf_buf[ICMSG_HDR]; + adj_guesttime(timedatap->parenttime, hv_read_reference_counter(), timedatap->flags); } + } else { + icmsghdrp->status = HV_E_FAIL; + pr_err_ratelimited("Timesync request received. Invalid msg type: %d\n", + icmsghdrp->icmsgtype); } icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE; vmbus_sendpacket(channel, time_txf_buf, - recvlen, requestid, - VM_PKT_DATA_INBAND, 0); + recvlen, requestid, + VM_PKT_DATA_INBAND, 0); } } @@ -462,18 +496,28 @@ static void heartbeat_onchannelcallback(void *context) while (1) { - vmbus_recvpacket(channel, hbeat_txf_buf, - HV_HYP_PAGE_SIZE, &recvlen, &requestid); + if (vmbus_recvpacket(channel, hbeat_txf_buf, HV_HYP_PAGE_SIZE, + &recvlen, &requestid)) { + pr_err_ratelimited("Heartbeat request received. Could not read into hbeat txf buf\n"); + return; + } if (!recvlen) break; + /* Ensure recvlen is big enough to read header data */ + if (recvlen < ICMSG_HDR) { + pr_err_ratelimited("Hearbeat request received. Packet length too small: %d\n", + recvlen); + break; + } + icmsghdrp = (struct icmsg_hdr *)&hbeat_txf_buf[ sizeof(struct vmbuspipe_hdr)]; if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { if (vmbus_prep_negotiate_resp(icmsghdrp, - hbeat_txf_buf, + hbeat_txf_buf, recvlen, fw_versions, FW_VER_COUNT, hb_versions, HB_VER_COUNT, NULL, &hb_srv_version)) { @@ -482,21 +526,31 @@ static void heartbeat_onchannelcallback(void *context) hb_srv_version >> 16, hb_srv_version & 0xFFFF); } - } else { - heartbeat_msg = - (struct heartbeat_msg_data *)&hbeat_txf_buf[ - sizeof(struct vmbuspipe_hdr) + - sizeof(struct icmsg_hdr)]; + } else if (icmsghdrp->icmsgtype == ICMSGTYPE_HEARTBEAT) { + /* + * Ensure recvlen is big enough to read seq_num. Reserved area is not + * included in the check as the host may not fill it up entirely + */ + if (recvlen < ICMSG_HDR + sizeof(u64)) { + pr_err_ratelimited("Invalid heartbeat msg data. Length too small: %u\n", + recvlen); + break; + } + heartbeat_msg = (struct heartbeat_msg_data *)&hbeat_txf_buf[ICMSG_HDR]; heartbeat_msg->seq_num += 1; + } else { + icmsghdrp->status = HV_E_FAIL; + pr_err_ratelimited("Heartbeat request received. Invalid msg type: %d\n", + icmsghdrp->icmsgtype); } icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE; vmbus_sendpacket(channel, hbeat_txf_buf, - recvlen, requestid, - VM_PKT_DATA_INBAND, 0); + recvlen, requestid, + VM_PKT_DATA_INBAND, 0); } } diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index fbae8406d5d4..2ea967bc17ad 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1480,6 +1480,7 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size); #define ICMSGTYPE_SHUTDOWN 3 #define ICMSGTYPE_TIMESYNC 4 #define ICMSGTYPE_VSS 5 +#define ICMSGTYPE_FCOPY 7 #define ICMSGHDRFLAG_TRANSACTION 1 #define ICMSGHDRFLAG_REQUEST 2 @@ -1523,6 +1524,12 @@ struct icmsg_hdr { u8 reserved[2]; } __packed; +#define IC_VERSION_NEGOTIATION_MAX_VER_COUNT 100 +#define ICMSG_HDR (sizeof(struct vmbuspipe_hdr) + sizeof(struct icmsg_hdr)) +#define ICMSG_NEGOTIATE_PKT_SIZE(icframe_vercnt, icmsg_vercnt) \ + (ICMSG_HDR + offsetof(struct icmsg_negotiate, icversion_data) + \ + (((icframe_vercnt) + (icmsg_vercnt)) * sizeof(struct ic_version))) + struct icmsg_negotiate { u16 icframe_vercnt; u16 icmsg_vercnt; @@ -1578,7 +1585,7 @@ struct hyperv_service_callback { }; #define MAX_SRV_VER 0x7ffffff -extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, +extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen, const int *fw_version, int fw_vercnt, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version); -- cgit v1.2.3-59-g8ed1b From e4d221b42354b2e2ddb9187a806afb651eee2cda Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Wed, 9 Dec 2020 08:08:26 +0100 Subject: Drivers: hv: vmbus: Resolve race condition in vmbus_onoffer_rescind() An erroneous or malicious host could send multiple rescind messages for a same channel. In vmbus_onoffer_rescind(), the guest maps the channel ID to obtain a pointer to the channel object and it eventually releases such object and associated data. The host could time rescind messages and lead to an use-after-free. Add a new flag to the channel structure to make sure that only one instance of vmbus_onoffer_rescind() can get the reference to the channel object. Reported-by: Juan Vazquez Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20201209070827.29335-6-parri.andrea@gmail.com Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 12 ++++++++++++ include/linux/hyperv.h | 1 + 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 4072fd1f2214..68950a1e4b63 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -1063,6 +1063,18 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) mutex_lock(&vmbus_connection.channel_mutex); channel = relid2channel(rescind->child_relid); + if (channel != NULL) { + /* + * Guarantee that no other instance of vmbus_onoffer_rescind() + * has got a reference to the channel object. Synchronize on + * &vmbus_connection.channel_mutex. + */ + if (channel->rescind_ref) { + mutex_unlock(&vmbus_connection.channel_mutex); + return; + } + channel->rescind_ref = true; + } mutex_unlock(&vmbus_connection.channel_mutex); if (channel == NULL) { diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2ea967bc17ad..f0d48a368f13 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -809,6 +809,7 @@ struct vmbus_channel { u8 monitor_bit; bool rescind; /* got rescind msg */ + bool rescind_ref; /* got rescind msg, got channel reference */ struct completion rescind_event; u32 ringbuffer_gpadlhandle; -- cgit v1.2.3-59-g8ed1b From 21a4e356d3588806307555c149b80cec3dedb180 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Mon, 1 Feb 2021 15:48:12 +0100 Subject: Drivers: hv: vmbus: Restrict vmbus_devices on isolated guests Only the VSCs or ICs that have been hardened and that are critical for the successful adoption of Confidential VMs should be allowed if the guest is running isolated. This change reduces the footprint of the code that will be exercised by Confidential VMs and hence the exposure to bugs and vulnerabilities. Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210201144814.2701-3-parri.andrea@gmail.com Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/hyperv.h | 1 + 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 68950a1e4b63..f0ed730e2e4e 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -31,101 +31,118 @@ const struct vmbus_device vmbus_devs[] = { { .dev_type = HV_IDE, HV_IDE_GUID, .perf_device = true, + .allowed_in_isolated = false, }, /* SCSI */ { .dev_type = HV_SCSI, HV_SCSI_GUID, .perf_device = true, + .allowed_in_isolated = true, }, /* Fibre Channel */ { .dev_type = HV_FC, HV_SYNTHFC_GUID, .perf_device = true, + .allowed_in_isolated = false, }, /* Synthetic NIC */ { .dev_type = HV_NIC, HV_NIC_GUID, .perf_device = true, + .allowed_in_isolated = true, }, /* Network Direct */ { .dev_type = HV_ND, HV_ND_GUID, .perf_device = true, + .allowed_in_isolated = false, }, /* PCIE */ { .dev_type = HV_PCIE, HV_PCIE_GUID, .perf_device = false, + .allowed_in_isolated = false, }, /* Synthetic Frame Buffer */ { .dev_type = HV_FB, HV_SYNTHVID_GUID, .perf_device = false, + .allowed_in_isolated = false, }, /* Synthetic Keyboard */ { .dev_type = HV_KBD, HV_KBD_GUID, .perf_device = false, + .allowed_in_isolated = false, }, /* Synthetic MOUSE */ { .dev_type = HV_MOUSE, HV_MOUSE_GUID, .perf_device = false, + .allowed_in_isolated = false, }, /* KVP */ { .dev_type = HV_KVP, HV_KVP_GUID, .perf_device = false, + .allowed_in_isolated = false, }, /* Time Synch */ { .dev_type = HV_TS, HV_TS_GUID, .perf_device = false, + .allowed_in_isolated = true, }, /* Heartbeat */ { .dev_type = HV_HB, HV_HEART_BEAT_GUID, .perf_device = false, + .allowed_in_isolated = true, }, /* Shutdown */ { .dev_type = HV_SHUTDOWN, HV_SHUTDOWN_GUID, .perf_device = false, + .allowed_in_isolated = true, }, /* File copy */ { .dev_type = HV_FCOPY, HV_FCOPY_GUID, .perf_device = false, + .allowed_in_isolated = false, }, /* Backup */ { .dev_type = HV_BACKUP, HV_VSS_GUID, .perf_device = false, + .allowed_in_isolated = false, }, /* Dynamic Memory */ { .dev_type = HV_DM, HV_DM_GUID, .perf_device = false, + .allowed_in_isolated = false, }, /* Unknown GUID */ { .dev_type = HV_UNKNOWN, .perf_device = false, + .allowed_in_isolated = false, }, }; @@ -903,6 +920,20 @@ find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer) return channel; } +static bool vmbus_is_valid_device(const guid_t *guid) +{ + u16 i; + + if (!hv_is_isolation_supported()) + return true; + + for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) { + if (guid_equal(guid, &vmbus_devs[i].guid)) + return vmbus_devs[i].allowed_in_isolated; + } + return false; +} + /* * vmbus_onoffer - Handler for channel offers from vmbus in parent partition. * @@ -917,6 +948,13 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) trace_vmbus_onoffer(offer); + if (!vmbus_is_valid_device(&offer->offer.if_type)) { + pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n", + offer->child_relid); + atomic_dec(&vmbus_connection.offer_in_progress); + return; + } + oldchannel = find_primary_channel_by_offer(offer); if (oldchannel != NULL) { diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f0d48a368f13..e3426f8c12db 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -789,6 +789,7 @@ struct vmbus_device { u16 dev_type; guid_t guid; bool perf_device; + bool allowed_in_isolated; }; #define VMBUS_DEFAULT_MAX_PKT_SIZE 4096 -- cgit v1.2.3-59-g8ed1b From 78785010d428f7755bf51d1c08cb2566a73dc7f5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 1 Feb 2021 11:43:34 -0600 Subject: hv: hyperv.h: Replace one-element array with flexible-array in struct icmsg_negotiate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Refactor the code according to the use of a flexible-array member in struct icmsg_negotiate, instead of a one-element array. Also, this helps the ongoing efforts to enable -Warray-bounds and fix the following warnings: drivers/hv/channel_mgmt.c:315:23: warning: array subscript 1 is above array bounds of ‘struct ic_version[1]’ [-Warray-bounds] drivers/hv/channel_mgmt.c:316:23: warning: array subscript 1 is above array bounds of ‘struct ic_version[1]’ [-Warray-bounds] [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/79 Link: https://github.com/KSPP/linux/issues/109 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210201174334.GA171933@embeddedor Signed-off-by: Wei Liu --- include/linux/hyperv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index e3426f8c12db..9dd22af1b7f6 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1529,14 +1529,14 @@ struct icmsg_hdr { #define IC_VERSION_NEGOTIATION_MAX_VER_COUNT 100 #define ICMSG_HDR (sizeof(struct vmbuspipe_hdr) + sizeof(struct icmsg_hdr)) #define ICMSG_NEGOTIATE_PKT_SIZE(icframe_vercnt, icmsg_vercnt) \ - (ICMSG_HDR + offsetof(struct icmsg_negotiate, icversion_data) + \ + (ICMSG_HDR + sizeof(struct icmsg_negotiate) + \ (((icframe_vercnt) + (icmsg_vercnt)) * sizeof(struct ic_version))) struct icmsg_negotiate { u16 icframe_vercnt; u16 icmsg_vercnt; u32 reserved; - struct ic_version icversion_data[1]; /* any size array */ + struct ic_version icversion_data[]; /* any size array */ } __packed; struct shutdown_msg_data { -- cgit v1.2.3-59-g8ed1b From 3019270282a175defc02c8331786c73e082cd2a8 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Mon, 15 Feb 2021 10:44:58 +0000 Subject: Revert "Drivers: hv: vmbus: Copy packets sent by Hyper-V out of the ring buffer" This reverts commit a8c3209998afb5c4941b49e35b513cea9050cb4a. It is reported that the said commit caused regression in netvsc. Reported-by: Andrea Parri (Microsoft) Signed-off-by: Wei Liu --- drivers/hv/channel.c | 9 ++--- drivers/hv/hv_fcopy.c | 1 - drivers/hv/hv_kvp.c | 1 - drivers/hv/hyperv_vmbus.h | 2 +- drivers/hv/ring_buffer.c | 82 ++++++--------------------------------- drivers/net/hyperv/hyperv_net.h | 3 -- drivers/net/hyperv/netvsc.c | 2 - drivers/net/hyperv/rndis_filter.c | 2 - drivers/scsi/storvsc_drv.c | 10 ----- include/linux/hyperv.h | 48 ++++------------------- net/vmw_vsock/hyperv_transport.c | 4 +- 11 files changed, 25 insertions(+), 139 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 9aa789e5f22b..0bd202de7960 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -597,15 +597,12 @@ static int __vmbus_open(struct vmbus_channel *newchannel, newchannel->onchannel_callback = onchannelcallback; newchannel->channel_callback_context = context; - if (!newchannel->max_pkt_size) - newchannel->max_pkt_size = VMBUS_DEFAULT_MAX_PKT_SIZE; - - err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages, 0); + err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages); if (err) goto error_clean_ring; - err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages], - recv_pages, newchannel->max_pkt_size); + err = hv_ringbuffer_init(&newchannel->inbound, + &page[send_pages], recv_pages); if (err) goto error_clean_ring; diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index 660036da7449..59ce85e00a02 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -349,7 +349,6 @@ int hv_fcopy_init(struct hv_util_service *srv) { recv_buffer = srv->recv_buffer; fcopy_transaction.recv_channel = srv->channel; - fcopy_transaction.recv_channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index c698592b83e4..b49962d312ce 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -757,7 +757,6 @@ hv_kvp_init(struct hv_util_service *srv) { recv_buffer = srv->recv_buffer; kvp_transaction.recv_channel = srv->channel; - kvp_transaction.recv_channel->max_pkt_size = HV_HYP_PAGE_SIZE * 4; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 42f3d9d123a1..9416e09ebd58 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -174,7 +174,7 @@ extern int hv_synic_cleanup(unsigned int cpu); void hv_ringbuffer_pre_init(struct vmbus_channel *channel); int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, - struct page *pages, u32 pagecnt, u32 max_pkt_size); + struct page *pages, u32 pagecnt); void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 29e90477363a..35833d4d1a1d 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -190,7 +190,7 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel) /* Initialize the ring buffer. */ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, - struct page *pages, u32 page_cnt, u32 max_pkt_size) + struct page *pages, u32 page_cnt) { int i; struct page **pages_wraparound; @@ -232,14 +232,6 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, sizeof(struct hv_ring_buffer); ring_info->priv_read_index = 0; - /* Initialize buffer that holds copies of incoming packets */ - if (max_pkt_size) { - ring_info->pkt_buffer = kzalloc(max_pkt_size, GFP_KERNEL); - if (!ring_info->pkt_buffer) - return -ENOMEM; - ring_info->pkt_buffer_size = max_pkt_size; - } - spin_lock_init(&ring_info->ring_lock); return 0; @@ -252,9 +244,6 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) vunmap(ring_info->ring_buffer); ring_info->ring_buffer = NULL; mutex_unlock(&ring_info->ring_buffer_mutex); - - kfree(ring_info->pkt_buffer); - ring_info->pkt_buffer_size = 0; } /* Write to the ring buffer. */ @@ -396,7 +385,7 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, memcpy(buffer, (const char *)desc + offset, packetlen); /* Advance ring index to next packet descriptor */ - __hv_pkt_iter_next(channel, desc, true); + __hv_pkt_iter_next(channel, desc); /* Notify host of update */ hv_pkt_iter_close(channel); @@ -422,22 +411,6 @@ static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) return (rbi->ring_datasize - priv_read_loc) + write_loc; } -/* - * Get first vmbus packet without copying it out of the ring buffer - */ -struct vmpacket_descriptor *hv_pkt_iter_first_raw(struct vmbus_channel *channel) -{ - struct hv_ring_buffer_info *rbi = &channel->inbound; - - hv_debug_delay_test(channel, MESSAGE_DELAY); - - if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor)) - return NULL; - - return (struct vmpacket_descriptor *)(hv_get_ring_buffer(rbi) + rbi->priv_read_index); -} -EXPORT_SYMBOL_GPL(hv_pkt_iter_first_raw); - /* * Get first vmbus packet from ring buffer after read_index * @@ -446,49 +419,17 @@ EXPORT_SYMBOL_GPL(hv_pkt_iter_first_raw); struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel) { struct hv_ring_buffer_info *rbi = &channel->inbound; - struct vmpacket_descriptor *desc, *desc_copy; - u32 bytes_avail, pkt_len, pkt_offset; + struct vmpacket_descriptor *desc; - desc = hv_pkt_iter_first_raw(channel); - if (!desc) + hv_debug_delay_test(channel, MESSAGE_DELAY); + if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor)) return NULL; - bytes_avail = min(rbi->pkt_buffer_size, hv_pkt_iter_avail(rbi)); - - /* - * Ensure the compiler does not use references to incoming Hyper-V values (which - * could change at any moment) when reading local variables later in the code - */ - pkt_len = READ_ONCE(desc->len8) << 3; - pkt_offset = READ_ONCE(desc->offset8) << 3; - - /* - * If pkt_len is invalid, set it to the smaller of hv_pkt_iter_avail() and - * rbi->pkt_buffer_size - */ - if (pkt_len < sizeof(struct vmpacket_descriptor) || pkt_len > bytes_avail) - pkt_len = bytes_avail; - - /* - * If pkt_offset is invalid, arbitrarily set it to - * the size of vmpacket_descriptor - */ - if (pkt_offset < sizeof(struct vmpacket_descriptor) || pkt_offset > pkt_len) - pkt_offset = sizeof(struct vmpacket_descriptor); - - /* Copy the Hyper-V packet out of the ring buffer */ - desc_copy = (struct vmpacket_descriptor *)rbi->pkt_buffer; - memcpy(desc_copy, desc, pkt_len); - - /* - * Hyper-V could still change len8 and offset8 after the earlier read. - * Ensure that desc_copy has legal values for len8 and offset8 that - * are consistent with the copy we just made - */ - desc_copy->len8 = pkt_len >> 3; - desc_copy->offset8 = pkt_offset >> 3; + desc = hv_get_ring_buffer(rbi) + rbi->priv_read_index; + if (desc) + prefetch((char *)desc + (desc->len8 << 3)); - return desc_copy; + return desc; } EXPORT_SYMBOL_GPL(hv_pkt_iter_first); @@ -500,8 +441,7 @@ EXPORT_SYMBOL_GPL(hv_pkt_iter_first); */ struct vmpacket_descriptor * __hv_pkt_iter_next(struct vmbus_channel *channel, - const struct vmpacket_descriptor *desc, - bool copy) + const struct vmpacket_descriptor *desc) { struct hv_ring_buffer_info *rbi = &channel->inbound; u32 packetlen = desc->len8 << 3; @@ -514,7 +454,7 @@ __hv_pkt_iter_next(struct vmbus_channel *channel, rbi->priv_read_index -= dsize; /* more data? */ - return copy ? hv_pkt_iter_first(channel) : hv_pkt_iter_first_raw(channel); + return hv_pkt_iter_first(channel); } EXPORT_SYMBOL_GPL(__hv_pkt_iter_next); diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 7ea6936f86ef..2a87cfa27ac0 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -860,12 +860,9 @@ static inline u32 netvsc_rqstor_size(unsigned long ringbytes) ringbytes / NETVSC_MIN_IN_MSG_SIZE; } -#define NETVSC_MAX_XFER_PAGE_RANGES 375 #define NETVSC_XFER_HEADER_SIZE(rng_cnt) \ (offsetof(struct vmtransfer_page_packet_header, ranges) + \ (rng_cnt) * sizeof(struct vmtransfer_page_range)) -#define NETVSC_MAX_PKT_SIZE (NETVSC_XFER_HEADER_SIZE(NETVSC_MAX_XFER_PAGE_RANGES) + \ - sizeof(struct nvsp_message) + (sizeof(u32) * VRSS_SEND_TAB_SIZE)) struct multi_send_data { struct sk_buff *skb; /* skb containing the pkt */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 51005f2d4a82..2353623259f3 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1544,8 +1544,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, /* Open the channel */ device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes); - device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE; - ret = vmbus_open(device->channel, netvsc_ring_bytes, netvsc_ring_bytes, NULL, 0, netvsc_channel_cb, net_device->chan_table); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 7e6dee2f02a4..598713c0d5a8 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1174,8 +1174,6 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) nvchan->channel = new_sc; new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes); - new_sc->max_pkt_size = NETVSC_MAX_PKT_SIZE; - ret = vmbus_open(new_sc, netvsc_ring_bytes, netvsc_ring_bytes, NULL, 0, netvsc_channel_cb, nvchan); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 7e59284dbf5b..2e4fa77445fd 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -414,14 +414,6 @@ static void storvsc_on_channel_callback(void *context); #define STORVSC_IDE_MAX_TARGETS 1 #define STORVSC_IDE_MAX_CHANNELS 1 -/* - * Upper bound on the size of a storvsc packet. vmscsi_size_delta is not - * included in the calculation because it is set after STORVSC_MAX_PKT_SIZE - * is used in storvsc_connect_to_vsp - */ -#define STORVSC_MAX_PKT_SIZE (sizeof(struct vmpacket_descriptor) +\ - sizeof(struct vstor_packet)) - struct storvsc_cmd_request { struct scsi_cmnd *cmd; @@ -706,7 +698,6 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) return; memset(&props, 0, sizeof(struct vmstorage_channel_properties)); - new_sc->max_pkt_size = STORVSC_MAX_PKT_SIZE; /* * The size of vmbus_requestor is an upper bound on the number of requests @@ -1289,7 +1280,6 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size, memset(&props, 0, sizeof(struct vmstorage_channel_properties)); - device->channel->max_pkt_size = STORVSC_MAX_PKT_SIZE; /* * The size of vmbus_requestor is an upper bound on the number of requests * that can be in-progress at any one time across all channels. diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 9dd22af1b7f6..f1d74dcf0353 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -181,10 +181,6 @@ struct hv_ring_buffer_info { * being freed while the ring buffer is being accessed. */ struct mutex ring_buffer_mutex; - - /* Buffer that holds a copy of an incoming host packet */ - void *pkt_buffer; - u32 pkt_buffer_size; }; @@ -792,8 +788,6 @@ struct vmbus_device { bool allowed_in_isolated; }; -#define VMBUS_DEFAULT_MAX_PKT_SIZE 4096 - struct vmbus_channel { struct list_head listentry; @@ -1016,9 +1010,6 @@ struct vmbus_channel { /* request/transaction ids for VMBus */ struct vmbus_requestor requestor; u32 rqstor_size; - - /* The max size of a packet on this channel */ - u32 max_pkt_size; }; u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr); @@ -1660,44 +1651,15 @@ static inline u32 hv_pkt_datalen(const struct vmpacket_descriptor *desc) } -struct vmpacket_descriptor * -hv_pkt_iter_first_raw(struct vmbus_channel *channel); - struct vmpacket_descriptor * hv_pkt_iter_first(struct vmbus_channel *channel); struct vmpacket_descriptor * __hv_pkt_iter_next(struct vmbus_channel *channel, - const struct vmpacket_descriptor *pkt, - bool copy); + const struct vmpacket_descriptor *pkt); void hv_pkt_iter_close(struct vmbus_channel *channel); -static inline struct vmpacket_descriptor * -hv_pkt_iter_next_pkt(struct vmbus_channel *channel, - const struct vmpacket_descriptor *pkt, - bool copy) -{ - struct vmpacket_descriptor *nxt; - - nxt = __hv_pkt_iter_next(channel, pkt, copy); - if (!nxt) - hv_pkt_iter_close(channel); - - return nxt; -} - -/* - * Get next packet descriptor without copying it out of the ring buffer - * If at end of list, return NULL and update host. - */ -static inline struct vmpacket_descriptor * -hv_pkt_iter_next_raw(struct vmbus_channel *channel, - const struct vmpacket_descriptor *pkt) -{ - return hv_pkt_iter_next_pkt(channel, pkt, false); -} - /* * Get next packet descriptor from iterator * If at end of list, return NULL and update host. @@ -1706,7 +1668,13 @@ static inline struct vmpacket_descriptor * hv_pkt_iter_next(struct vmbus_channel *channel, const struct vmpacket_descriptor *pkt) { - return hv_pkt_iter_next_pkt(channel, pkt, true); + struct vmpacket_descriptor *nxt; + + nxt = __hv_pkt_iter_next(channel, pkt); + if (!nxt) + hv_pkt_iter_close(channel); + + return nxt; } #define foreach_vmbus_pkt(pkt, channel) \ diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index cd8b7c1ca9f1..630b851f8150 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -600,7 +600,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg, return -EOPNOTSUPP; if (need_refill) { - hvs->recv_desc = hv_pkt_iter_first_raw(hvs->chan); + hvs->recv_desc = hv_pkt_iter_first(hvs->chan); ret = hvs_update_recv_data(hvs); if (ret) return ret; @@ -614,7 +614,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg, hvs->recv_data_len -= to_read; if (hvs->recv_data_len == 0) { - hvs->recv_desc = hv_pkt_iter_next_raw(hvs->chan, hvs->recv_desc); + hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc); if (hvs->recv_desc) { ret = hvs_update_recv_data(hvs); if (ret) -- cgit v1.2.3-59-g8ed1b