From d6af2ed29c7c1c311b96dac989dcb991e90ee195 Mon Sep 17 00:00:00 2001 From: Wei Hu Date: Mon, 27 Jul 2020 15:17:31 +0800 Subject: PCI: hv: Fix a timing issue which causes kdump to fail occasionally Kdump could fail sometime on Hyper-V guest because the retry in hv_pci_enter_d0() releases child device structures in hv_pci_bus_exit(). Although there is a second asynchronous device relations message sending from the host, if this message arrives to the guest after hv_send_resource_allocated() is called, the retry would fail. Fix the problem by moving retry to hv_pci_probe() and start the retry from hv_pci_query_relations() call. This will cause a device relations message to arrive to the guest synchronously; the guest would then be able to rebuild the child device structures before calling hv_send_resource_allocated(). Link: https://lore.kernel.org/r/20200727071731.18516-1-weh@microsoft.com Fixes: c81992e7f4aa ("PCI: hv: Retry PCI bus D0 entry on invalid device state") Signed-off-by: Wei Hu [lorenzo.pieralisi@arm.com: fixed a comment and commit log] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Michael Kelley --- drivers/pci/controller/pci-hyperv.c | 71 +++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 34 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index bf40ff09c99d..d0033ff6c143 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -2759,10 +2759,8 @@ static int hv_pci_enter_d0(struct hv_device *hdev) struct pci_bus_d0_entry *d0_entry; struct hv_pci_compl comp_pkt; struct pci_packet *pkt; - bool retry = true; int ret; -enter_d0_retry: /* * Tell the host that the bus is ready to use, and moved into the * powered-on state. This includes telling the host which region @@ -2789,38 +2787,6 @@ enter_d0_retry: if (ret) goto exit; - /* - * In certain case (Kdump) the pci device of interest was - * not cleanly shut down and resource is still held on host - * side, the host could return invalid device status. - * We need to explicitly request host to release the resource - * and try to enter D0 again. - */ - if (comp_pkt.completion_status < 0 && retry) { - retry = false; - - dev_err(&hdev->device, "Retrying D0 Entry\n"); - - /* - * Hv_pci_bus_exit() calls hv_send_resource_released() - * to free up resources of its child devices. - * In the kdump kernel we need to set the - * wslot_res_allocated to 255 so it scans all child - * devices to release resources allocated in the - * normal kernel before panic happened. - */ - hbus->wslot_res_allocated = 255; - - ret = hv_pci_bus_exit(hdev, true); - - if (ret == 0) { - kfree(pkt); - goto enter_d0_retry; - } - dev_err(&hdev->device, - "Retrying D0 failed with ret %d\n", ret); - } - if (comp_pkt.completion_status < 0) { dev_err(&hdev->device, "PCI Pass-through VSP failed D0 Entry with status %x\n", @@ -3058,6 +3024,7 @@ static int hv_pci_probe(struct hv_device *hdev, struct hv_pcibus_device *hbus; u16 dom_req, dom; char *name; + bool enter_d0_retry = true; int ret; /* @@ -3178,11 +3145,47 @@ static int hv_pci_probe(struct hv_device *hdev, if (ret) goto free_fwnode; +retry: ret = hv_pci_query_relations(hdev); if (ret) goto free_irq_domain; ret = hv_pci_enter_d0(hdev); + /* + * In certain case (Kdump) the pci device of interest was + * not cleanly shut down and resource is still held on host + * side, the host could return invalid device status. + * We need to explicitly request host to release the resource + * and try to enter D0 again. + * Since the hv_pci_bus_exit() call releases structures + * of all its child devices, we need to start the retry from + * hv_pci_query_relations() call, requesting host to send + * the synchronous child device relations message before this + * information is needed in hv_send_resources_allocated() + * call later. + */ + if (ret == -EPROTO && enter_d0_retry) { + enter_d0_retry = false; + + dev_err(&hdev->device, "Retrying D0 Entry\n"); + + /* + * Hv_pci_bus_exit() calls hv_send_resources_released() + * to free up resources of its child devices. + * In the kdump kernel we need to set the + * wslot_res_allocated to 255 so it scans all child + * devices to release resources allocated in the + * normal kernel before panic happened. + */ + hbus->wslot_res_allocated = 255; + ret = hv_pci_bus_exit(hdev, true); + + if (ret == 0) + goto retry; + + dev_err(&hdev->device, + "Retrying D0 failed with ret %d\n", ret); + } if (ret) goto free_irq_domain; -- cgit v1.2.3-59-g8ed1b From a459d9e107887a1fef32d5c6d0951241876812d9 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 6 Jul 2020 21:52:34 +0800 Subject: PCI: hv: Make some functions static sparse report build warning as follows: drivers/pci/controller/pci-hyperv.c:941:5: warning: symbol 'hv_read_config_block' was not declared. Should it be static? drivers/pci/controller/pci-hyperv.c:1021:5: warning: symbol 'hv_write_config_block' was not declared. Should it be static? drivers/pci/controller/pci-hyperv.c:1090:5: warning: symbol 'hv_register_block_invalidate' was not declared. Should it be static? Those functions are not used outside of this file, so mark them static. Link: https://lore.kernel.org/r/20200706135234.80758-1-weiyongjun1@huawei.com Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/pci-hyperv.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index d0033ff6c143..fc4c3a15e570 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -938,8 +938,9 @@ out: * * Return: 0 on success, -errno on failure */ -int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len, - unsigned int block_id, unsigned int *bytes_returned) +static int hv_read_config_block(struct pci_dev *pdev, void *buf, + unsigned int len, unsigned int block_id, + unsigned int *bytes_returned) { struct hv_pcibus_device *hbus = container_of(pdev->bus->sysdata, struct hv_pcibus_device, @@ -1018,8 +1019,8 @@ static void hv_pci_write_config_compl(void *context, struct pci_response *resp, * * Return: 0 on success, -errno on failure */ -int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len, - unsigned int block_id) +static int hv_write_config_block(struct pci_dev *pdev, void *buf, + unsigned int len, unsigned int block_id) { struct hv_pcibus_device *hbus = container_of(pdev->bus->sysdata, struct hv_pcibus_device, @@ -1087,9 +1088,9 @@ int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len, * * Return: 0 on success, -errno on failure */ -int hv_register_block_invalidate(struct pci_dev *pdev, void *context, - void (*block_invalidate)(void *context, - u64 block_mask)) +static int hv_register_block_invalidate(struct pci_dev *pdev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)) { struct hv_pcibus_device *hbus = container_of(pdev->bus->sysdata, struct hv_pcibus_device, -- cgit v1.2.3-59-g8ed1b