From 62ce94a7a5a54aac80975f5e6731707225d4077e Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Wed, 12 Jul 2017 00:04:14 -0400 Subject: PCI: Mark Broadcom HT2100 Root Port Extended Tags as broken Per PCIe r3.1, sec 2.2.6.2 and 7.8.4, a Requester may not use 8-bit Tags unless its Extended Tag Field Enable is set, but all Receivers/Completers must handle 8-bit Tags correctly regardless of their Extended Tag Field Enable. Some devices do not handle 8-bit Tags as Completers, so add a quirk for them. If we find such a device, we disable Extended Tags for the entire hierarchy to make peer-to-peer DMA possible. The Broadcom HT2100 seems to have issues with handling 8-bit tags. Mark it as broken. The pci_walk_bus() in the quirk handles devices we've enumerated in the past, and pci_configure_device() handles devices we enumerate in the future. Fixes: 60db3a4d8cc9 ("PCI: Enable PCIe Extended Tags if supported") Link: https://bugzilla.redhat.com/show_bug.cgi?id=1467674 Reported-and-tested-by: Wim ten Have Signed-off-by: Sinan Kaya [bhelgaas: changelog, tweak messages, rename bit and quirk] Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.h | 1 + drivers/pci/probe.c | 43 ++++++++++++++++++++++++++++++++++++------- drivers/pci/quirks.c | 16 ++++++++++++++++ 3 files changed, 53 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 22e061738c6f..a6560c9baa52 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -235,6 +235,7 @@ enum pci_bar_type { pci_bar_mem64, /* A 64-bit memory BAR */ }; +int pci_configure_extended_tags(struct pci_dev *dev, void *ign); bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, int crs_timeout); int pci_setup_device(struct pci_dev *dev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c31310db0404..c81c9835f4c7 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1745,21 +1745,50 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) */ } -static void pci_configure_extended_tags(struct pci_dev *dev) +int pci_configure_extended_tags(struct pci_dev *dev, void *ign) { - u32 dev_cap; + struct pci_host_bridge *host; + u32 cap; + u16 ctl; int ret; if (!pci_is_pcie(dev)) - return; + return 0; - ret = pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &dev_cap); + ret = pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap); if (ret) - return; + return 0; + + if (!(cap & PCI_EXP_DEVCAP_EXT_TAG)) + return 0; - if (dev_cap & PCI_EXP_DEVCAP_EXT_TAG) + ret = pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl); + if (ret) + return 0; + + host = pci_find_host_bridge(dev->bus); + if (!host) + return 0; + + /* + * If some device in the hierarchy doesn't handle Extended Tags + * correctly, make sure they're disabled. + */ + if (host->no_ext_tags) { + if (ctl & PCI_EXP_DEVCTL_EXT_TAG) { + dev_info(&dev->dev, "disabling Extended Tags\n"); + pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_EXT_TAG); + } + return 0; + } + + if (!(ctl & PCI_EXP_DEVCTL_EXT_TAG)) { + dev_info(&dev->dev, "enabling Extended Tags\n"); pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_EXT_TAG); + } + return 0; } static void pci_configure_device(struct pci_dev *dev) @@ -1768,7 +1797,7 @@ static void pci_configure_device(struct pci_dev *dev) int ret; pci_configure_mps(dev); - pci_configure_extended_tags(dev); + pci_configure_extended_tags(dev, NULL); memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6967c6b4cf6b..f135765555c9 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4681,3 +4681,19 @@ static void quirk_intel_no_flr(struct pci_dev *dev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_intel_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_intel_no_flr); + +static void quirk_no_ext_tags(struct pci_dev *pdev) +{ + struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus); + + if (!bridge) + return; + + bridge->no_ext_tags = 1; + dev_info(&pdev->dev, "disabling Extended Tags (this device can't handle them)\n"); + + pci_walk_bus(bridge->bus, pci_configure_extended_tags, NULL); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0140, quirk_no_ext_tags); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0142, quirk_no_ext_tags); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0144, quirk_no_ext_tags); -- cgit v1.2.3-59-g8ed1b From 40f11adc7cd9281227f0a6a627d966dd0a5f0cd9 Mon Sep 17 00:00:00 2001 From: Srinath Mannam Date: Fri, 18 Aug 2017 21:50:48 -0500 Subject: PCI: Avoid race while enabling upstream bridges When we enable a device, we first enable any upstream bridges. If a bridge has multiple downstream devices and we enable them simultaneously, the race to enable the upstream bridge may cause problems. Consider this hierarchy: bridge A --+-- device B +-- device C If drivers for B and C call pci_enable_device() simultaneously, both will attempt to enable A, which involves setting PCI_COMMAND_MASTER via pci_set_master() and PCI_COMMAND_MEMORY via pci_enable_resources(). In the following sequence, B's update to set A's PCI_COMMAND_MEMORY is lost, and neither B nor C will work correctly: B C pci_set_master(A) cmd = read(A, PCI_COMMAND) cmd |= PCI_COMMAND_MASTER pci_set_master(A) cmd = read(A, PCI_COMMAND) cmd |= PCI_COMMAND_MASTER write(A, PCI_COMMAND, cmd) pci_enable_device(A) pci_enable_resources(A) cmd = read(A, PCI_COMMAND) cmd |= PCI_COMMAND_MEMORY write(A, PCI_COMMAND, cmd) write(A, PCI_COMMAND, cmd) Avoid this race by holding a new pci_bridge_mutex while enabling a bridge. This ensures that both PCI_COMMAND_MASTER and PCI_COMMAND_MEMORY will be updated before another thread can start enabling the bridge. Note that although pci_enable_bridge() is recursive, it enables any upstream bridges *before* acquiring the mutex. When it acquires the mutex and calls pci_set_master() and pci_enable_device(), any upstream bridges have already been enabled so pci_enable_device() will not deadlock by calling pci_enable_bridge() again. Signed-off-by: Srinath Mannam [bhelgaas: changelog, comment] Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index af0cc3456dc1..7cb29a223b73 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -52,6 +52,7 @@ static void pci_pme_list_scan(struct work_struct *work); static LIST_HEAD(pci_pme_list); static DEFINE_MUTEX(pci_pme_list_mutex); static DECLARE_DELAYED_WORK(pci_pme_work, pci_pme_list_scan); +static DEFINE_MUTEX(pci_bridge_mutex); struct pci_pme_device { struct list_head list; @@ -1348,10 +1349,16 @@ static void pci_enable_bridge(struct pci_dev *dev) if (bridge) pci_enable_bridge(bridge); + /* + * Hold pci_bridge_mutex to prevent a race when enabling two + * devices below the bridge simultaneously. The race may cause a + * PCI_COMMAND_MEMORY update to be lost (see changelog). + */ + mutex_lock(&pci_bridge_mutex); if (pci_is_enabled(dev)) { if (!dev->is_busmaster) pci_set_master(dev); - return; + goto end; } retval = pci_enable_device(dev); @@ -1359,6 +1366,8 @@ static void pci_enable_bridge(struct pci_dev *dev) dev_err(&dev->dev, "Error enabling bridge (%d), continuing\n", retval); pci_set_master(dev); +end: + mutex_unlock(&pci_bridge_mutex); } static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) @@ -1383,7 +1392,7 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) return 0; /* already enabled */ bridge = pci_upstream_bridge(dev); - if (bridge) + if (bridge && !pci_is_enabled(bridge)) pci_enable_bridge(bridge); /* only skip sriov related */ -- cgit v1.2.3-59-g8ed1b From 9f98275671a58516f87b7eb02a3497635861930a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 29 Aug 2017 14:45:43 -0500 Subject: PCI: Always check for non-CRS response before timeout While waiting for a device to become ready (i.e., to return a non-CRS completion to a read of its Vendor ID), if we got a valid response to the very last read before timing out, we printed a warning and gave up on the device even though it was actually ready. For a typical 60s timeout, we wait about 65s (it's not exact because of the exponential backoff), but we treated devices that became ready between 33s and 65s as though they failed. Move the Device ID read later so we check whether the device is ready before checking for a timeout. Thanks to Sinan Kaya , reorder reads so we always check device presence after sleep, since it's pointless to sleep unless we recheck afterwards. Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c81c9835f4c7..150093817c59 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1876,17 +1876,18 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, if (!crs_timeout) return false; - msleep(delay); - delay *= 2; - if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) - return false; - /* Card hasn't responded in 60 seconds? Must be stuck. */ if (delay > crs_timeout) { printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not responding\n", pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); return false; } + + msleep(delay); + delay *= 2; + + if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) + return false; } return true; -- cgit v1.2.3-59-g8ed1b From 62bc6a6f7468bc6d6cb39177504e79df401aea76 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 29 Aug 2017 14:45:44 -0500 Subject: PCI: Add pci_bus_crs_vendor_id() to detect CRS response data Add pci_bus_crs_vendor_id() to determine whether data returned for a config read of the Vendor ID indicates a Configuration Request Retry Status (CRS) response. Per PCIe r3.1, sec 2.3.2, this data is only returned if: - CRS Software Visibility is enabled, - a config read includes both bytes of the Vendor ID, and - the read receives a CRS completion Signed-off-by: Sinan Kaya [bhelgaas: changelog, change name to pci_bus_crs_vendor_id(), make static in probe.c, use it in pci_bus_read_dev_vendor_id()] Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 150093817c59..5e001bbc2392 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1853,6 +1853,11 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus) } EXPORT_SYMBOL(pci_alloc_dev); +static bool pci_bus_crs_vendor_id(u32 l) +{ + return (l & 0xffff) == 0x0001; +} + bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, int crs_timeout) { @@ -1872,7 +1877,7 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, * by the PCIe spec. Ignore the device ID and only check for * (vendor id == 1). */ - while ((*l & 0xffff) == 0x0001) { + while (pci_bus_crs_vendor_id(*l)) { if (!crs_timeout) return false; -- cgit v1.2.3-59-g8ed1b From 6a802ef0af94e7148c3fef71f5502654b65ed46a Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 29 Aug 2017 14:45:44 -0500 Subject: PCI: Factor out pci_bus_wait_crs() Configuration Request Retry Status (CRS) was previously hidden inside pci_bus_read_dev_vendor_id(). We want to add support for CRS in other situations, such as waiting for a device to become ready after a Function Level Reset. Move CRS handling into pci_bus_wait_crs() so it can be called from other places. Signed-off-by: Sinan Kaya [bhelgaas: pass pointer, not value, to pci_bus_wait_crs() so caller gets correct Vendor ID] Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5e001bbc2392..f09d8cd3df7a 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1858,30 +1858,24 @@ static bool pci_bus_crs_vendor_id(u32 l) return (l & 0xffff) == 0x0001; } -bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, - int crs_timeout) +static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l, + int timeout) { int delay = 1; - if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) - return false; + if (!pci_bus_crs_vendor_id(*l)) + return true; /* not a CRS completion */ - /* some broken boards return 0 or ~0 if a slot is empty: */ - if (*l == 0xffffffff || *l == 0x00000000 || - *l == 0x0000ffff || *l == 0xffff0000) - return false; + if (!timeout) + return false; /* CRS, but caller doesn't want to wait */ /* - * Configuration Request Retry Status. Some root ports return the - * actual device ID instead of the synthetic ID (0xFFFF) required - * by the PCIe spec. Ignore the device ID and only check for - * (vendor id == 1). + * We got the reserved Vendor ID that indicates a completion with + * Configuration Request Retry Status (CRS). Retry until we get a + * valid Vendor ID or we time out. */ while (pci_bus_crs_vendor_id(*l)) { - if (!crs_timeout) - return false; - - if (delay > crs_timeout) { + if (delay > timeout) { printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not responding\n", pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -1897,6 +1891,23 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, return true; } + +bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, + int timeout) +{ + if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) + return false; + + /* some broken boards return 0 or ~0 if a slot is empty: */ + if (*l == 0xffffffff || *l == 0x00000000 || + *l == 0x0000ffff || *l == 0xffff0000) + return false; + + if (pci_bus_crs_vendor_id(*l)) + return pci_bus_wait_crs(bus, devfn, l, timeout); + + return true; +} EXPORT_SYMBOL(pci_bus_read_dev_vendor_id); /* -- cgit v1.2.3-59-g8ed1b From 821cdad5c46cae94ce65b9a98614c70a6ff021f8 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 29 Aug 2017 14:45:45 -0500 Subject: PCI: Wait up to 60 seconds for device to become ready after FLR Sporadic reset issues have been observed with an Intel 750 NVMe drive while assigning the physical function to the guest machine. The sequence of events observed is as follows: - perform a Function Level Reset (FLR) - sleep up to 1000ms total - read ~0 from PCI_COMMAND (CRS completion for config read) - warn that the device didn't return from FLR - touch the device before it's ready - device drops config writes when we restore register settings (there's no mechanism for software to learn about CRS completions for writes) - incomplete register restore leaves device in inconsistent state - device probe fails because device is in inconsistent state After reset, an endpoint may respond to config requests with Configuration Request Retry Status (CRS) to indicate that it is not ready to accept new requests. See PCIe r3.1, sec 2.3.1 and 6.6.2. Increase the timeout value from 1 second to 60 seconds to cover the period where device responds with CRS and also report polling progress. Signed-off-by: Sinan Kaya [bhelgaas: include the mandatory 100ms in the delays we print] Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 52 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 15 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7cb29a223b73..827a9f99a8e5 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3820,27 +3820,49 @@ int pci_wait_for_pending_transaction(struct pci_dev *dev) } EXPORT_SYMBOL(pci_wait_for_pending_transaction); -/* - * We should only need to wait 100ms after FLR, but some devices take longer. - * Wait for up to 1000ms for config space to return something other than -1. - * Intel IGD requires this when an LCD panel is attached. We read the 2nd - * dword because VFs don't implement the 1st dword. - */ static void pci_flr_wait(struct pci_dev *dev) { - int i = 0; + int delay = 1, timeout = 60000; u32 id; - do { - msleep(100); + /* + * Per PCIe r3.1, sec 6.6.2, a device must complete an FLR within + * 100ms, but may silently discard requests while the FLR is in + * progress. Wait 100ms before trying to access the device. + */ + msleep(100); + + /* + * After 100ms, the device should not silently discard config + * requests, but it may still indicate that it needs more time by + * responding to them with CRS completions. The Root Port will + * generally synthesize ~0 data to complete the read (except when + * CRS SV is enabled and the read was for the Vendor ID; in that + * case it synthesizes 0x0001 data). + * + * Wait for the device to return a non-CRS completion. Read the + * Command register instead of Vendor ID so we don't have to + * contend with the CRS SV value. + */ + pci_read_config_dword(dev, PCI_COMMAND, &id); + while (id == ~0) { + if (delay > timeout) { + dev_warn(&dev->dev, "not ready %dms after FLR; giving up\n", + 100 + delay - 1); + return; + } + + if (delay > 1000) + dev_info(&dev->dev, "not ready %dms after FLR; waiting\n", + 100 + delay - 1); + + msleep(delay); + delay *= 2; pci_read_config_dword(dev, PCI_COMMAND, &id); - } while (i++ < 10 && id == ~0); + } - if (id == ~0) - dev_warn(&dev->dev, "Failed to return from FLR\n"); - else if (i > 1) - dev_info(&dev->dev, "Required additional %dms to return from FLR\n", - (i - 1) * 100); + if (delay > 1000) + dev_info(&dev->dev, "ready %dms after FLR\n", 100 + delay - 1); } /** -- cgit v1.2.3-59-g8ed1b From e78e661fae3296c4e20a029d6bd04c5e925c72de Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 29 Aug 2017 14:45:45 -0500 Subject: PCI: Warn periodically while waiting for non-CRS ("device ready") status Add a print statement in pci_bus_wait_crs() so that user observes the progress of device polling instead of silently waiting for timeout to be reached. Signed-off-by: Sinan Kaya [bhelgaas: check for timeout first so we don't print "waiting, giving up", always print time we've slept (not the actual timeout, print a "ready" message if we've printed a "waiting" message] Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f09d8cd3df7a..9b8a445b9294 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1876,11 +1876,16 @@ static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l, */ while (pci_bus_crs_vendor_id(*l)) { if (delay > timeout) { - printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not responding\n", - pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), - PCI_FUNC(devfn)); + pr_warn("pci %04x:%02x:%02x.%d: not ready after %dms; giving up\n", + pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1); + return false; } + if (delay >= 1000) + pr_info("pci %04x:%02x:%02x.%d: not ready after %dms; waiting\n", + pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1); msleep(delay); delay *= 2; @@ -1889,6 +1894,11 @@ static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l, return false; } + if (delay >= 1000) + pr_info("pci %04x:%02x:%02x.%d: ready after %dms\n", + pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), delay - 1); + return true; } -- cgit v1.2.3-59-g8ed1b