aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/pci/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci/pci.c')
-rw-r--r--drivers/pci/pci.c336
1 files changed, 224 insertions, 112 deletions
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index f5532468dd31..61df2e04176d 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -835,14 +835,16 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
return -EINVAL;
/*
- * Validate current state:
- * Can enter D0 from any state, but if we can only go deeper
- * to sleep if we're already in a low power state
+ * Validate transition: We can enter D0 from any state, but if
+ * we're already in a low-power state, we can only go deeper. E.g.,
+ * we can go from D1 to D3, but we can't go directly from D3 to D1;
+ * we'd have to go from D3 to D0, then to D1.
*/
if (state != PCI_D0 && dev->current_state <= PCI_D3cold
&& dev->current_state > state) {
- pci_err(dev, "invalid power transition (from state %d to %d)\n",
- dev->current_state, state);
+ pci_err(dev, "invalid power transition (from %s to %s)\n",
+ pci_power_name(dev->current_state),
+ pci_power_name(state));
return -EINVAL;
}
@@ -852,6 +854,12 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
return -EIO;
pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+ if (pmcsr == (u16) ~0) {
+ pci_err(dev, "can't change power state from %s to %s (config space inaccessible)\n",
+ pci_power_name(dev->current_state),
+ pci_power_name(state));
+ return -EIO;
+ }
/*
* If we're (effectively) in D3, force entire word to 0.
@@ -887,13 +895,14 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
if (state == PCI_D3hot || dev->current_state == PCI_D3hot)
pci_dev_d3_sleep(dev);
else if (state == PCI_D2 || dev->current_state == PCI_D2)
- udelay(PCI_PM_D2_DELAY);
+ msleep(PCI_PM_D2_DELAY);
pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
if (dev->current_state != state)
- pci_info_ratelimited(dev, "Refused to change power state, currently in D%d\n",
- dev->current_state);
+ pci_info_ratelimited(dev, "refused to change power state from %s to %s\n",
+ pci_power_name(dev->current_state),
+ pci_power_name(state));
/*
* According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT
@@ -964,7 +973,7 @@ void pci_refresh_power_state(struct pci_dev *dev)
* @dev: PCI device to handle.
* @state: State to put the device into.
*/
-static int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state)
+int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state)
{
int error;
@@ -980,6 +989,7 @@ static int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state)
return error;
}
+EXPORT_SYMBOL_GPL(pci_platform_power_transition);
/**
* pci_wakeup - Wake up a PCI device
@@ -1003,34 +1013,70 @@ void pci_wakeup_bus(struct pci_bus *bus)
pci_walk_bus(bus, pci_wakeup, NULL);
}
+static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
+{
+ int delay = 1;
+ u32 id;
+
+ /*
+ * After reset, the device should not silently discard config
+ * requests, but it may still indicate that it needs more time by
+ * responding to them with CRS completions. The Root Port will
+ * generally synthesize ~0 data to complete the read (except when
+ * CRS SV is enabled and the read was for the Vendor ID; in that
+ * case it synthesizes 0x0001 data).
+ *
+ * Wait for the device to return a non-CRS completion. Read the
+ * Command register instead of Vendor ID so we don't have to
+ * contend with the CRS SV value.
+ */
+ pci_read_config_dword(dev, PCI_COMMAND, &id);
+ while (id == ~0) {
+ if (delay > timeout) {
+ pci_warn(dev, "not ready %dms after %s; giving up\n",
+ delay - 1, reset_type);
+ return -ENOTTY;
+ }
+
+ if (delay > 1000)
+ pci_info(dev, "not ready %dms after %s; waiting\n",
+ delay - 1, reset_type);
+
+ msleep(delay);
+ delay *= 2;
+ pci_read_config_dword(dev, PCI_COMMAND, &id);
+ }
+
+ if (delay > 1000)
+ pci_info(dev, "ready %dms after %s\n", delay - 1,
+ reset_type);
+
+ return 0;
+}
+
/**
- * __pci_start_power_transition - Start power transition of a PCI device
- * @dev: PCI device to handle.
- * @state: State to put the device into.
+ * pci_power_up - Put the given device into D0
+ * @dev: PCI device to power up
*/
-static void __pci_start_power_transition(struct pci_dev *dev, pci_power_t state)
+int pci_power_up(struct pci_dev *dev)
{
- if (state == PCI_D0) {
- pci_platform_power_transition(dev, PCI_D0);
+ pci_platform_power_transition(dev, PCI_D0);
+
+ /*
+ * Mandatory power management transition delays are handled in
+ * pci_pm_resume_noirq() and pci_pm_runtime_resume() of the
+ * corresponding bridge.
+ */
+ if (dev->runtime_d3cold) {
/*
- * Mandatory power management transition delays, see
- * PCI Express Base Specification Revision 2.0 Section
- * 6.6.1: Conventional Reset. Do not delay for
- * devices powered on/off by corresponding bridge,
- * because have already delayed for the bridge.
+ * When powering on a bridge from D3cold, the whole hierarchy
+ * may be powered on into D0uninitialized state, resume them to
+ * give them a chance to suspend again
*/
- if (dev->runtime_d3cold) {
- if (dev->d3cold_delay && !dev->imm_ready)
- msleep(dev->d3cold_delay);
- /*
- * When powering on a bridge from D3cold, the
- * whole hierarchy may be powered on into
- * D0uninitialized state, resume them to give
- * them a chance to suspend again
- */
- pci_wakeup_bus(dev->subordinate);
- }
+ pci_wakeup_bus(dev->subordinate);
}
+
+ return pci_raw_set_power_state(dev, PCI_D0);
}
/**
@@ -1058,27 +1104,6 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
}
/**
- * __pci_complete_power_transition - Complete power transition of a PCI device
- * @dev: PCI device to handle.
- * @state: State to put the device into.
- *
- * This function should not be called directly by device drivers.
- */
-int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state)
-{
- int ret;
-
- if (state <= PCI_D0)
- return -EINVAL;
- ret = pci_platform_power_transition(dev, state);
- /* Power off the bridge may power off the whole hierarchy */
- if (!ret && state == PCI_D3cold)
- pci_bus_set_current_state(dev->subordinate, PCI_D3cold);
- return ret;
-}
-EXPORT_SYMBOL_GPL(__pci_complete_power_transition);
-
-/**
* pci_set_power_state - Set the power state of a PCI device
* @dev: PCI device to handle.
* @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
@@ -1118,7 +1143,8 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
if (dev->current_state == state)
return 0;
- __pci_start_power_transition(dev, state);
+ if (state == PCI_D0)
+ return pci_power_up(dev);
/*
* This device is quirked not to be put into D3, so don't put it in
@@ -1134,23 +1160,16 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
error = pci_raw_set_power_state(dev, state > PCI_D3hot ?
PCI_D3hot : state);
- if (!__pci_complete_power_transition(dev, state))
- error = 0;
+ if (pci_platform_power_transition(dev, state))
+ return error;
- return error;
-}
-EXPORT_SYMBOL(pci_set_power_state);
+ /* Powering off a bridge may power off the whole hierarchy */
+ if (state == PCI_D3cold)
+ pci_bus_set_current_state(dev->subordinate, PCI_D3cold);
-/**
- * pci_power_up - Put the given device into D0 forcibly
- * @dev: PCI device to power up
- */
-void pci_power_up(struct pci_dev *dev)
-{
- __pci_start_power_transition(dev, PCI_D0);
- pci_raw_set_power_state(dev, PCI_D0);
- pci_update_current_state(dev, PCI_D0);
+ return 0;
}
+EXPORT_SYMBOL(pci_set_power_state);
/**
* pci_choose_state - Choose the power state of a PCI device
@@ -4431,47 +4450,6 @@ int pci_wait_for_pending_transaction(struct pci_dev *dev)
}
EXPORT_SYMBOL(pci_wait_for_pending_transaction);
-static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
-{
- int delay = 1;
- u32 id;
-
- /*
- * After reset, the device should not silently discard config
- * requests, but it may still indicate that it needs more time by
- * responding to them with CRS completions. The Root Port will
- * generally synthesize ~0 data to complete the read (except when
- * CRS SV is enabled and the read was for the Vendor ID; in that
- * case it synthesizes 0x0001 data).
- *
- * Wait for the device to return a non-CRS completion. Read the
- * Command register instead of Vendor ID so we don't have to
- * contend with the CRS SV value.
- */
- pci_read_config_dword(dev, PCI_COMMAND, &id);
- while (id == ~0) {
- if (delay > timeout) {
- pci_warn(dev, "not ready %dms after %s; giving up\n",
- delay - 1, reset_type);
- return -ENOTTY;
- }
-
- if (delay > 1000)
- pci_info(dev, "not ready %dms after %s; waiting\n",
- delay - 1, reset_type);
-
- msleep(delay);
- delay *= 2;
- pci_read_config_dword(dev, PCI_COMMAND, &id);
- }
-
- if (delay > 1000)
- pci_info(dev, "ready %dms after %s\n", delay - 1,
- reset_type);
-
- return 0;
-}
-
/**
* pcie_has_flr - check if a device supports function level resets
* @dev: device to check
@@ -4606,16 +4584,19 @@ static int pci_pm_reset(struct pci_dev *dev, int probe)
pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
pci_dev_d3_sleep(dev);
- return pci_dev_wait(dev, "PM D3->D0", PCIE_RESET_READY_POLL_MS);
+ return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
}
+
/**
- * pcie_wait_for_link - Wait until link is active or inactive
+ * pcie_wait_for_link_delay - Wait until link is active or inactive
* @pdev: Bridge device
* @active: waiting for active or inactive?
+ * @delay: Delay to wait after link has become active (in ms)
*
* Use this to wait till link becomes active or inactive.
*/
-bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
+static bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active,
+ int delay)
{
int timeout = 1000;
bool ret;
@@ -4652,13 +4633,144 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
timeout -= 10;
}
if (active && ret)
- msleep(100);
+ msleep(delay);
else if (ret != active)
pci_info(pdev, "Data Link Layer Link Active not %s in 1000 msec\n",
active ? "set" : "cleared");
return ret == active;
}
+/**
+ * pcie_wait_for_link - Wait until link is active or inactive
+ * @pdev: Bridge device
+ * @active: waiting for active or inactive?
+ *
+ * Use this to wait till link becomes active or inactive.
+ */
+bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
+{
+ return pcie_wait_for_link_delay(pdev, active, 100);
+}
+
+/*
+ * Find maximum D3cold delay required by all the devices on the bus. The
+ * spec says 100 ms, but firmware can lower it and we allow drivers to
+ * increase it as well.
+ *
+ * Called with @pci_bus_sem locked for reading.
+ */
+static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
+{
+ const struct pci_dev *pdev;
+ int min_delay = 100;
+ int max_delay = 0;
+
+ list_for_each_entry(pdev, &bus->devices, bus_list) {
+ if (pdev->d3cold_delay < min_delay)
+ min_delay = pdev->d3cold_delay;
+ if (pdev->d3cold_delay > max_delay)
+ max_delay = pdev->d3cold_delay;
+ }
+
+ return max(min_delay, max_delay);
+}
+
+/**
+ * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
+ * @dev: PCI bridge
+ *
+ * Handle necessary delays before access to the devices on the secondary
+ * side of the bridge are permitted after D3cold to D0 transition.
+ *
+ * For PCIe this means the delays in PCIe 5.0 section 6.6.1. For
+ * conventional PCI it means Tpvrh + Trhfa specified in PCI 3.0 section
+ * 4.3.2.
+ */
+void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
+{
+ struct pci_dev *child;
+ int delay;
+
+ if (pci_dev_is_disconnected(dev))
+ return;
+
+ if (!pci_is_bridge(dev) || !dev->bridge_d3)
+ return;
+
+ down_read(&pci_bus_sem);
+
+ /*
+ * We only deal with devices that are present currently on the bus.
+ * For any hot-added devices the access delay is handled in pciehp
+ * board_added(). In case of ACPI hotplug the firmware is expected
+ * to configure the devices before OS is notified.
+ */
+ if (!dev->subordinate || list_empty(&dev->subordinate->devices)) {
+ up_read(&pci_bus_sem);
+ return;
+ }
+
+ /* Take d3cold_delay requirements into account */
+ delay = pci_bus_max_d3cold_delay(dev->subordinate);
+ if (!delay) {
+ up_read(&pci_bus_sem);
+ return;
+ }
+
+ child = list_first_entry(&dev->subordinate->devices, struct pci_dev,
+ bus_list);
+ up_read(&pci_bus_sem);
+
+ /*
+ * Conventional PCI and PCI-X we need to wait Tpvrh + Trhfa before
+ * accessing the device after reset (that is 1000 ms + 100 ms). In
+ * practice this should not be needed because we don't do power
+ * management for them (see pci_bridge_d3_possible()).
+ */
+ if (!pci_is_pcie(dev)) {
+ pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
+ msleep(1000 + delay);
+ return;
+ }
+
+ /*
+ * For PCIe downstream and root ports that do not support speeds
+ * greater than 5 GT/s need to wait minimum 100 ms. For higher
+ * speeds (gen3) we need to wait first for the data link layer to
+ * become active.
+ *
+ * However, 100 ms is the minimum and the PCIe spec says the
+ * software must allow at least 1s before it can determine that the
+ * device that did not respond is a broken device. There is
+ * evidence that 100 ms is not always enough, for example certain
+ * Titan Ridge xHCI controller does not always respond to
+ * configuration requests if we only wait for 100 ms (see
+ * https://bugzilla.kernel.org/show_bug.cgi?id=203885).
+ *
+ * Therefore we wait for 100 ms and check for the device presence.
+ * If it is still not present give it an additional 100 ms.
+ */
+ if (!pcie_downstream_port(dev))
+ return;
+
+ if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
+ pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
+ msleep(delay);
+ } else {
+ pci_dbg(dev, "waiting %d ms for downstream link, after activation\n",
+ delay);
+ if (!pcie_wait_for_link_delay(dev, true, delay)) {
+ /* Did not train, no need to wait any further */
+ return;
+ }
+ }
+
+ if (!pci_device_is_present(child)) {
+ pci_dbg(child, "waiting additional %d ms to become accessible\n", delay);
+ msleep(delay);
+ }
+}
+
void pci_reset_secondary_bus(struct pci_dev *dev)
{
u16 ctrl;