aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/powerplay/smu_v11_0.c')
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v11_0.c139
1 files changed, 92 insertions, 47 deletions
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 655ba4fb05dc..ae0361e225bb 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -23,6 +23,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/reboot.h>
#define SMU_11_0_PARTIAL_PPTABLE
@@ -57,7 +58,7 @@ static int smu_v11_0_send_msg_without_waiting(struct smu_context *smu,
uint16_t msg)
{
struct amdgpu_device *adev = smu->adev;
- WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
+ WREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
return 0;
}
@@ -65,7 +66,7 @@ static int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg)
{
struct amdgpu_device *adev = smu->adev;
- *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
+ *arg = RREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_82);
return 0;
}
@@ -75,7 +76,7 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu)
uint32_t cur_value, i, timeout = adev->usec_timeout * 10;
for (i = 0; i < timeout; i++) {
- cur_value = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
+ cur_value = RREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_90);
if ((cur_value & MP1_C2PMSG_90__CONTENT_MASK) != 0)
return cur_value == 0x1 ? 0 : -EIO;
@@ -83,7 +84,10 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu)
}
/* timeout means wrong logic */
- return -ETIME;
+ if (i == timeout)
+ return -ETIME;
+
+ return RREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO;
}
int
@@ -107,9 +111,9 @@ smu_v11_0_send_msg_with_param(struct smu_context *smu,
goto out;
}
- WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
+ WREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
- WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, param);
+ WREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_82, param);
smu_v11_0_send_msg_without_waiting(smu, (uint16_t)index);
@@ -119,6 +123,7 @@ smu_v11_0_send_msg_with_param(struct smu_context *smu,
smu_get_message_name(smu, msg), index, param, ret);
goto out;
}
+
if (read_arg) {
ret = smu_v11_0_read_arg(smu, read_arg);
if (ret) {
@@ -201,13 +206,15 @@ int smu_v11_0_load_microcode(struct smu_context *smu)
const struct smc_firmware_header_v1_0 *hdr;
uint32_t addr_start = MP1_SRAM;
uint32_t i;
+ uint32_t smc_fw_size;
uint32_t mp1_fw_flags;
hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
src = (const uint32_t *)(adev->pm.fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ smc_fw_size = hdr->header.ucode_size_bytes;
- for (i = 1; i < MP1_SMC_SIZE/4 - 1; i++) {
+ for (i = 1; i < smc_fw_size/4 - 1; i++) {
WREG32_PCIE(addr_start, src[i]);
addr_start += 4;
}
@@ -264,23 +271,23 @@ int smu_v11_0_check_fw_version(struct smu_context *smu)
switch (smu->adev->asic_type) {
case CHIP_VEGA20:
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_VG20;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_VG20;
break;
case CHIP_ARCTURUS:
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_ARCT;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_ARCT;
break;
case CHIP_NAVI10:
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_NV10;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_NV10;
break;
case CHIP_NAVI12:
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_NV12;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_NV12;
break;
case CHIP_NAVI14:
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_NV14;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_NV14;
break;
default:
pr_err("smu unsupported asic type:%d.\n", smu->adev->asic_type);
- smu->smc_if_version = SMU11_DRIVER_IF_VERSION_INV;
+ smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_INV;
break;
}
@@ -292,10 +299,10 @@ int smu_v11_0_check_fw_version(struct smu_context *smu)
* Considering above, we just leave user a warning message instead
* of halt driver loading.
*/
- if (if_version != smu->smc_if_version) {
+ if (if_version != smu->smc_driver_if_version) {
pr_info("smu driver if version = 0x%08x, smu fw if version = 0x%08x, "
"smu fw version = 0x%08x (%d.%d.%d)\n",
- smu->smc_if_version, if_version,
+ smu->smc_driver_if_version, if_version,
smu_version, smu_major, smu_minor, smu_debug);
pr_warn("SMU driver if version not matched\n");
}
@@ -479,8 +486,6 @@ int smu_v11_0_init_power(struct smu_context *smu)
{
struct smu_power_context *smu_power = &smu->smu_power;
- if (!smu->pm_enabled)
- return 0;
if (smu_power->power_context || smu_power->power_context_size != 0)
return -EINVAL;
@@ -497,8 +502,6 @@ int smu_v11_0_fini_power(struct smu_context *smu)
{
struct smu_power_context *smu_power = &smu->smu_power;
- if (!smu->pm_enabled)
- return 0;
if (!smu_power->power_context || smu_power->power_context_size == 0)
return -EINVAL;
@@ -730,8 +733,9 @@ int smu_v11_0_parse_pptable(struct smu_context *smu)
struct smu_table_context *table_context = &smu->smu_table;
struct smu_table *table = &table_context->tables[SMU_TABLE_PPTABLE];
+ /* during TDR we need to free and alloc the pptable */
if (table_context->driver_pptable)
- return -EINVAL;
+ kfree(table_context->driver_pptable);
table_context->driver_pptable = kzalloc(table->size, GFP_KERNEL);
@@ -771,6 +775,9 @@ int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk)
{
int ret;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
ret = smu_send_smc_msg_with_param(smu,
SMU_MSG_SetMinDeepSleepDcefclk, clk, NULL);
if (ret)
@@ -783,8 +790,6 @@ int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu)
{
struct smu_table_context *table_context = &smu->smu_table;
- if (!smu->pm_enabled)
- return 0;
if (!table_context)
return -EINVAL;
@@ -816,6 +821,9 @@ int smu_v11_0_set_tool_table_location(struct smu_context *smu)
int ret = 0;
struct smu_table *tool_table = &smu->smu_table.tables[SMU_TABLE_PMSTATUSLOG];
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
if (tool_table->mc_address) {
ret = smu_send_smc_msg_with_param(smu,
SMU_MSG_SetToolsDramAddrHigh,
@@ -835,6 +843,9 @@ int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count)
{
int ret = 0;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
if (!smu->pm_enabled)
return ret;
@@ -849,6 +860,9 @@ int smu_v11_0_set_allowed_mask(struct smu_context *smu)
int ret = 0;
uint32_t feature_mask[2];
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
mutex_lock(&feature->mutex);
if (bitmap_empty(feature->allowed, SMU_FEATURE_MAX) || feature->feature_num < 64)
goto failed;
@@ -877,6 +891,9 @@ int smu_v11_0_get_enabled_mask(struct smu_context *smu,
struct smu_feature *feature = &smu->smu_feature;
int ret = 0;
+ if (amdgpu_sriov_vf(smu->adev) && !amdgpu_sriov_is_pp_one_vf(smu->adev))
+ return 0;
+
if (!feature_mask || num < 2)
return -EINVAL;
@@ -932,8 +949,12 @@ int smu_v11_0_notify_display_change(struct smu_context *smu)
{
int ret = 0;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
if (!smu->pm_enabled)
return ret;
+
if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT) &&
smu->adev->gmc.vram_type == AMDGPU_VRAM_TYPE_HBM)
ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1, NULL);
@@ -948,9 +969,6 @@ smu_v11_0_get_max_sustainable_clock(struct smu_context *smu, uint32_t *clock,
int ret = 0;
int clk_id;
- if (!smu->pm_enabled)
- return ret;
-
if ((smu_msg_get_index(smu, SMU_MSG_GetDcModeMaxDpmFreq) < 0) ||
(smu_msg_get_index(smu, SMU_MSG_GetMaxDpmFreq) < 0))
return 0;
@@ -1096,6 +1114,9 @@ int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n)
int ret = 0;
uint32_t max_power_limit;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
max_power_limit = smu_v11_0_get_max_power_limit(smu);
if (n > max_power_limit) {
@@ -1205,9 +1226,6 @@ int smu_v11_0_start_thermal_control(struct smu_context *smu)
struct smu_temperature_range range;
struct amdgpu_device *adev = smu->adev;
- if (!smu->pm_enabled)
- return ret;
-
memcpy(&range, &smu11_thermal_policy[0], sizeof(struct smu_temperature_range));
ret = smu_get_thermal_temperature_range(smu, &range);
@@ -1321,9 +1339,6 @@ smu_v11_0_display_clock_voltage_request(struct smu_context *smu,
enum smu_clk_type clk_select = 0;
uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000;
- if (!smu->pm_enabled)
- return -EINVAL;
-
if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) ||
smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) {
switch (clk_type) {
@@ -1533,39 +1548,59 @@ static int smu_v11_0_ack_ac_dc_interrupt(struct smu_context *smu)
#define THM_11_0__SRCID__THM_DIG_THERM_L2H 0 /* ASIC_TEMP > CG_THERMAL_INT.DIG_THERM_INTH */
#define THM_11_0__SRCID__THM_DIG_THERM_H2L 1 /* ASIC_TEMP < CG_THERMAL_INT.DIG_THERM_INTL */
+#define SMUIO_11_0__SRCID__SMUIO_GPIO19 83
+
static int smu_v11_0_irq_process(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
uint32_t client_id = entry->client_id;
uint32_t src_id = entry->src_id;
+ /*
+ * ctxid is used to distinguish different
+ * events for SMCToHost interrupt.
+ */
+ uint32_t ctxid = entry->src_data[0];
if (client_id == SOC15_IH_CLIENTID_THM) {
switch (src_id) {
case THM_11_0__SRCID__THM_DIG_THERM_L2H:
- pr_warn("GPU over temperature range detected on PCIe %d:%d.%d!\n",
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
+ dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n");
+ /*
+ * SW CTF just occurred.
+ * Try to do a graceful shutdown to prevent further damage.
+ */
+ dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n");
+ orderly_poweroff(true);
break;
case THM_11_0__SRCID__THM_DIG_THERM_H2L:
- pr_warn("GPU under temperature range detected on PCIe %d:%d.%d!\n",
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
+ dev_emerg(adev->dev, "ERROR: GPU under temperature range detected\n");
break;
default:
- pr_warn("GPU under temperature range unknown src id (%d), detected on PCIe %d:%d.%d!\n",
- src_id,
- PCI_BUS_NUM(adev->pdev->devfn),
- PCI_SLOT(adev->pdev->devfn),
- PCI_FUNC(adev->pdev->devfn));
+ dev_emerg(adev->dev, "ERROR: GPU under temperature range unknown src id (%d)\n",
+ src_id);
break;
-
}
+ } else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) {
+ dev_emerg(adev->dev, "ERROR: GPU HW Critical Temperature Fault(aka CTF) detected!\n");
+ /*
+ * HW CTF just occurred. Shutdown to prevent further damage.
+ */
+ dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU HW CTF!\n");
+ orderly_poweroff(true);
} else if (client_id == SOC15_IH_CLIENTID_MP1) {
- if (src_id == 0xfe)
- smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
+ if (src_id == 0xfe) {
+ switch (ctxid) {
+ case 0x3:
+ dev_dbg(adev->dev, "Switched to AC mode!\n");
+ smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
+ break;
+ case 0x4:
+ dev_dbg(adev->dev, "Switched to DC mode!\n");
+ smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
+ break;
+ }
+ }
}
return 0;
@@ -1605,6 +1640,13 @@ int smu_v11_0_register_irq_handler(struct smu_context *smu)
if (ret)
return ret;
+ /* Register CTF(GPIO_19) interrupt */
+ ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_ROM_SMUIO,
+ SMUIO_11_0__SRCID__SMUIO_GPIO19,
+ irq_src);
+ if (ret)
+ return ret;
+
ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_MP1,
0xfe,
irq_src);
@@ -1833,6 +1875,9 @@ int smu_v11_0_override_pcie_parameters(struct smu_context *smu)
uint32_t pcie_gen = 0, pcie_width = 0;
int ret;
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
pcie_gen = 3;
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)