aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r--drivers/gpu/drm/radeon/Makefile4
-rw-r--r--drivers/gpu/drm/radeon/atombios_crtc.c1
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.c752
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.h8
-rw-r--r--drivers/gpu/drm/radeon/ci_smc.c2
-rw-r--r--drivers/gpu/drm/radeon/cik.c214
-rw-r--r--drivers/gpu/drm/radeon/cik_reg.h136
-rw-r--r--drivers/gpu/drm/radeon/cik_sdma.c42
-rw-r--r--drivers/gpu/drm/radeon/cikd.h93
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c14
-rw-r--r--drivers/gpu/drm/radeon/evergreen_dma.c18
-rw-r--r--drivers/gpu/drm/radeon/ni.c20
-rw-r--r--drivers/gpu/drm/radeon/ni_dma.c17
-rw-r--r--drivers/gpu/drm/radeon/ppsmc.h18
-rw-r--r--drivers/gpu/drm/radeon/pptable.h8
-rw-r--r--drivers/gpu/drm/radeon/r100.c10
-rw-r--r--drivers/gpu/drm/radeon/r200.c2
-rw-r--r--drivers/gpu/drm/radeon/r300.c6
-rw-r--r--drivers/gpu/drm/radeon/r600.c18
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c26
-rw-r--r--drivers/gpu/drm/radeon/r600_dma.c18
-rw-r--r--drivers/gpu/drm/radeon/r600_dpm.c9
-rw-r--r--drivers/gpu/drm/radeon/r600_dpm.h3
-rw-r--r--drivers/gpu/drm/radeon/radeon.h162
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.h18
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c21
-rw-r--r--drivers/gpu/drm/radeon/radeon_cs.c121
-rw-r--r--drivers/gpu/drm/radeon/radeon_cursor.c268
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c32
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_fb.c32
-rw-r--r--drivers/gpu/drm/radeon/radeon_fence.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c92
-rw-r--r--drivers/gpu/drm/radeon/radeon_ib.c16
-rw-r--r--drivers/gpu/drm/radeon/radeon_kfd.c563
-rw-r--r--drivers/gpu/drm/radeon/radeon_kfd.h47
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c9
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_crtc.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_mode.h20
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.c83
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_semaphore.c154
-rw-r--r--drivers/gpu/drm/radeon/radeon_sync.c220
-rw-r--r--drivers/gpu/drm/radeon/radeon_trace.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c27
-rw-r--r--drivers/gpu/drm/radeon/radeon_uvd.c14
-rw-r--r--drivers/gpu/drm/radeon/radeon_vce.c8
-rw-r--r--drivers/gpu/drm/radeon/radeon_vm.c236
-rw-r--r--drivers/gpu/drm/radeon/rv770_dma.c18
-rw-r--r--drivers/gpu/drm/radeon/si.c24
-rw-r--r--drivers/gpu/drm/radeon/si_dma.c37
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.c381
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.h5
-rw-r--r--drivers/gpu/drm/radeon/si_smc.c2
-rw-r--r--drivers/gpu/drm/radeon/sid.h40
-rw-r--r--drivers/gpu/drm/radeon/sislands_smc.h25
-rw-r--r--drivers/gpu/drm/radeon/smu7_discrete.h30
58 files changed, 3236 insertions, 922 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index d01b87991422..12bc21219a0e 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -80,7 +80,8 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \
rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
- ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o
+ ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \
+ radeon_sync.o
# add async DMA block
radeon-y += \
@@ -104,6 +105,7 @@ radeon-y += \
radeon_vce.o \
vce_v1_0.o \
vce_v2_0.o \
+ radeon_kfd.o
radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 30d242b25078..d59ec491dbb9 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -2039,6 +2039,7 @@ int atombios_crtc_mode_set(struct drm_crtc *crtc,
atombios_crtc_set_base(crtc, x, y, old_fb);
atombios_overscan_setup(crtc, mode, adjusted_mode);
atombios_scaler_setup(crtc);
+ radeon_cursor_reset(crtc);
/* update the hw version fpr dpm */
radeon_crtc->hw_mode = *adjusted_mode;
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 11a55e9dad7f..f373a81ba3d5 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -46,15 +46,15 @@
static const struct ci_pt_defaults defaults_hawaii_xt =
{
1, 0xF, 0xFD, 0x19, 5, 0x14, 0, 0xB0000,
- { 0x84, 0x0, 0x0, 0x7F, 0x0, 0x0, 0x5A, 0x60, 0x51, 0x8E, 0x79, 0x6B, 0x5F, 0x90, 0x79 },
- { 0x1EA, 0x1EA, 0x1EA, 0x224, 0x224, 0x224, 0x24F, 0x24F, 0x24F, 0x28E, 0x28E, 0x28E, 0x2BC, 0x2BC, 0x2BC }
+ { 0x2E, 0x00, 0x00, 0x88, 0x00, 0x00, 0x72, 0x60, 0x51, 0xA7, 0x79, 0x6B, 0x90, 0xBD, 0x79 },
+ { 0x217, 0x217, 0x217, 0x242, 0x242, 0x242, 0x269, 0x269, 0x269, 0x2A1, 0x2A1, 0x2A1, 0x2C9, 0x2C9, 0x2C9 }
};
static const struct ci_pt_defaults defaults_hawaii_pro =
{
1, 0xF, 0xFD, 0x19, 5, 0x14, 0, 0x65062,
- { 0x93, 0x0, 0x0, 0x97, 0x0, 0x0, 0x6B, 0x60, 0x51, 0x95, 0x79, 0x6B, 0x5F, 0x90, 0x79 },
- { 0x1EA, 0x1EA, 0x1EA, 0x224, 0x224, 0x224, 0x24F, 0x24F, 0x24F, 0x28E, 0x28E, 0x28E, 0x2BC, 0x2BC, 0x2BC }
+ { 0x2E, 0x00, 0x00, 0x88, 0x00, 0x00, 0x72, 0x60, 0x51, 0xA7, 0x79, 0x6B, 0x90, 0xBD, 0x79 },
+ { 0x217, 0x217, 0x217, 0x242, 0x242, 0x242, 0x269, 0x269, 0x269, 0x2A1, 0x2A1, 0x2A1, 0x2C9, 0x2C9, 0x2C9 }
};
static const struct ci_pt_defaults defaults_bonaire_xt =
@@ -184,6 +184,9 @@ static int ci_set_overdrive_target_tdp(struct radeon_device *rdev,
u32 target_tdp);
static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate);
+static PPSMC_Result ci_send_msg_to_smc_with_parameter(struct radeon_device *rdev,
+ PPSMC_Msg msg, u32 parameter);
+
static struct ci_power_info *ci_get_pi(struct radeon_device *rdev)
{
struct ci_power_info *pi = rdev->pm.dpm.priv;
@@ -249,7 +252,10 @@ static void ci_initialize_powertune_defaults(struct radeon_device *rdev)
if (pi->caps_power_containment) {
pi->caps_cac = true;
- pi->enable_bapm_feature = true;
+ if (rdev->family == CHIP_HAWAII)
+ pi->enable_bapm_feature = false;
+ else
+ pi->enable_bapm_feature = true;
pi->enable_tdc_limit_feature = true;
pi->enable_pkg_pwr_tracking_feature = true;
}
@@ -352,6 +358,21 @@ static int ci_populate_dw8(struct radeon_device *rdev)
return 0;
}
+static int ci_populate_fuzzy_fan(struct radeon_device *rdev)
+{
+ struct ci_power_info *pi = ci_get_pi(rdev);
+
+ if ((rdev->pm.dpm.fan.fan_output_sensitivity & (1 << 15)) ||
+ (rdev->pm.dpm.fan.fan_output_sensitivity == 0))
+ rdev->pm.dpm.fan.fan_output_sensitivity =
+ rdev->pm.dpm.fan.default_fan_output_sensitivity;
+
+ pi->smc_powertune_table.FuzzyFan_PwmSetDelta =
+ cpu_to_be16(rdev->pm.dpm.fan.fan_output_sensitivity);
+
+ return 0;
+}
+
static int ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(struct radeon_device *rdev)
{
struct ci_power_info *pi = ci_get_pi(rdev);
@@ -477,6 +498,9 @@ static int ci_populate_pm_base(struct radeon_device *rdev)
ret = ci_populate_dw8(rdev);
if (ret)
return ret;
+ ret = ci_populate_fuzzy_fan(rdev);
+ if (ret)
+ return ret;
ret = ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(rdev);
if (ret)
return ret;
@@ -690,6 +714,25 @@ static int ci_enable_smc_cac(struct radeon_device *rdev, bool enable)
return ret;
}
+static int ci_enable_thermal_based_sclk_dpm(struct radeon_device *rdev,
+ bool enable)
+{
+ struct ci_power_info *pi = ci_get_pi(rdev);
+ PPSMC_Result smc_result = PPSMC_Result_OK;
+
+ if (pi->thermal_sclk_dpm_enabled) {
+ if (enable)
+ smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_ENABLE_THERMAL_DPM);
+ else
+ smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_DISABLE_THERMAL_DPM);
+ }
+
+ if (smc_result == PPSMC_Result_OK)
+ return 0;
+ else
+ return -EINVAL;
+}
+
static int ci_power_control_set_level(struct radeon_device *rdev)
{
struct ci_power_info *pi = ci_get_pi(rdev);
@@ -700,13 +743,11 @@ static int ci_power_control_set_level(struct radeon_device *rdev)
int ret = 0;
bool adjust_polarity = false; /* ??? */
- if (pi->caps_power_containment &&
- (pi->power_containment_features & POWERCONTAINMENT_FEATURE_BAPM)) {
+ if (pi->caps_power_containment) {
adjust_percent = adjust_polarity ?
rdev->pm.dpm.tdp_adjustment : (-1 * rdev->pm.dpm.tdp_adjustment);
target_tdp = ((100 + adjust_percent) *
(s32)cac_tdp_table->configurable_tdp) / 100;
- target_tdp *= 256;
ret = ci_set_overdrive_target_tdp(rdev, (u32)target_tdp);
}
@@ -814,7 +855,7 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev,
}
}
-static int ci_set_thermal_temperature_range(struct radeon_device *rdev,
+static int ci_thermal_set_temperature_range(struct radeon_device *rdev,
int min_temp, int max_temp)
{
int low_temp = 0 * 1000;
@@ -850,6 +891,350 @@ static int ci_set_thermal_temperature_range(struct radeon_device *rdev,
return 0;
}
+static int ci_thermal_enable_alert(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 thermal_int = RREG32_SMC(CG_THERMAL_INT);
+ PPSMC_Result result;
+
+ if (enable) {
+ thermal_int &= ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
+ WREG32_SMC(CG_THERMAL_INT, thermal_int);
+ rdev->irq.dpm_thermal = false;
+ result = ci_send_msg_to_smc(rdev, PPSMC_MSG_Thermal_Cntl_Enable);
+ if (result != PPSMC_Result_OK) {
+ DRM_DEBUG_KMS("Could not enable thermal interrupts.\n");
+ return -EINVAL;
+ }
+ } else {
+ thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
+ WREG32_SMC(CG_THERMAL_INT, thermal_int);
+ rdev->irq.dpm_thermal = true;
+ result = ci_send_msg_to_smc(rdev, PPSMC_MSG_Thermal_Cntl_Disable);
+ if (result != PPSMC_Result_OK) {
+ DRM_DEBUG_KMS("Could not disable thermal interrupts.\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static void ci_fan_ctrl_set_static_mode(struct radeon_device *rdev, u32 mode)
+{
+ struct ci_power_info *pi = ci_get_pi(rdev);
+ u32 tmp;
+
+ if (pi->fan_ctrl_is_in_default_mode) {
+ tmp = (RREG32_SMC(CG_FDO_CTRL2) & FDO_PWM_MODE_MASK) >> FDO_PWM_MODE_SHIFT;
+ pi->fan_ctrl_default_mode = tmp;
+ tmp = (RREG32_SMC(CG_FDO_CTRL2) & TMIN_MASK) >> TMIN_SHIFT;
+ pi->t_min = tmp;
+ pi->fan_ctrl_is_in_default_mode = false;
+ }
+
+ tmp = RREG32_SMC(CG_FDO_CTRL2) & ~TMIN_MASK;
+ tmp |= TMIN(0);
+ WREG32_SMC(CG_FDO_CTRL2, tmp);
+
+ tmp = RREG32_SMC(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK;
+ tmp |= FDO_PWM_MODE(mode);
+ WREG32_SMC(CG_FDO_CTRL2, tmp);
+}
+
+static int ci_thermal_setup_fan_table(struct radeon_device *rdev)
+{
+ struct ci_power_info *pi = ci_get_pi(rdev);
+ SMU7_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE };
+ u32 duty100;
+ u32 t_diff1, t_diff2, pwm_diff1, pwm_diff2;
+ u16 fdo_min, slope1, slope2;
+ u32 reference_clock, tmp;
+ int ret;
+ u64 tmp64;
+
+ if (!pi->fan_table_start) {
+ rdev->pm.dpm.fan.ucode_fan_control = false;
+ return 0;
+ }
+
+ duty100 = (RREG32_SMC(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+
+ if (duty100 == 0) {
+ rdev->pm.dpm.fan.ucode_fan_control = false;
+ return 0;
+ }
+
+ tmp64 = (u64)rdev->pm.dpm.fan.pwm_min * duty100;
+ do_div(tmp64, 10000);
+ fdo_min = (u16)tmp64;
+
+ t_diff1 = rdev->pm.dpm.fan.t_med - rdev->pm.dpm.fan.t_min;
+ t_diff2 = rdev->pm.dpm.fan.t_high - rdev->pm.dpm.fan.t_med;
+
+ pwm_diff1 = rdev->pm.dpm.fan.pwm_med - rdev->pm.dpm.fan.pwm_min;
+ pwm_diff2 = rdev->pm.dpm.fan.pwm_high - rdev->pm.dpm.fan.pwm_med;
+
+ slope1 = (u16)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100);
+ slope2 = (u16)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100);
+
+ fan_table.TempMin = cpu_to_be16((50 + rdev->pm.dpm.fan.t_min) / 100);
+ fan_table.TempMed = cpu_to_be16((50 + rdev->pm.dpm.fan.t_med) / 100);
+ fan_table.TempMax = cpu_to_be16((50 + rdev->pm.dpm.fan.t_max) / 100);
+
+ fan_table.Slope1 = cpu_to_be16(slope1);
+ fan_table.Slope2 = cpu_to_be16(slope2);
+
+ fan_table.FdoMin = cpu_to_be16(fdo_min);
+
+ fan_table.HystDown = cpu_to_be16(rdev->pm.dpm.fan.t_hyst);
+
+ fan_table.HystUp = cpu_to_be16(1);
+
+ fan_table.HystSlope = cpu_to_be16(1);
+
+ fan_table.TempRespLim = cpu_to_be16(5);
+
+ reference_clock = radeon_get_xclk(rdev);
+
+ fan_table.RefreshPeriod = cpu_to_be32((rdev->pm.dpm.fan.cycle_delay *
+ reference_clock) / 1600);
+
+ fan_table.FdoMax = cpu_to_be16((u16)duty100);
+
+ tmp = (RREG32_SMC(CG_MULT_THERMAL_CTRL) & TEMP_SEL_MASK) >> TEMP_SEL_SHIFT;
+ fan_table.TempSrc = (uint8_t)tmp;
+
+ ret = ci_copy_bytes_to_smc(rdev,
+ pi->fan_table_start,
+ (u8 *)(&fan_table),
+ sizeof(fan_table),
+ pi->sram_end);
+
+ if (ret) {
+ DRM_ERROR("Failed to load fan table to the SMC.");
+ rdev->pm.dpm.fan.ucode_fan_control = false;
+ }
+
+ return 0;
+}
+
+static int ci_fan_ctrl_start_smc_fan_control(struct radeon_device *rdev)
+{
+ struct ci_power_info *pi = ci_get_pi(rdev);
+ PPSMC_Result ret;
+
+ if (pi->caps_od_fuzzy_fan_control_support) {
+ ret = ci_send_msg_to_smc_with_parameter(rdev,
+ PPSMC_StartFanControl,
+ FAN_CONTROL_FUZZY);
+ if (ret != PPSMC_Result_OK)
+ return -EINVAL;
+ ret = ci_send_msg_to_smc_with_parameter(rdev,
+ PPSMC_MSG_SetFanPwmMax,
+ rdev->pm.dpm.fan.default_max_fan_pwm);
+ if (ret != PPSMC_Result_OK)
+ return -EINVAL;
+ } else {
+ ret = ci_send_msg_to_smc_with_parameter(rdev,
+ PPSMC_StartFanControl,
+ FAN_CONTROL_TABLE);
+ if (ret != PPSMC_Result_OK)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#if 0
+static int ci_fan_ctrl_stop_smc_fan_control(struct radeon_device *rdev)
+{
+ PPSMC_Result ret;
+
+ ret = ci_send_msg_to_smc(rdev, PPSMC_StopFanControl);
+ if (ret == PPSMC_Result_OK)
+ return 0;
+ else
+ return -EINVAL;
+}
+
+static int ci_fan_ctrl_get_fan_speed_percent(struct radeon_device *rdev,
+ u32 *speed)
+{
+ u32 duty, duty100;
+ u64 tmp64;
+
+ if (rdev->pm.no_fan)
+ return -ENOENT;
+
+ duty100 = (RREG32_SMC(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+ duty = (RREG32_SMC(CG_THERMAL_STATUS) & FDO_PWM_DUTY_MASK) >> FDO_PWM_DUTY_SHIFT;
+
+ if (duty100 == 0)
+ return -EINVAL;
+
+ tmp64 = (u64)duty * 100;
+ do_div(tmp64, duty100);
+ *speed = (u32)tmp64;
+
+ if (*speed > 100)
+ *speed = 100;
+
+ return 0;
+}
+
+static int ci_fan_ctrl_set_fan_speed_percent(struct radeon_device *rdev,
+ u32 speed)
+{
+ u32 tmp;
+ u32 duty, duty100;
+ u64 tmp64;
+
+ if (rdev->pm.no_fan)
+ return -ENOENT;
+
+ if (speed > 100)
+ return -EINVAL;
+
+ if (rdev->pm.dpm.fan.ucode_fan_control)
+ ci_fan_ctrl_stop_smc_fan_control(rdev);
+
+ duty100 = (RREG32_SMC(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+
+ if (duty100 == 0)
+ return -EINVAL;
+
+ tmp64 = (u64)speed * duty100;
+ do_div(tmp64, 100);
+ duty = (u32)tmp64;
+
+ tmp = RREG32_SMC(CG_FDO_CTRL0) & ~FDO_STATIC_DUTY_MASK;
+ tmp |= FDO_STATIC_DUTY(duty);
+ WREG32_SMC(CG_FDO_CTRL0, tmp);
+
+ ci_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC);
+
+ return 0;
+}
+
+static int ci_fan_ctrl_get_fan_speed_rpm(struct radeon_device *rdev,
+ u32 *speed)
+{
+ u32 tach_period;
+ u32 xclk = radeon_get_xclk(rdev);
+
+ if (rdev->pm.no_fan)
+ return -ENOENT;
+
+ if (rdev->pm.fan_pulses_per_revolution == 0)
+ return -ENOENT;
+
+ tach_period = (RREG32_SMC(CG_TACH_STATUS) & TACH_PERIOD_MASK) >> TACH_PERIOD_SHIFT;
+ if (tach_period == 0)
+ return -ENOENT;
+
+ *speed = 60 * xclk * 10000 / tach_period;
+
+ return 0;
+}
+
+static int ci_fan_ctrl_set_fan_speed_rpm(struct radeon_device *rdev,
+ u32 speed)
+{
+ u32 tach_period, tmp;
+ u32 xclk = radeon_get_xclk(rdev);
+
+ if (rdev->pm.no_fan)
+ return -ENOENT;
+
+ if (rdev->pm.fan_pulses_per_revolution == 0)
+ return -ENOENT;
+
+ if ((speed < rdev->pm.fan_min_rpm) ||
+ (speed > rdev->pm.fan_max_rpm))
+ return -EINVAL;
+
+ if (rdev->pm.dpm.fan.ucode_fan_control)
+ ci_fan_ctrl_stop_smc_fan_control(rdev);
+
+ tach_period = 60 * xclk * 10000 / (8 * speed);
+ tmp = RREG32_SMC(CG_TACH_CTRL) & ~TARGET_PERIOD_MASK;
+ tmp |= TARGET_PERIOD(tach_period);
+ WREG32_SMC(CG_TACH_CTRL, tmp);
+
+ ci_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC_RPM);
+
+ return 0;
+}
+#endif
+
+static void ci_fan_ctrl_set_default_mode(struct radeon_device *rdev)
+{
+ struct ci_power_info *pi = ci_get_pi(rdev);
+ u32 tmp;
+
+ if (!pi->fan_ctrl_is_in_default_mode) {
+ tmp = RREG32_SMC(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK;
+ tmp |= FDO_PWM_MODE(pi->fan_ctrl_default_mode);
+ WREG32_SMC(CG_FDO_CTRL2, tmp);
+
+ tmp = RREG32_SMC(CG_FDO_CTRL2) & ~TMIN_MASK;
+ tmp |= TMIN(pi->t_min);
+ WREG32_SMC(CG_FDO_CTRL2, tmp);
+ pi->fan_ctrl_is_in_default_mode = true;
+ }
+}
+
+static void ci_thermal_start_smc_fan_control(struct radeon_device *rdev)
+{
+ if (rdev->pm.dpm.fan.ucode_fan_control) {
+ ci_fan_ctrl_start_smc_fan_control(rdev);
+ ci_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC);
+ }
+}
+
+static void ci_thermal_initialize(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ if (rdev->pm.fan_pulses_per_revolution) {
+ tmp = RREG32_SMC(CG_TACH_CTRL) & ~EDGE_PER_REV_MASK;
+ tmp |= EDGE_PER_REV(rdev->pm.fan_pulses_per_revolution -1);
+ WREG32_SMC(CG_TACH_CTRL, tmp);
+ }
+
+ tmp = RREG32_SMC(CG_FDO_CTRL2) & ~TACH_PWM_RESP_RATE_MASK;
+ tmp |= TACH_PWM_RESP_RATE(0x28);
+ WREG32_SMC(CG_FDO_CTRL2, tmp);
+}
+
+static int ci_thermal_start_thermal_controller(struct radeon_device *rdev)
+{
+ int ret;
+
+ ci_thermal_initialize(rdev);
+ ret = ci_thermal_set_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
+ if (ret)
+ return ret;
+ ret = ci_thermal_enable_alert(rdev, true);
+ if (ret)
+ return ret;
+ if (rdev->pm.dpm.fan.ucode_fan_control) {
+ ret = ci_thermal_setup_fan_table(rdev);
+ if (ret)
+ return ret;
+ ci_thermal_start_smc_fan_control(rdev);
+ }
+
+ return 0;
+}
+
+static void ci_thermal_stop_thermal_controller(struct radeon_device *rdev)
+{
+ if (!rdev->pm.no_fan)
+ ci_fan_ctrl_set_default_mode(rdev);
+}
+
#if 0
static int ci_read_smc_soft_register(struct radeon_device *rdev,
u16 reg_offset, u32 *value)
@@ -1253,7 +1638,7 @@ static int ci_dpm_force_state_sclk(struct radeon_device *rdev, u32 n)
if (!pi->sclk_dpm_key_disabled) {
PPSMC_Result smc_result =
- ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, n);
+ ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SCLKDPM_SetEnabledMask, 1 << n);
if (smc_result != PPSMC_Result_OK)
return -EINVAL;
}
@@ -1267,7 +1652,7 @@ static int ci_dpm_force_state_mclk(struct radeon_device *rdev, u32 n)
if (!pi->mclk_dpm_key_disabled) {
PPSMC_Result smc_result =
- ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_MCLKDPM_ForceState, n);
+ ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_MCLKDPM_SetEnabledMask, 1 << n);
if (smc_result != PPSMC_Result_OK)
return -EINVAL;
}
@@ -2042,6 +2427,33 @@ static int ci_force_switch_to_arb_f0(struct radeon_device *rdev)
return ni_copy_and_switch_arb_sets(rdev, tmp, MC_CG_ARB_FREQ_F0);
}
+static void ci_register_patching_mc_arb(struct radeon_device *rdev,
+ const u32 engine_clock,
+ const u32 memory_clock,
+ u32 *dram_timimg2)
+{
+ bool patch;
+ u32 tmp, tmp2;
+
+ tmp = RREG32(MC_SEQ_MISC0);
+ patch = ((tmp & 0x0000f00) == 0x300) ? true : false;
+
+ if (patch &&
+ ((rdev->pdev->device == 0x67B0) ||
+ (rdev->pdev->device == 0x67B1))) {
+ if ((memory_clock > 100000) && (memory_clock <= 125000)) {
+ tmp2 = (((0x31 * engine_clock) / 125000) - 1) & 0xff;
+ *dram_timimg2 &= ~0x00ff0000;
+ *dram_timimg2 |= tmp2 << 16;
+ } else if ((memory_clock > 125000) && (memory_clock <= 137500)) {
+ tmp2 = (((0x36 * engine_clock) / 137500) - 1) & 0xff;
+ *dram_timimg2 &= ~0x00ff0000;
+ *dram_timimg2 |= tmp2 << 16;
+ }
+ }
+}
+
+
static int ci_populate_memory_timing_parameters(struct radeon_device *rdev,
u32 sclk,
u32 mclk,
@@ -2057,6 +2469,8 @@ static int ci_populate_memory_timing_parameters(struct radeon_device *rdev,
dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2);
burst_time = RREG32(MC_ARB_BURST_TIME) & STATE0_MASK;
+ ci_register_patching_mc_arb(rdev, sclk, mclk, &dram_timing2);
+
arb_regs->McArbDramTiming = cpu_to_be32(dram_timing);
arb_regs->McArbDramTiming2 = cpu_to_be32(dram_timing2);
arb_regs->McArbBurstTime = (u8)burst_time;
@@ -2351,10 +2765,10 @@ static int ci_calculate_mclk_params(struct radeon_device *rdev,
u32 tmp;
u32 reference_clock = rdev->clock.mpll.reference_freq;
- if (pi->mem_gddr5)
- freq_nom = memory_clock * 4;
+ if (mpll_param.qdr == 1)
+ freq_nom = memory_clock * 4 * (1 << mpll_param.post_div);
else
- freq_nom = memory_clock * 2;
+ freq_nom = memory_clock * 2 * (1 << mpll_param.post_div);
tmp = (freq_nom / reference_clock);
tmp = tmp * tmp;
@@ -2434,7 +2848,6 @@ static int ci_populate_single_memory_level(struct radeon_device *rdev,
&memory_level->MinVddcPhases);
memory_level->EnabledForThrottle = 1;
- memory_level->EnabledForActivity = 1;
memory_level->UpH = 0;
memory_level->DownH = 100;
memory_level->VoltageDownH = 0;
@@ -2767,7 +3180,6 @@ static int ci_populate_single_graphic_level(struct radeon_device *rdev,
graphic_level->CcPwrDynRm = 0;
graphic_level->CcPwrDynRm1 = 0;
- graphic_level->EnabledForActivity = 1;
graphic_level->EnabledForThrottle = 1;
graphic_level->UpH = 0;
graphic_level->DownH = 0;
@@ -2816,10 +3228,13 @@ static int ci_populate_all_graphic_levels(struct radeon_device *rdev)
&pi->smc_state_table.GraphicsLevel[i]);
if (ret)
return ret;
+ if (i > 1)
+ pi->smc_state_table.GraphicsLevel[i].DeepSleepDivId = 0;
if (i == (dpm_table->sclk_table.count - 1))
pi->smc_state_table.GraphicsLevel[i].DisplayWatermark =
PPSMC_DISPLAY_WATERMARK_HIGH;
}
+ pi->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1;
pi->smc_state_table.GraphicsDpmLevelCount = (u8)dpm_table->sclk_table.count;
pi->dpm_level_enable_mask.sclk_dpm_enable_mask =
@@ -2863,6 +3278,16 @@ static int ci_populate_all_memory_levels(struct radeon_device *rdev)
return ret;
}
+ pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1;
+
+ if ((dpm_table->mclk_table.count >= 2) &&
+ ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1))) {
+ pi->smc_state_table.MemoryLevel[1].MinVddc =
+ pi->smc_state_table.MemoryLevel[0].MinVddc;
+ pi->smc_state_table.MemoryLevel[1].MinVddcPhases =
+ pi->smc_state_table.MemoryLevel[0].MinVddcPhases;
+ }
+
pi->smc_state_table.MemoryLevel[0].ActivityLevel = cpu_to_be16(0x1F);
pi->smc_state_table.MemoryDpmLevelCount = (u8)dpm_table->mclk_table.count;
@@ -2919,9 +3344,14 @@ static int ci_setup_default_pcie_tables(struct radeon_device *rdev)
&pi->dpm_table.pcie_speed_table,
SMU7_MAX_LEVELS_LINK);
- ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 0,
- pi->pcie_gen_powersaving.min,
- pi->pcie_lane_powersaving.min);
+ if (rdev->family == CHIP_BONAIRE)
+ ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 0,
+ pi->pcie_gen_powersaving.min,
+ pi->pcie_lane_powersaving.max);
+ else
+ ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 0,
+ pi->pcie_gen_powersaving.min,
+ pi->pcie_lane_powersaving.min);
ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 1,
pi->pcie_gen_performance.min,
pi->pcie_lane_performance.min);
@@ -2988,19 +3418,21 @@ static int ci_setup_default_dpm_tables(struct radeon_device *rdev)
allowed_sclk_vddc_table->entries[i].clk)) {
pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].value =
allowed_sclk_vddc_table->entries[i].clk;
- pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].enabled = true;
+ pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].enabled =
+ (i == 0) ? true : false;
pi->dpm_table.sclk_table.count++;
}
}
pi->dpm_table.mclk_table.count = 0;
for (i = 0; i < allowed_mclk_table->count; i++) {
- if ((i==0) ||
+ if ((i == 0) ||
(pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count-1].value !=
allowed_mclk_table->entries[i].clk)) {
pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].value =
allowed_mclk_table->entries[i].clk;
- pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].enabled = true;
+ pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].enabled =
+ (i == 0) ? true : false;
pi->dpm_table.mclk_table.count++;
}
}
@@ -3166,7 +3598,7 @@ static int ci_init_smc_table(struct radeon_device *rdev)
table->VddcVddciDelta = 4000;
table->PhaseResponseTime = 0;
table->MemoryThermThrottleEnable = 1;
- table->PCIeBootLinkLevel = 0;
+ table->PCIeBootLinkLevel = pi->dpm_table.pcie_speed_table.count - 1;
table->PCIeGenInterval = 1;
if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2)
table->SVI2Enable = 1;
@@ -3320,6 +3752,8 @@ static int ci_upload_dpm_level_enable_mask(struct radeon_device *rdev)
struct ci_power_info *pi = ci_get_pi(rdev);
PPSMC_Result result;
+ ci_apply_disp_minimum_voltage_request(rdev);
+
if (!pi->sclk_dpm_key_disabled) {
if (pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
result = ci_send_msg_to_smc_with_parameter(rdev,
@@ -3339,7 +3773,7 @@ static int ci_upload_dpm_level_enable_mask(struct radeon_device *rdev)
return -EINVAL;
}
}
-
+#if 0
if (!pi->pcie_dpm_key_disabled) {
if (pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
result = ci_send_msg_to_smc_with_parameter(rdev,
@@ -3349,9 +3783,7 @@ static int ci_upload_dpm_level_enable_mask(struct radeon_device *rdev)
return -EINVAL;
}
}
-
- ci_apply_disp_minimum_voltage_request(rdev);
-
+#endif
return 0;
}
@@ -3377,7 +3809,7 @@ static void ci_find_dpm_states_clocks_in_dpm_table(struct radeon_device *rdev,
pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
} else {
/* XXX check display min clock requirements */
- if (0 != CISLAND_MINIMUM_ENGINE_CLOCK)
+ if (CISLAND_MINIMUM_ENGINE_CLOCK != CISLAND_MINIMUM_ENGINE_CLOCK)
pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_SCLK;
}
@@ -3707,62 +4139,61 @@ int ci_dpm_force_performance_level(struct radeon_device *rdev,
enum radeon_dpm_forced_level level)
{
struct ci_power_info *pi = ci_get_pi(rdev);
- PPSMC_Result smc_result;
u32 tmp, levels, i;
int ret;
if (level == RADEON_DPM_FORCED_LEVEL_HIGH) {
- if ((!pi->sclk_dpm_key_disabled) &&
- pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
+ if ((!pi->pcie_dpm_key_disabled) &&
+ pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
levels = 0;
- tmp = pi->dpm_level_enable_mask.sclk_dpm_enable_mask;
+ tmp = pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
while (tmp >>= 1)
levels++;
if (levels) {
- ret = ci_dpm_force_state_sclk(rdev, levels);
+ ret = ci_dpm_force_state_pcie(rdev, level);
if (ret)
return ret;
for (i = 0; i < rdev->usec_timeout; i++) {
- tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) &
- CURR_SCLK_INDEX_MASK) >> CURR_SCLK_INDEX_SHIFT;
+ tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX_1) &
+ CURR_PCIE_INDEX_MASK) >> CURR_PCIE_INDEX_SHIFT;
if (tmp == levels)
break;
udelay(1);
}
}
}
- if ((!pi->mclk_dpm_key_disabled) &&
- pi->dpm_level_enable_mask.mclk_dpm_enable_mask) {
+ if ((!pi->sclk_dpm_key_disabled) &&
+ pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
levels = 0;
- tmp = pi->dpm_level_enable_mask.mclk_dpm_enable_mask;
+ tmp = pi->dpm_level_enable_mask.sclk_dpm_enable_mask;
while (tmp >>= 1)
levels++;
if (levels) {
- ret = ci_dpm_force_state_mclk(rdev, levels);
+ ret = ci_dpm_force_state_sclk(rdev, levels);
if (ret)
return ret;
for (i = 0; i < rdev->usec_timeout; i++) {
tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) &
- CURR_MCLK_INDEX_MASK) >> CURR_MCLK_INDEX_SHIFT;
+ CURR_SCLK_INDEX_MASK) >> CURR_SCLK_INDEX_SHIFT;
if (tmp == levels)
break;
udelay(1);
}
}
}
- if ((!pi->pcie_dpm_key_disabled) &&
- pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
+ if ((!pi->mclk_dpm_key_disabled) &&
+ pi->dpm_level_enable_mask.mclk_dpm_enable_mask) {
levels = 0;
- tmp = pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
+ tmp = pi->dpm_level_enable_mask.mclk_dpm_enable_mask;
while (tmp >>= 1)
levels++;
if (levels) {
- ret = ci_dpm_force_state_pcie(rdev, level);
+ ret = ci_dpm_force_state_mclk(rdev, levels);
if (ret)
return ret;
for (i = 0; i < rdev->usec_timeout; i++) {
- tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX_1) &
- CURR_PCIE_INDEX_MASK) >> CURR_PCIE_INDEX_SHIFT;
+ tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) &
+ CURR_MCLK_INDEX_MASK) >> CURR_MCLK_INDEX_SHIFT;
if (tmp == levels)
break;
udelay(1);
@@ -3816,21 +4247,17 @@ int ci_dpm_force_performance_level(struct radeon_device *rdev,
}
}
} else if (level == RADEON_DPM_FORCED_LEVEL_AUTO) {
- if (!pi->sclk_dpm_key_disabled) {
- smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_NoForcedLevel);
- if (smc_result != PPSMC_Result_OK)
- return -EINVAL;
- }
- if (!pi->mclk_dpm_key_disabled) {
- smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_NoForcedLevel);
- if (smc_result != PPSMC_Result_OK)
- return -EINVAL;
- }
if (!pi->pcie_dpm_key_disabled) {
- smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PCIeDPM_UnForceLevel);
+ PPSMC_Result smc_result;
+
+ smc_result = ci_send_msg_to_smc(rdev,
+ PPSMC_MSG_PCIeDPM_UnForceLevel);
if (smc_result != PPSMC_Result_OK)
return -EINVAL;
}
+ ret = ci_upload_dpm_level_enable_mask(rdev);
+ if (ret)
+ return ret;
}
rdev->pm.dpm.forced_level = level;
@@ -4036,6 +4463,96 @@ static int ci_copy_vbios_mc_reg_table(const struct atom_mc_reg_table *table,
return 0;
}
+static int ci_register_patching_mc_seq(struct radeon_device *rdev,
+ struct ci_mc_reg_table *table)
+{
+ u8 i, k;
+ u32 tmp;
+ bool patch;
+
+ tmp = RREG32(MC_SEQ_MISC0);
+ patch = ((tmp & 0x0000f00) == 0x300) ? true : false;
+
+ if (patch &&
+ ((rdev->pdev->device == 0x67B0) ||
+ (rdev->pdev->device == 0x67B1))) {
+ for (i = 0; i < table->last; i++) {
+ if (table->last >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
+ return -EINVAL;
+ switch(table->mc_reg_address[i].s1 >> 2) {
+ case MC_SEQ_MISC1:
+ for (k = 0; k < table->num_entries; k++) {
+ if ((table->mc_reg_table_entry[k].mclk_max == 125000) ||
+ (table->mc_reg_table_entry[k].mclk_max == 137500))
+ table->mc_reg_table_entry[k].mc_data[i] =
+ (table->mc_reg_table_entry[k].mc_data[i] & 0xFFFFFFF8) |
+ 0x00000007;
+ }
+ break;
+ case MC_SEQ_WR_CTL_D0:
+ for (k = 0; k < table->num_entries; k++) {
+ if ((table->mc_reg_table_entry[k].mclk_max == 125000) ||
+ (table->mc_reg_table_entry[k].mclk_max == 137500))
+ table->mc_reg_table_entry[k].mc_data[i] =
+ (table->mc_reg_table_entry[k].mc_data[i] & 0xFFFF0F00) |
+ 0x0000D0DD;
+ }
+ break;
+ case MC_SEQ_WR_CTL_D1:
+ for (k = 0; k < table->num_entries; k++) {
+ if ((table->mc_reg_table_entry[k].mclk_max == 125000) ||
+ (table->mc_reg_table_entry[k].mclk_max == 137500))
+ table->mc_reg_table_entry[k].mc_data[i] =
+ (table->mc_reg_table_entry[k].mc_data[i] & 0xFFFF0F00) |
+ 0x0000D0DD;
+ }
+ break;
+ case MC_SEQ_WR_CTL_2:
+ for (k = 0; k < table->num_entries; k++) {
+ if ((table->mc_reg_table_entry[k].mclk_max == 125000) ||
+ (table->mc_reg_table_entry[k].mclk_max == 137500))
+ table->mc_reg_table_entry[k].mc_data[i] = 0;
+ }
+ break;
+ case MC_SEQ_CAS_TIMING:
+ for (k = 0; k < table->num_entries; k++) {
+ if (table->mc_reg_table_entry[k].mclk_max == 125000)
+ table->mc_reg_table_entry[k].mc_data[i] =
+ (table->mc_reg_table_entry[k].mc_data[i] & 0xFFE0FE0F) |
+ 0x000C0140;
+ else if (table->mc_reg_table_entry[k].mclk_max == 137500)
+ table->mc_reg_table_entry[k].mc_data[i] =
+ (table->mc_reg_table_entry[k].mc_data[i] & 0xFFE0FE0F) |
+ 0x000C0150;
+ }
+ break;
+ case MC_SEQ_MISC_TIMING:
+ for (k = 0; k < table->num_entries; k++) {
+ if (table->mc_reg_table_entry[k].mclk_max == 125000)
+ table->mc_reg_table_entry[k].mc_data[i] =
+ (table->mc_reg_table_entry[k].mc_data[i] & 0xFFFFFFE0) |
+ 0x00000030;
+ else if (table->mc_reg_table_entry[k].mclk_max == 137500)
+ table->mc_reg_table_entry[k].mc_data[i] =
+ (table->mc_reg_table_entry[k].mc_data[i] & 0xFFFFFFE0) |
+ 0x00000035;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ WREG32(MC_SEQ_IO_DEBUG_INDEX, 3);
+ tmp = RREG32(MC_SEQ_IO_DEBUG_DATA);
+ tmp = (tmp & 0xFFF8FFFF) | (1 << 16);
+ WREG32(MC_SEQ_IO_DEBUG_INDEX, 3);
+ WREG32(MC_SEQ_IO_DEBUG_DATA, tmp);
+ }
+
+ return 0;
+}
+
static int ci_initialize_mc_reg_table(struct radeon_device *rdev)
{
struct ci_power_info *pi = ci_get_pi(rdev);
@@ -4079,6 +4596,10 @@ static int ci_initialize_mc_reg_table(struct radeon_device *rdev)
ci_set_s0_mc_reg_index(ci_table);
+ ret = ci_register_patching_mc_seq(rdev, ci_table);
+ if (ret)
+ goto init_mc_done;
+
ret = ci_set_mc_special_registers(rdev, ci_table);
if (ret)
goto init_mc_done;
@@ -4675,36 +5196,51 @@ int ci_dpm_enable(struct radeon_device *rdev)
return ret;
}
+ ret = ci_power_control_set_level(rdev);
+ if (ret) {
+ DRM_ERROR("ci_power_control_set_level failed\n");
+ return ret;
+ }
+
ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, true);
+ ret = ci_enable_thermal_based_sclk_dpm(rdev, true);
+ if (ret) {
+ DRM_ERROR("ci_enable_thermal_based_sclk_dpm failed\n");
+ return ret;
+ }
+
+ ci_thermal_start_thermal_controller(rdev);
+
ci_update_current_ps(rdev, boot_ps);
return 0;
}
-int ci_dpm_late_enable(struct radeon_device *rdev)
+static int ci_set_temperature_range(struct radeon_device *rdev)
{
int ret;
- if (rdev->irq.installed &&
- r600_is_internal_thermal_sensor(rdev->pm.int_thermal_type)) {
-#if 0
- PPSMC_Result result;
-#endif
- ret = ci_set_thermal_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
- if (ret) {
- DRM_ERROR("ci_set_thermal_temperature_range failed\n");
- return ret;
- }
- rdev->irq.dpm_thermal = true;
- radeon_irq_set(rdev);
-#if 0
- result = ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableThermalInterrupt);
+ ret = ci_thermal_enable_alert(rdev, false);
+ if (ret)
+ return ret;
+ ret = ci_thermal_set_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
+ if (ret)
+ return ret;
+ ret = ci_thermal_enable_alert(rdev, true);
+ if (ret)
+ return ret;
- if (result != PPSMC_Result_OK)
- DRM_DEBUG_KMS("Could not enable thermal interrupts.\n");
-#endif
- }
+ return ret;
+}
+
+int ci_dpm_late_enable(struct radeon_device *rdev)
+{
+ int ret;
+
+ ret = ci_set_temperature_range(rdev);
+ if (ret)
+ return ret;
ci_dpm_powergate_uvd(rdev, true);
@@ -4721,6 +5257,8 @@ void ci_dpm_disable(struct radeon_device *rdev)
if (!ci_is_smc_running(rdev))
return;
+ ci_thermal_stop_thermal_controller(rdev);
+
if (pi->thermal_protection)
ci_enable_thermal_protection(rdev, false);
ci_enable_power_containment(rdev, false);
@@ -4729,12 +5267,13 @@ void ci_dpm_disable(struct radeon_device *rdev)
ci_enable_spread_spectrum(rdev, false);
ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, false);
ci_stop_dpm(rdev);
- ci_enable_ds_master_switch(rdev, true);
+ ci_enable_ds_master_switch(rdev, false);
ci_enable_ulv(rdev, false);
ci_clear_vc(rdev);
ci_reset_to_default(rdev);
ci_dpm_stop_smc(rdev);
ci_force_switch_to_arb_f0(rdev);
+ ci_enable_thermal_based_sclk_dpm(rdev, false);
ci_update_current_ps(rdev, boot_ps);
}
@@ -4804,11 +5343,6 @@ int ci_dpm_set_power_state(struct radeon_device *rdev)
return 0;
}
-int ci_dpm_power_control_set_level(struct radeon_device *rdev)
-{
- return ci_power_control_set_level(rdev);
-}
-
void ci_dpm_reset_asic(struct radeon_device *rdev)
{
ci_set_boot_state(rdev);
@@ -5068,6 +5602,8 @@ void ci_dpm_fini(struct radeon_device *rdev)
int ci_dpm_init(struct radeon_device *rdev)
{
int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info);
+ SMU7_Discrete_DpmTable *dpm_table;
+ struct radeon_gpio_rec gpio;
u16 data_offset, size;
u8 frev, crev;
struct ci_power_info *pi;
@@ -5137,6 +5673,7 @@ int ci_dpm_init(struct radeon_device *rdev)
pi->sclk_dpm_key_disabled = 0;
pi->mclk_dpm_key_disabled = 0;
pi->pcie_dpm_key_disabled = 0;
+ pi->thermal_sclk_dpm_enabled = 0;
/* mclk dpm is unstable on some R7 260X cards with the old mc ucode */
if ((rdev->pdev->device == 0x6658) &&
@@ -5201,6 +5738,55 @@ int ci_dpm_init(struct radeon_device *rdev)
pi->uvd_enabled = false;
+ dpm_table = &pi->smc_state_table;
+
+ gpio = radeon_atombios_lookup_gpio(rdev, VDDC_VRHOT_GPIO_PINID);
+ if (gpio.valid) {
+ dpm_table->VRHotGpio = gpio.shift;
+ rdev->pm.dpm.platform_caps |= ATOM_PP_PLATFORM_CAP_REGULATOR_HOT;
+ } else {
+ dpm_table->VRHotGpio = CISLANDS_UNUSED_GPIO_PIN;
+ rdev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_REGULATOR_HOT;
+ }
+
+ gpio = radeon_atombios_lookup_gpio(rdev, PP_AC_DC_SWITCH_GPIO_PINID);
+ if (gpio.valid) {
+ dpm_table->AcDcGpio = gpio.shift;
+ rdev->pm.dpm.platform_caps |= ATOM_PP_PLATFORM_CAP_HARDWAREDC;
+ } else {
+ dpm_table->AcDcGpio = CISLANDS_UNUSED_GPIO_PIN;
+ rdev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_HARDWAREDC;
+ }
+
+ gpio = radeon_atombios_lookup_gpio(rdev, VDDC_PCC_GPIO_PINID);
+ if (gpio.valid) {
+ u32 tmp = RREG32_SMC(CNB_PWRMGT_CNTL);
+
+ switch (gpio.shift) {
+ case 0:
+ tmp &= ~GNB_SLOW_MODE_MASK;
+ tmp |= GNB_SLOW_MODE(1);
+ break;
+ case 1:
+ tmp &= ~GNB_SLOW_MODE_MASK;
+ tmp |= GNB_SLOW_MODE(2);
+ break;
+ case 2:
+ tmp |= GNB_SLOW;
+ break;
+ case 3:
+ tmp |= FORCE_NB_PS1;
+ break;
+ case 4:
+ tmp |= DPM_ENABLED;
+ break;
+ default:
+ DRM_ERROR("Invalid PCC GPIO: %u!\n", gpio.shift);
+ break;
+ }
+ WREG32_SMC(CNB_PWRMGT_CNTL, tmp);
+ }
+
pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_NONE;
pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_NONE;
pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_NONE;
@@ -5262,6 +5848,8 @@ int ci_dpm_init(struct radeon_device *rdev)
rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc =
rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+ pi->fan_ctrl_is_in_default_mode = true;
+
return 0;
}
diff --git a/drivers/gpu/drm/radeon/ci_dpm.h b/drivers/gpu/drm/radeon/ci_dpm.h
index 93bbed977ffb..84e3d3bcf9f3 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.h
+++ b/drivers/gpu/drm/radeon/ci_dpm.h
@@ -33,6 +33,8 @@
#define CISLANDS_MAX_HARDWARE_POWERLEVELS 2
+#define CISLANDS_UNUSED_GPIO_PIN 0x7F
+
struct ci_pl {
u32 mclk;
u32 sclk;
@@ -237,6 +239,7 @@ struct ci_power_info {
u32 sclk_dpm_key_disabled;
u32 mclk_dpm_key_disabled;
u32 pcie_dpm_key_disabled;
+ u32 thermal_sclk_dpm_enabled;
struct ci_pcie_perf_range pcie_gen_performance;
struct ci_pcie_perf_range pcie_lane_performance;
struct ci_pcie_perf_range pcie_gen_powersaving;
@@ -264,6 +267,7 @@ struct ci_power_info {
bool caps_automatic_dc_transition;
bool caps_sclk_throttle_low_notification;
bool caps_dynamic_ac_timing;
+ bool caps_od_fuzzy_fan_control_support;
/* flags */
bool thermal_protection;
bool pcie_performance_request;
@@ -285,6 +289,10 @@ struct ci_power_info {
struct ci_ps current_ps;
struct radeon_ps requested_rps;
struct ci_ps requested_ps;
+ /* fan control */
+ bool fan_ctrl_is_in_default_mode;
+ u32 t_min;
+ u32 fan_ctrl_default_mode;
};
#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0
diff --git a/drivers/gpu/drm/radeon/ci_smc.c b/drivers/gpu/drm/radeon/ci_smc.c
index b630edc2fd0c..e78bcad7a43e 100644
--- a/drivers/gpu/drm/radeon/ci_smc.c
+++ b/drivers/gpu/drm/radeon/ci_smc.c
@@ -129,7 +129,7 @@ void ci_reset_smc(struct radeon_device *rdev)
int ci_program_jump_on_start(struct radeon_device *rdev)
{
- static u8 data[] = { 0xE0, 0x00, 0x80, 0x40 };
+ static const u8 data[] = { 0xE0, 0x00, 0x80, 0x40 };
return ci_copy_bytes_to_smc(rdev, 0x0, data, 4, sizeof(data)+1);
}
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 89c01fa6dd8e..6dcde3798b45 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -32,6 +32,7 @@
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
+#include "radeon_kfd.h"
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
@@ -1563,6 +1564,8 @@ static const u32 godavari_golden_registers[] =
static void cik_init_golden_registers(struct radeon_device *rdev)
{
+ /* Some of the registers might be dependent on GRBM_GFX_INDEX */
+ mutex_lock(&rdev->grbm_idx_mutex);
switch (rdev->family) {
case CHIP_BONAIRE:
radeon_program_register_sequence(rdev,
@@ -1637,6 +1640,7 @@ static void cik_init_golden_registers(struct radeon_device *rdev)
default:
break;
}
+ mutex_unlock(&rdev->grbm_idx_mutex);
}
/**
@@ -1806,7 +1810,7 @@ int ci_mc_load_microcode(struct radeon_device *rdev)
{
const __be32 *fw_data = NULL;
const __le32 *new_fw_data = NULL;
- u32 running, blackout = 0;
+ u32 running, blackout = 0, tmp;
u32 *io_mc_regs = NULL;
const __le32 *new_io_mc_regs = NULL;
int i, regs_size, ucode_size;
@@ -1866,6 +1870,15 @@ int ci_mc_load_microcode(struct radeon_device *rdev)
WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
}
}
+
+ tmp = RREG32(MC_SEQ_MISC0);
+ if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
+ WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
+ WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
+ WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
+ WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
+ }
+
/* load the MC ucode */
for (i = 0; i < ucode_size; i++) {
if (rdev->new_fw)
@@ -3419,6 +3432,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
u32 disabled_rbs = 0;
u32 enabled_rbs = 0;
+ mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < se_num; i++) {
for (j = 0; j < sh_per_se; j++) {
cik_select_se_sh(rdev, i, j);
@@ -3430,6 +3444,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
}
}
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&rdev->grbm_idx_mutex);
mask = 1;
for (i = 0; i < max_rb_num_per_se * se_num; i++) {
@@ -3440,6 +3455,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
rdev->config.cik.backend_enable_mask = enabled_rbs;
+ mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < se_num; i++) {
cik_select_se_sh(rdev, i, 0xffffffff);
data = 0;
@@ -3467,6 +3483,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
WREG32(PA_SC_RASTER_CONFIG, data);
}
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&rdev->grbm_idx_mutex);
}
/**
@@ -3684,6 +3701,12 @@ static void cik_gpu_init(struct radeon_device *rdev)
/* set HW defaults for 3D engine */
WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
+ mutex_lock(&rdev->grbm_idx_mutex);
+ /*
+ * making sure that the following register writes will be broadcasted
+ * to all the shaders
+ */
+ cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(SX_DEBUG_1, 0x20);
WREG32(TA_CNTL_AUX, 0x00010000);
@@ -3739,6 +3762,7 @@ static void cik_gpu_init(struct radeon_device *rdev)
WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
+ mutex_unlock(&rdev->grbm_idx_mutex);
udelay(50);
}
@@ -3970,31 +3994,27 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
- struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
+ struct radeon_sync sync;
int ring_index = rdev->asic->copy.blit_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes, control;
int i, num_loops;
int r = 0;
- r = radeon_semaphore_create(rdev, &sem);
- if (r) {
- DRM_ERROR("radeon: moving bo (%d).\n", r);
- return ERR_PTR(r);
- }
+ radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
- radeon_semaphore_sync_resv(rdev, sem, resv, false);
- radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+ radeon_sync_resv(rdev, &sync, resv, false);
+ radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_bytes = size_in_bytes;
@@ -4018,12 +4038,12 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
- radeon_semaphore_free(rdev, &sem, fence);
+ radeon_sync_free(rdev, &sync, fence);
return fence;
}
@@ -4046,6 +4066,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
+ unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 header, control = INDIRECT_BUFFER_VALID;
if (ib->is_const_ib) {
@@ -4074,8 +4095,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
}
- control |= ib->length_dw |
- (ib->vm ? (ib->vm->id << 24) : 0);
+ control |= ib->length_dw | (vm_id << 24);
radeon_ring_write(ring, header);
radeon_ring_write(ring,
@@ -4675,12 +4695,11 @@ static int cik_mec_init(struct radeon_device *rdev)
/*
* KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
* CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
+ * Nonetheless, we assign only 1 pipe because all other pipes will
+ * be handled by KFD
*/
- if (rdev->family == CHIP_KAVERI)
- rdev->mec.num_mec = 2;
- else
- rdev->mec.num_mec = 1;
- rdev->mec.num_pipe = 4;
+ rdev->mec.num_mec = 1;
+ rdev->mec.num_pipe = 1;
rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
if (rdev->mec.hpd_eop_obj == NULL) {
@@ -4822,28 +4841,24 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
/* init the pipes */
mutex_lock(&rdev->srbm_mutex);
- for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
- int me = (i < 4) ? 1 : 2;
- int pipe = (i < 4) ? i : (i - 4);
- eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
+ eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
- cik_srbm_select(rdev, me, pipe, 0, 0);
+ cik_srbm_select(rdev, 0, 0, 0, 0);
- /* write the EOP addr */
- WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
- WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+ /* write the EOP addr */
+ WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+ WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
- /* set the VMID assigned */
- WREG32(CP_HPD_EOP_VMID, 0);
+ /* set the VMID assigned */
+ WREG32(CP_HPD_EOP_VMID, 0);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32(CP_HPD_EOP_CONTROL);
+ tmp &= ~EOP_SIZE_MASK;
+ tmp |= order_base_2(MEC_HPD_SIZE / 8);
+ WREG32(CP_HPD_EOP_CONTROL, tmp);
- /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- tmp = RREG32(CP_HPD_EOP_CONTROL);
- tmp &= ~EOP_SIZE_MASK;
- tmp |= order_base_2(MEC_HPD_SIZE / 8);
- WREG32(CP_HPD_EOP_CONTROL, tmp);
- }
- cik_srbm_select(rdev, 0, 0, 0, 0);
mutex_unlock(&rdev->srbm_mutex);
/* init the queues. Just two for now. */
@@ -5897,8 +5912,13 @@ int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
*/
int cik_vm_init(struct radeon_device *rdev)
{
- /* number of VMs */
- rdev->vm_manager.nvm = 16;
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * radeon graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ */
+ rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
/* base offset of vram pages */
if (rdev->flags & RADEON_IS_IGP) {
u64 tmp = RREG32(MC_VM_FB_OFFSET);
@@ -5958,26 +5978,23 @@ static void cik_vm_decode_fault(struct radeon_device *rdev,
* Update the page table base and flush the VM TLB
* using the CP (CIK).
*/
-void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
{
- struct radeon_ring *ring = &rdev->ring[ridx];
- int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
-
- if (vm == NULL)
- return;
+ int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
WRITE_DATA_DST_SEL(0)));
- if (vm->id < 8) {
+ if (vm_id < 8) {
radeon_ring_write(ring,
- (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+ (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
} else {
radeon_ring_write(ring,
- (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+ (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
}
radeon_ring_write(ring, 0);
- radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+ radeon_ring_write(ring, pd_addr >> 12);
/* update SH_MEM_* regs */
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -5985,7 +6002,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
WRITE_DATA_DST_SEL(0)));
radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
radeon_ring_write(ring, 0);
- radeon_ring_write(ring, VMID(vm->id));
+ radeon_ring_write(ring, VMID(vm_id));
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -6006,7 +6023,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
radeon_ring_write(ring, VMID(0));
/* HDP flush */
- cik_hdp_flush_cp_ring_emit(rdev, ridx);
+ cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
/* bits 0-15 are the VM contexts0-15 */
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -6014,7 +6031,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
WRITE_DATA_DST_SEL(0)));
radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
radeon_ring_write(ring, 0);
- radeon_ring_write(ring, 1 << vm->id);
+ radeon_ring_write(ring, 1 << vm_id);
/* compute doesn't have PFP */
if (usepfp) {
@@ -6059,6 +6076,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
u32 i, j, k;
u32 mask;
+ mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
cik_select_se_sh(rdev, i, j);
@@ -6070,6 +6088,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
}
}
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&rdev->grbm_idx_mutex);
mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
for (k = 0; k < rdev->usec_timeout; k++) {
@@ -6204,10 +6223,12 @@ static int cik_rlc_resume(struct radeon_device *rdev)
WREG32(RLC_LB_CNTR_INIT, 0);
WREG32(RLC_LB_CNTR_MAX, 0x00008000);
+ mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
WREG32(RLC_LB_PARAMS, 0x00600408);
WREG32(RLC_LB_CNTL, 0x80000004);
+ mutex_unlock(&rdev->grbm_idx_mutex);
WREG32(RLC_MC_CNTL, 0);
WREG32(RLC_UCODE_CNTL, 0);
@@ -6274,11 +6295,13 @@ static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
tmp = cik_halt_rlc(rdev);
+ mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
WREG32(RLC_SERDES_WR_CTRL, tmp2);
+ mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp);
@@ -6314,17 +6337,20 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
}
orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+ data |= 0x00000001;
data &= 0xfffffffd;
if (orig != data)
WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
tmp = cik_halt_rlc(rdev);
+ mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
WREG32(RLC_SERDES_WR_CTRL, data);
+ mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp);
@@ -6345,7 +6371,7 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
}
} else {
orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
- data |= 0x00000002;
+ data |= 0x00000003;
if (orig != data)
WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
@@ -6368,11 +6394,13 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
tmp = cik_halt_rlc(rdev);
+ mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
WREG32(RLC_SERDES_WR_CTRL, data);
+ mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp);
}
@@ -6801,10 +6829,12 @@ static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
u32 mask = 0, tmp, tmp1;
int i;
+ mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, se, sh);
tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&rdev->grbm_idx_mutex);
tmp &= 0xffff0000;
@@ -7288,8 +7318,7 @@ static int cik_irq_init(struct radeon_device *rdev)
int cik_irq_set(struct radeon_device *rdev)
{
u32 cp_int_cntl;
- u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
- u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
+ u32 cp_m1p0;
u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
u32 grbm_int_cntl = 0;
@@ -7323,13 +7352,6 @@ int cik_irq_set(struct radeon_device *rdev)
dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
- cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
- cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
- cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
- cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
- cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
- cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
- cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
if (rdev->flags & RADEON_IS_IGP)
thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
@@ -7351,33 +7373,6 @@ int cik_irq_set(struct radeon_device *rdev)
case 0:
cp_m1p0 |= TIME_STAMP_INT_ENABLE;
break;
- case 1:
- cp_m1p1 |= TIME_STAMP_INT_ENABLE;
- break;
- case 2:
- cp_m1p2 |= TIME_STAMP_INT_ENABLE;
- break;
- case 3:
- cp_m1p2 |= TIME_STAMP_INT_ENABLE;
- break;
- default:
- DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
- break;
- }
- } else if (ring->me == 2) {
- switch (ring->pipe) {
- case 0:
- cp_m2p0 |= TIME_STAMP_INT_ENABLE;
- break;
- case 1:
- cp_m2p1 |= TIME_STAMP_INT_ENABLE;
- break;
- case 2:
- cp_m2p2 |= TIME_STAMP_INT_ENABLE;
- break;
- case 3:
- cp_m2p2 |= TIME_STAMP_INT_ENABLE;
- break;
default:
DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
break;
@@ -7394,33 +7389,6 @@ int cik_irq_set(struct radeon_device *rdev)
case 0:
cp_m1p0 |= TIME_STAMP_INT_ENABLE;
break;
- case 1:
- cp_m1p1 |= TIME_STAMP_INT_ENABLE;
- break;
- case 2:
- cp_m1p2 |= TIME_STAMP_INT_ENABLE;
- break;
- case 3:
- cp_m1p2 |= TIME_STAMP_INT_ENABLE;
- break;
- default:
- DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
- break;
- }
- } else if (ring->me == 2) {
- switch (ring->pipe) {
- case 0:
- cp_m2p0 |= TIME_STAMP_INT_ENABLE;
- break;
- case 1:
- cp_m2p1 |= TIME_STAMP_INT_ENABLE;
- break;
- case 2:
- cp_m2p2 |= TIME_STAMP_INT_ENABLE;
- break;
- case 3:
- cp_m2p2 |= TIME_STAMP_INT_ENABLE;
- break;
default:
DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
break;
@@ -7509,13 +7477,6 @@ int cik_irq_set(struct radeon_device *rdev)
WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
- WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
- WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
- WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
- WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
- WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
- WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
- WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
WREG32(GRBM_INT_CNTL, grbm_int_cntl);
@@ -7832,6 +7793,10 @@ restart_ih:
while (rptr != wptr) {
/* wptr/rptr are in bytes! */
ring_index = rptr / 4;
+
+ radeon_kfd_interrupt(rdev,
+ (const void *) &rdev->ih.ring[ring_index]);
+
src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
@@ -8521,6 +8486,10 @@ static int cik_startup(struct radeon_device *rdev)
if (r)
return r;
+ r = radeon_kfd_resume(rdev);
+ if (r)
+ return r;
+
return 0;
}
@@ -8569,6 +8538,7 @@ int cik_resume(struct radeon_device *rdev)
*/
int cik_suspend(struct radeon_device *rdev)
{
+ radeon_kfd_suspend(rdev);
radeon_pm_suspend(rdev);
dce6_audio_fini(rdev);
radeon_vm_manager_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/cik_reg.h b/drivers/gpu/drm/radeon/cik_reg.h
index ca1bb6133580..79c45e8a536b 100644
--- a/drivers/gpu/drm/radeon/cik_reg.h
+++ b/drivers/gpu/drm/radeon/cik_reg.h
@@ -147,4 +147,140 @@
#define CIK_LB_DESKTOP_HEIGHT 0x6b0c
+#define CP_HQD_IQ_RPTR 0xC970u
+#define AQL_ENABLE (1U << 0)
+
+#define IDLE (1 << 2)
+
+struct cik_mqd {
+ uint32_t header;
+ uint32_t compute_dispatch_initiator;
+ uint32_t compute_dim_x;
+ uint32_t compute_dim_y;
+ uint32_t compute_dim_z;
+ uint32_t compute_start_x;
+ uint32_t compute_start_y;
+ uint32_t compute_start_z;
+ uint32_t compute_num_thread_x;
+ uint32_t compute_num_thread_y;
+ uint32_t compute_num_thread_z;
+ uint32_t compute_pipelinestat_enable;
+ uint32_t compute_perfcount_enable;
+ uint32_t compute_pgm_lo;
+ uint32_t compute_pgm_hi;
+ uint32_t compute_tba_lo;
+ uint32_t compute_tba_hi;
+ uint32_t compute_tma_lo;
+ uint32_t compute_tma_hi;
+ uint32_t compute_pgm_rsrc1;
+ uint32_t compute_pgm_rsrc2;
+ uint32_t compute_vmid;
+ uint32_t compute_resource_limits;
+ uint32_t compute_static_thread_mgmt_se0;
+ uint32_t compute_static_thread_mgmt_se1;
+ uint32_t compute_tmpring_size;
+ uint32_t compute_static_thread_mgmt_se2;
+ uint32_t compute_static_thread_mgmt_se3;
+ uint32_t compute_restart_x;
+ uint32_t compute_restart_y;
+ uint32_t compute_restart_z;
+ uint32_t compute_thread_trace_enable;
+ uint32_t compute_misc_reserved;
+ uint32_t compute_user_data_0;
+ uint32_t compute_user_data_1;
+ uint32_t compute_user_data_2;
+ uint32_t compute_user_data_3;
+ uint32_t compute_user_data_4;
+ uint32_t compute_user_data_5;
+ uint32_t compute_user_data_6;
+ uint32_t compute_user_data_7;
+ uint32_t compute_user_data_8;
+ uint32_t compute_user_data_9;
+ uint32_t compute_user_data_10;
+ uint32_t compute_user_data_11;
+ uint32_t compute_user_data_12;
+ uint32_t compute_user_data_13;
+ uint32_t compute_user_data_14;
+ uint32_t compute_user_data_15;
+ uint32_t cp_compute_csinvoc_count_lo;
+ uint32_t cp_compute_csinvoc_count_hi;
+ uint32_t cp_mqd_base_addr_lo;
+ uint32_t cp_mqd_base_addr_hi;
+ uint32_t cp_hqd_active;
+ uint32_t cp_hqd_vmid;
+ uint32_t cp_hqd_persistent_state;
+ uint32_t cp_hqd_pipe_priority;
+ uint32_t cp_hqd_queue_priority;
+ uint32_t cp_hqd_quantum;
+ uint32_t cp_hqd_pq_base_lo;
+ uint32_t cp_hqd_pq_base_hi;
+ uint32_t cp_hqd_pq_rptr;
+ uint32_t cp_hqd_pq_rptr_report_addr_lo;
+ uint32_t cp_hqd_pq_rptr_report_addr_hi;
+ uint32_t cp_hqd_pq_wptr_poll_addr_lo;
+ uint32_t cp_hqd_pq_wptr_poll_addr_hi;
+ uint32_t cp_hqd_pq_doorbell_control;
+ uint32_t cp_hqd_pq_wptr;
+ uint32_t cp_hqd_pq_control;
+ uint32_t cp_hqd_ib_base_addr_lo;
+ uint32_t cp_hqd_ib_base_addr_hi;
+ uint32_t cp_hqd_ib_rptr;
+ uint32_t cp_hqd_ib_control;
+ uint32_t cp_hqd_iq_timer;
+ uint32_t cp_hqd_iq_rptr;
+ uint32_t cp_hqd_dequeue_request;
+ uint32_t cp_hqd_dma_offload;
+ uint32_t cp_hqd_sema_cmd;
+ uint32_t cp_hqd_msg_type;
+ uint32_t cp_hqd_atomic0_preop_lo;
+ uint32_t cp_hqd_atomic0_preop_hi;
+ uint32_t cp_hqd_atomic1_preop_lo;
+ uint32_t cp_hqd_atomic1_preop_hi;
+ uint32_t cp_hqd_hq_status0;
+ uint32_t cp_hqd_hq_control0;
+ uint32_t cp_mqd_control;
+ uint32_t cp_mqd_query_time_lo;
+ uint32_t cp_mqd_query_time_hi;
+ uint32_t cp_mqd_connect_start_time_lo;
+ uint32_t cp_mqd_connect_start_time_hi;
+ uint32_t cp_mqd_connect_end_time_lo;
+ uint32_t cp_mqd_connect_end_time_hi;
+ uint32_t cp_mqd_connect_end_wf_count;
+ uint32_t cp_mqd_connect_end_pq_rptr;
+ uint32_t cp_mqd_connect_end_pq_wptr;
+ uint32_t cp_mqd_connect_end_ib_rptr;
+ uint32_t reserved_96;
+ uint32_t reserved_97;
+ uint32_t reserved_98;
+ uint32_t reserved_99;
+ uint32_t iqtimer_pkt_header;
+ uint32_t iqtimer_pkt_dw0;
+ uint32_t iqtimer_pkt_dw1;
+ uint32_t iqtimer_pkt_dw2;
+ uint32_t iqtimer_pkt_dw3;
+ uint32_t iqtimer_pkt_dw4;
+ uint32_t iqtimer_pkt_dw5;
+ uint32_t iqtimer_pkt_dw6;
+ uint32_t reserved_108;
+ uint32_t reserved_109;
+ uint32_t reserved_110;
+ uint32_t reserved_111;
+ uint32_t queue_doorbell_id0;
+ uint32_t queue_doorbell_id1;
+ uint32_t queue_doorbell_id2;
+ uint32_t queue_doorbell_id3;
+ uint32_t queue_doorbell_id4;
+ uint32_t queue_doorbell_id5;
+ uint32_t queue_doorbell_id6;
+ uint32_t queue_doorbell_id7;
+ uint32_t queue_doorbell_id8;
+ uint32_t queue_doorbell_id9;
+ uint32_t queue_doorbell_id10;
+ uint32_t queue_doorbell_id11;
+ uint32_t queue_doorbell_id12;
+ uint32_t queue_doorbell_id13;
+ uint32_t queue_doorbell_id14;
+ uint32_t queue_doorbell_id15;
+};
+
#endif
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index d748963af08b..dde5c7e29eb2 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
- u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
+ u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
if (rdev->wb.enabled) {
u32 next_rptr = ring->wptr + 5;
@@ -541,31 +541,27 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
- struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
+ struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes;
int i, num_loops;
int r = 0;
- r = radeon_semaphore_create(rdev, &sem);
- if (r) {
- DRM_ERROR("radeon: moving bo (%d).\n", r);
- return ERR_PTR(r);
- }
+ radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
- radeon_semaphore_sync_resv(rdev, sem, resv, false);
- radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+ radeon_sync_resv(rdev, &sync, resv, false);
+ radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_bytes = size_in_bytes;
@@ -586,12 +582,12 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
- radeon_semaphore_free(rdev, &sem, fence);
+ radeon_sync_free(rdev, &sync, fence);
return fence;
}
@@ -904,25 +900,21 @@ void cik_sdma_vm_pad_ib(struct radeon_ib *ib)
* Update the page table base and flush the VM TLB
* using sDMA (CIK).
*/
-void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
{
- struct radeon_ring *ring = &rdev->ring[ridx];
-
- if (vm == NULL)
- return;
-
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- if (vm->id < 8) {
- radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+ if (vm_id < 8) {
+ radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
} else {
- radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+ radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
}
- radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+ radeon_ring_write(ring, pd_addr >> 12);
/* update SH_MEM_* regs */
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
- radeon_ring_write(ring, VMID(vm->id));
+ radeon_ring_write(ring, VMID(vm_id));
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
radeon_ring_write(ring, SH_MEM_BASES >> 2);
@@ -945,11 +937,11 @@ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm
radeon_ring_write(ring, VMID(0));
/* flush HDP */
- cik_sdma_hdp_flush_ring_emit(rdev, ridx);
+ cik_sdma_hdp_flush_ring_emit(rdev, ring->idx);
/* flush TLB */
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
- radeon_ring_write(ring, 1 << vm->id);
+ radeon_ring_write(ring, 1 << vm_id);
}
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h
index 0c6e1b55d968..ba85986febea 100644
--- a/drivers/gpu/drm/radeon/cikd.h
+++ b/drivers/gpu/drm/radeon/cikd.h
@@ -30,6 +30,8 @@
#define CIK_RB_BITMAP_WIDTH_PER_SH 2
#define HAWAII_RB_BITMAP_WIDTH_PER_SH 4
+#define RADEON_NUM_OF_VMIDS 8
+
/* DIDT IND registers */
#define DIDT_SQ_CTRL0 0x0
# define DIDT_CTRL_EN (1 << 0)
@@ -184,7 +186,10 @@
#define DIG_THERM_DPM(x) ((x) << 14)
#define DIG_THERM_DPM_MASK 0x003FC000
#define DIG_THERM_DPM_SHIFT 14
-
+#define CG_THERMAL_STATUS 0xC0300008
+#define FDO_PWM_DUTY(x) ((x) << 9)
+#define FDO_PWM_DUTY_MASK (0xff << 9)
+#define FDO_PWM_DUTY_SHIFT 9
#define CG_THERMAL_INT 0xC030000C
#define CI_DIG_THERM_INTH(x) ((x) << 8)
#define CI_DIG_THERM_INTH_MASK 0x0000FF00
@@ -194,7 +199,10 @@
#define CI_DIG_THERM_INTL_SHIFT 16
#define THERM_INT_MASK_HIGH (1 << 24)
#define THERM_INT_MASK_LOW (1 << 25)
-
+#define CG_MULT_THERMAL_CTRL 0xC0300010
+#define TEMP_SEL(x) ((x) << 20)
+#define TEMP_SEL_MASK (0xff << 20)
+#define TEMP_SEL_SHIFT 20
#define CG_MULT_THERMAL_STATUS 0xC0300014
#define ASIC_MAX_TEMP(x) ((x) << 0)
#define ASIC_MAX_TEMP_MASK 0x000001ff
@@ -203,6 +211,36 @@
#define CTF_TEMP_MASK 0x0003fe00
#define CTF_TEMP_SHIFT 9
+#define CG_FDO_CTRL0 0xC0300064
+#define FDO_STATIC_DUTY(x) ((x) << 0)
+#define FDO_STATIC_DUTY_MASK 0x000000FF
+#define FDO_STATIC_DUTY_SHIFT 0
+#define CG_FDO_CTRL1 0xC0300068
+#define FMAX_DUTY100(x) ((x) << 0)
+#define FMAX_DUTY100_MASK 0x000000FF
+#define FMAX_DUTY100_SHIFT 0
+#define CG_FDO_CTRL2 0xC030006C
+#define TMIN(x) ((x) << 0)
+#define TMIN_MASK 0x000000FF
+#define TMIN_SHIFT 0
+#define FDO_PWM_MODE(x) ((x) << 11)
+#define FDO_PWM_MODE_MASK (7 << 11)
+#define FDO_PWM_MODE_SHIFT 11
+#define TACH_PWM_RESP_RATE(x) ((x) << 25)
+#define TACH_PWM_RESP_RATE_MASK (0x7f << 25)
+#define TACH_PWM_RESP_RATE_SHIFT 25
+#define CG_TACH_CTRL 0xC0300070
+# define EDGE_PER_REV(x) ((x) << 0)
+# define EDGE_PER_REV_MASK (0x7 << 0)
+# define EDGE_PER_REV_SHIFT 0
+# define TARGET_PERIOD(x) ((x) << 3)
+# define TARGET_PERIOD_MASK 0xfffffff8
+# define TARGET_PERIOD_SHIFT 3
+#define CG_TACH_STATUS 0xC0300074
+# define TACH_PERIOD(x) ((x) << 0)
+# define TACH_PERIOD_MASK 0xffffffff
+# define TACH_PERIOD_SHIFT 0
+
#define CG_ECLK_CNTL 0xC05000AC
# define ECLK_DIVIDER_MASK 0x7f
# define ECLK_DIR_CNTL_EN (1 << 8)
@@ -1137,6 +1175,9 @@
#define SH_MEM_ALIGNMENT_MODE_UNALIGNED 3
#define DEFAULT_MTYPE(x) ((x) << 4)
#define APE1_MTYPE(x) ((x) << 7)
+/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
+#define MTYPE_CACHED 0
+#define MTYPE_NONCACHED 3
#define SX_DEBUG_1 0x9060
@@ -1447,6 +1488,16 @@
#define CP_HQD_ACTIVE 0xC91C
#define CP_HQD_VMID 0xC920
+#define CP_HQD_PERSISTENT_STATE 0xC924u
+#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8)
+
+#define CP_HQD_PIPE_PRIORITY 0xC928u
+#define CP_HQD_QUEUE_PRIORITY 0xC92Cu
+#define CP_HQD_QUANTUM 0xC930u
+#define QUANTUM_EN 1U
+#define QUANTUM_SCALE_1MS (1U << 4)
+#define QUANTUM_DURATION(x) ((x) << 8)
+
#define CP_HQD_PQ_BASE 0xC934
#define CP_HQD_PQ_BASE_HI 0xC938
#define CP_HQD_PQ_RPTR 0xC93C
@@ -1474,12 +1525,32 @@
#define PRIV_STATE (1 << 30)
#define KMD_QUEUE (1 << 31)
-#define CP_HQD_DEQUEUE_REQUEST 0xC974
+#define CP_HQD_IB_BASE_ADDR 0xC95Cu
+#define CP_HQD_IB_BASE_ADDR_HI 0xC960u
+#define CP_HQD_IB_RPTR 0xC964u
+#define CP_HQD_IB_CONTROL 0xC968u
+#define IB_ATC_EN (1U << 23)
+#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20)
+
+#define CP_HQD_DEQUEUE_REQUEST 0xC974
+#define DEQUEUE_REQUEST_DRAIN 1
+#define DEQUEUE_REQUEST_RESET 2
#define CP_MQD_CONTROL 0xC99C
#define MQD_VMID(x) ((x) << 0)
#define MQD_VMID_MASK (0xf << 0)
+#define CP_HQD_SEMA_CMD 0xC97Cu
+#define CP_HQD_MSG_TYPE 0xC980u
+#define CP_HQD_ATOMIC0_PREOP_LO 0xC984u
+#define CP_HQD_ATOMIC0_PREOP_HI 0xC988u
+#define CP_HQD_ATOMIC1_PREOP_LO 0xC98Cu
+#define CP_HQD_ATOMIC1_PREOP_HI 0xC990u
+#define CP_HQD_HQ_SCHEDULER0 0xC994u
+#define CP_HQD_HQ_SCHEDULER1 0xC998u
+
+#define SH_STATIC_MEM_CONFIG 0x9604u
+
#define DB_RENDER_CONTROL 0x28000
#define PA_SC_RASTER_CONFIG 0x28350
@@ -2069,4 +2140,20 @@
#define VCE_CMD_IB_AUTO 0x00000005
#define VCE_CMD_SEMAPHORE 0x00000006
+#define ATC_VMID0_PASID_MAPPING 0x339Cu
+#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u
+#define ATC_VMID_PASID_MAPPING_VALID (1U << 31)
+
+#define ATC_VM_APERTURE0_CNTL 0x3310u
+#define ATS_ACCESS_MODE_NEVER 0
+#define ATS_ACCESS_MODE_ALWAYS 1
+
+#define ATC_VM_APERTURE0_CNTL2 0x3318u
+#define ATC_VM_APERTURE0_HIGH_ADDR 0x3308u
+#define ATC_VM_APERTURE0_LOW_ADDR 0x3300u
+#define ATC_VM_APERTURE1_CNTL 0x3314u
+#define ATC_VM_APERTURE1_CNTL2 0x331Cu
+#define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu
+#define ATC_VM_APERTURE1_LOW_ADDR 0x3304u
+
#endif
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 5c8b358f9fba..924b1b7ab455 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -35,7 +35,7 @@
#define MIN(a,b) (((a)<(b))?(a):(b))
int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
- struct radeon_cs_reloc **cs_reloc);
+ struct radeon_bo_list **cs_reloc);
struct evergreen_cs_track {
u32 group_size;
u32 nbanks;
@@ -1094,7 +1094,7 @@ static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
u32 last_reg;
u32 m, i, tmp, *ib;
int r;
@@ -1792,7 +1792,7 @@ static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
static int evergreen_packet3_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt)
{
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
struct evergreen_cs_track *track;
volatile u32 *ib;
unsigned idx;
@@ -2661,7 +2661,7 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
p->track = NULL;
return r;
}
- } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+ } while (p->idx < p->chunk_ib->length_dw);
#if 0
for (r = 0; r < p->ib.length_dw; r++) {
printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
@@ -2684,8 +2684,8 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
**/
int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
{
- struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
- struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
+ struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
+ struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc;
u32 header, cmd, count, sub_cmd;
volatile u32 *ib = p->ib.ptr;
u32 idx;
@@ -3100,7 +3100,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
return -EINVAL;
}
- } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+ } while (p->idx < p->chunk_ib->length_dw);
#if 0
for (r = 0; r < p->ib->length_dw; r++) {
printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c
index 66bcfadeedd1..96535aa8659c 100644
--- a/drivers/gpu/drm/radeon/evergreen_dma.c
+++ b/drivers/gpu/drm/radeon/evergreen_dma.c
@@ -110,31 +110,27 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
- struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
+ struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_dw, cur_size_in_dw;
int i, num_loops;
int r = 0;
- r = radeon_semaphore_create(rdev, &sem);
- if (r) {
- DRM_ERROR("radeon: moving bo (%d).\n", r);
- return ERR_PTR(r);
- }
+ radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
- radeon_semaphore_sync_resv(rdev, sem, resv, false);
- radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+ radeon_sync_resv(rdev, &sync, resv, false);
+ radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_dw = size_in_dw;
@@ -153,12 +149,12 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
- radeon_semaphore_free(rdev, &sem, fence);
+ radeon_sync_free(rdev, &sync, fence);
return fence;
}
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 3faee58946dd..360de9f1f491 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1373,6 +1373,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
+ unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
PACKET3_SH_ACTION_ENA;
@@ -1395,15 +1396,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
#endif
(ib->gpu_addr & 0xFFFFFFFC));
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
- radeon_ring_write(ring, ib->length_dw |
- (ib->vm ? (ib->vm->id << 24) : 0));
+ radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
/* flush read cache over gart for this vmid */
radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl);
radeon_ring_write(ring, 0xFFFFFFFF);
radeon_ring_write(ring, 0);
- radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */
+ radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */
}
static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
@@ -2502,15 +2502,11 @@ void cayman_vm_decode_fault(struct radeon_device *rdev,
* Update the page table base and flush the VM TLB
* using the CP (cayman-si).
*/
-void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
{
- struct radeon_ring *ring = &rdev->ring[ridx];
-
- if (vm == NULL)
- return;
-
- radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
- radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+ radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2), 0));
+ radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */
radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
@@ -2518,7 +2514,7 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
/* bits 0-7 are the VM contexts0-7 */
radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
- radeon_ring_write(ring, 1 << vm->id);
+ radeon_ring_write(ring, 1 << vm_id);
/* sync PFP to ME, otherwise we might get invalid PFP reads */
radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
index f26f0a9fb522..50f88611ff60 100644
--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
+ unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
if (rdev->wb.enabled) {
u32 next_rptr = ring->wptr + 4;
@@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
*/
while ((ring->wptr & 7) != 5)
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
- radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+ radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
@@ -446,16 +447,12 @@ void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
{
- struct radeon_ring *ring = &rdev->ring[ridx];
-
- if (vm == NULL)
- return;
-
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
- radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
- radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+ radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
+ radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
@@ -465,6 +462,6 @@ void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm
/* bits 0-7 are the VM contexts0-7 */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
- radeon_ring_write(ring, 1 << vm->id);
+ radeon_ring_write(ring, 1 << vm_id);
}
diff --git a/drivers/gpu/drm/radeon/ppsmc.h b/drivers/gpu/drm/radeon/ppsmc.h
index 5670b8291285..7e5724a12f8b 100644
--- a/drivers/gpu/drm/radeon/ppsmc.h
+++ b/drivers/gpu/drm/radeon/ppsmc.h
@@ -56,6 +56,14 @@
#define PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE 0x20
#define PPSMC_STATEFLAG_DEEPSLEEP_BYPASS 0x40
+#define FDO_MODE_HARDWARE 0
+#define FDO_MODE_PIECE_WISE_LINEAR 1
+
+enum FAN_CONTROL {
+ FAN_CONTROL_FUZZY,
+ FAN_CONTROL_TABLE
+};
+
#define PPSMC_Result_OK ((uint8_t)0x01)
#define PPSMC_Result_Failed ((uint8_t)0xFF)
@@ -79,6 +87,8 @@ typedef uint8_t PPSMC_Result;
#define PPSMC_MSG_DisableCac ((uint8_t)0x54)
#define PPSMC_TDPClampingActive ((uint8_t)0x59)
#define PPSMC_TDPClampingInactive ((uint8_t)0x5A)
+#define PPSMC_StartFanControl ((uint8_t)0x5B)
+#define PPSMC_StopFanControl ((uint8_t)0x5C)
#define PPSMC_MSG_NoDisplay ((uint8_t)0x5D)
#define PPSMC_MSG_HasDisplay ((uint8_t)0x5E)
#define PPSMC_MSG_UVDPowerOFF ((uint8_t)0x60)
@@ -106,6 +116,7 @@ typedef uint8_t PPSMC_Result;
#define PPSMC_MSG_SAMUDPM_SetEnabledMask ((uint16_t) 0x130)
#define PPSMC_MSG_MCLKDPM_ForceState ((uint16_t) 0x131)
#define PPSMC_MSG_MCLKDPM_NoForcedLevel ((uint16_t) 0x132)
+#define PPSMC_MSG_Thermal_Cntl_Disable ((uint16_t) 0x133)
#define PPSMC_MSG_Voltage_Cntl_Disable ((uint16_t) 0x135)
#define PPSMC_MSG_PCIeDPM_Enable ((uint16_t) 0x136)
#define PPSMC_MSG_PCIeDPM_Disable ((uint16_t) 0x13d)
@@ -149,6 +160,10 @@ typedef uint8_t PPSMC_Result;
#define PPSMC_MSG_MASTER_DeepSleep_ON ((uint16_t) 0x18F)
#define PPSMC_MSG_MASTER_DeepSleep_OFF ((uint16_t) 0x190)
#define PPSMC_MSG_Remove_DC_Clamp ((uint16_t) 0x191)
+#define PPSMC_MSG_SetFanPwmMax ((uint16_t) 0x19A)
+
+#define PPSMC_MSG_ENABLE_THERMAL_DPM ((uint16_t) 0x19C)
+#define PPSMC_MSG_DISABLE_THERMAL_DPM ((uint16_t) 0x19D)
#define PPSMC_MSG_API_GetSclkFrequency ((uint16_t) 0x200)
#define PPSMC_MSG_API_GetMclkFrequency ((uint16_t) 0x201)
@@ -157,10 +172,11 @@ typedef uint8_t PPSMC_Result;
#define PPSMC_MSG_DPM_Config ((uint32_t) 0x102)
#define PPSMC_MSG_DPM_ForceState ((uint32_t) 0x104)
#define PPSMC_MSG_PG_SIMD_Config ((uint32_t) 0x108)
-#define PPSMC_MSG_DPM_N_LevelsDisabled ((uint32_t) 0x112)
+#define PPSMC_MSG_Thermal_Cntl_Enable ((uint32_t) 0x10a)
#define PPSMC_MSG_Voltage_Cntl_Enable ((uint32_t) 0x109)
#define PPSMC_MSG_VCEPowerOFF ((uint32_t) 0x10e)
#define PPSMC_MSG_VCEPowerON ((uint32_t) 0x10f)
+#define PPSMC_MSG_DPM_N_LevelsDisabled ((uint32_t) 0x112)
#define PPSMC_MSG_DCE_RemoveVoltageAdjustment ((uint32_t) 0x11d)
#define PPSMC_MSG_DCE_AllowVoltageAdjustment ((uint32_t) 0x11e)
#define PPSMC_MSG_EnableBAPM ((uint32_t) 0x120)
diff --git a/drivers/gpu/drm/radeon/pptable.h b/drivers/gpu/drm/radeon/pptable.h
index 2d532996c697..4c2eec49dadc 100644
--- a/drivers/gpu/drm/radeon/pptable.h
+++ b/drivers/gpu/drm/radeon/pptable.h
@@ -96,6 +96,14 @@ typedef struct _ATOM_PPLIB_FANTABLE2
USHORT usTMax; // The max temperature
} ATOM_PPLIB_FANTABLE2;
+typedef struct _ATOM_PPLIB_FANTABLE3
+{
+ ATOM_PPLIB_FANTABLE2 basicTable2;
+ UCHAR ucFanControlMode;
+ USHORT usFanPWMMax;
+ USHORT usFanOutputSensitivity;
+} ATOM_PPLIB_FANTABLE3;
+
typedef struct _ATOM_PPLIB_EXTENDEDHEADER
{
USHORT usSize;
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index b53b31a7b76f..74f06d540591 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1254,7 +1254,7 @@ int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
int r;
u32 tile_flags = 0;
u32 tmp;
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
u32 value;
r = radeon_cs_packet_next_reloc(p, &reloc, 0);
@@ -1293,7 +1293,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
int idx)
{
unsigned c, i;
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
struct r100_cs_track *track;
int r = 0;
volatile uint32_t *ib;
@@ -1542,7 +1542,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx, unsigned reg)
{
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
struct r100_cs_track *track;
volatile uint32_t *ib;
uint32_t tmp;
@@ -1901,7 +1901,7 @@ int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
static int r100_packet3_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt)
{
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
struct r100_cs_track *track;
unsigned idx;
volatile uint32_t *ib;
@@ -2061,7 +2061,7 @@ int r100_cs_parse(struct radeon_cs_parser *p)
}
if (r)
return r;
- } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+ } while (p->idx < p->chunk_ib->length_dw);
return 0;
}
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index 732d4938aab7..c70e6d5bcd19 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -146,7 +146,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx, unsigned reg)
{
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
struct r100_cs_track *track;
volatile uint32_t *ib;
uint32_t tmp;
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 1bc4704034ce..064ad5569cca 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -598,7 +598,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx, unsigned reg)
{
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
struct r100_cs_track *track;
volatile uint32_t *ib;
uint32_t tmp, tile_flags = 0;
@@ -1142,7 +1142,7 @@ fail:
static int r300_packet3_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt)
{
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
struct r100_cs_track *track;
volatile uint32_t *ib;
unsigned idx;
@@ -1283,7 +1283,7 @@ int r300_cs_parse(struct radeon_cs_parser *p)
if (r) {
return r;
}
- } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+ } while (p->idx < p->chunk_ib->length_dw);
return 0;
}
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 56b02927cd3d..ef5d6066fa5b 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2889,31 +2889,27 @@ struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
- struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
+ struct radeon_sync sync;
int ring_index = rdev->asic->copy.blit_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes, tmp;
int i, num_loops;
int r = 0;
- r = radeon_semaphore_create(rdev, &sem);
- if (r) {
- DRM_ERROR("radeon: moving bo (%d).\n", r);
- return ERR_PTR(r);
- }
+ radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
r = radeon_ring_lock(rdev, ring, num_loops * 6 + 24);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
- radeon_semaphore_sync_resv(rdev, sem, resv, false);
- radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+ radeon_sync_resv(rdev, &sync, resv, false);
+ radeon_sync_rings(rdev, &sync, ring->idx);
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
@@ -2942,12 +2938,12 @@ struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
- radeon_semaphore_free(rdev, &sem, fence);
+ radeon_sync_free(rdev, &sync, fence);
return fence;
}
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index c47537a1ddba..acc1f99c84d9 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -969,7 +969,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
struct r600_cs_track *track = (struct r600_cs_track *)p->track;
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
u32 m, i, tmp, *ib;
int r;
@@ -1626,7 +1626,7 @@ static bool r600_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
static int r600_packet3_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt)
{
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
struct r600_cs_track *track;
volatile u32 *ib;
unsigned idx;
@@ -2316,7 +2316,7 @@ int r600_cs_parse(struct radeon_cs_parser *p)
p->track = NULL;
return r;
}
- } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+ } while (p->idx < p->chunk_ib->length_dw);
#if 0
for (r = 0; r < p->ib.length_dw; r++) {
printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
@@ -2351,10 +2351,10 @@ static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error)
static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p)
{
- if (p->chunk_relocs_idx == -1) {
+ if (p->chunk_relocs == NULL) {
return 0;
}
- p->relocs = kzalloc(sizeof(struct radeon_cs_reloc), GFP_KERNEL);
+ p->relocs = kzalloc(sizeof(struct radeon_bo_list), GFP_KERNEL);
if (p->relocs == NULL) {
return -ENOMEM;
}
@@ -2398,7 +2398,7 @@ int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
/* Copy the packet into the IB, the parser will read from the
* input memory (cached) and write to the IB (which can be
* uncached). */
- ib_chunk = &parser.chunks[parser.chunk_ib_idx];
+ ib_chunk = parser.chunk_ib;
parser.ib.length_dw = ib_chunk->length_dw;
*l = parser.ib.length_dw;
if (copy_from_user(ib, ib_chunk->user_ptr, ib_chunk->length_dw * 4)) {
@@ -2435,24 +2435,24 @@ void r600_cs_legacy_init(void)
* GPU offset using the provided start.
**/
int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
- struct radeon_cs_reloc **cs_reloc)
+ struct radeon_bo_list **cs_reloc)
{
struct radeon_cs_chunk *relocs_chunk;
unsigned idx;
*cs_reloc = NULL;
- if (p->chunk_relocs_idx == -1) {
+ if (p->chunk_relocs == NULL) {
DRM_ERROR("No relocation chunk !\n");
return -EINVAL;
}
- relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+ relocs_chunk = p->chunk_relocs;
idx = p->dma_reloc_idx;
if (idx >= p->nrelocs) {
DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
idx, p->nrelocs);
return -EINVAL;
}
- *cs_reloc = p->relocs_ptr[idx];
+ *cs_reloc = &p->relocs[idx];
p->dma_reloc_idx++;
return 0;
}
@@ -2472,8 +2472,8 @@ int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
**/
int r600_dma_cs_parse(struct radeon_cs_parser *p)
{
- struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
- struct radeon_cs_reloc *src_reloc, *dst_reloc;
+ struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
+ struct radeon_bo_list *src_reloc, *dst_reloc;
u32 header, cmd, count, tiled;
volatile u32 *ib = p->ib.ptr;
u32 idx, idx_value;
@@ -2619,7 +2619,7 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
return -EINVAL;
}
- } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+ } while (p->idx < p->chunk_ib->length_dw);
#if 0
for (r = 0; r < p->ib->length_dw; r++) {
printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index cf0df45d455e..d2dd29ab24fa 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -441,31 +441,27 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
- struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
+ struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_dw, cur_size_in_dw;
int i, num_loops;
int r = 0;
- r = radeon_semaphore_create(rdev, &sem);
- if (r) {
- DRM_ERROR("radeon: moving bo (%d).\n", r);
- return ERR_PTR(r);
- }
+ radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
- radeon_semaphore_sync_resv(rdev, sem, resv, false);
- radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+ radeon_sync_resv(rdev, &sync, resv, false);
+ radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_dw = size_in_dw;
@@ -484,12 +480,12 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
- radeon_semaphore_free(rdev, &sem, fence);
+ radeon_sync_free(rdev, &sync, fence);
return fence;
}
diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c
index b5c73df8e202..843b65f46ece 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.c
+++ b/drivers/gpu/drm/radeon/r600_dpm.c
@@ -811,6 +811,7 @@ union power_info {
union fan_info {
struct _ATOM_PPLIB_FANTABLE fan;
struct _ATOM_PPLIB_FANTABLE2 fan2;
+ struct _ATOM_PPLIB_FANTABLE3 fan3;
};
static int r600_parse_clk_voltage_dep_table(struct radeon_clock_voltage_dependency_table *radeon_table,
@@ -900,6 +901,14 @@ int r600_parse_extended_power_table(struct radeon_device *rdev)
else
rdev->pm.dpm.fan.t_max = 10900;
rdev->pm.dpm.fan.cycle_delay = 100000;
+ if (fan_info->fan.ucFanTableFormat >= 3) {
+ rdev->pm.dpm.fan.control_mode = fan_info->fan3.ucFanControlMode;
+ rdev->pm.dpm.fan.default_max_fan_pwm =
+ le16_to_cpu(fan_info->fan3.usFanPWMMax);
+ rdev->pm.dpm.fan.default_fan_output_sensitivity = 4836;
+ rdev->pm.dpm.fan.fan_output_sensitivity =
+ le16_to_cpu(fan_info->fan3.usFanOutputSensitivity);
+ }
rdev->pm.dpm.fan.ucode_fan_control = true;
}
}
diff --git a/drivers/gpu/drm/radeon/r600_dpm.h b/drivers/gpu/drm/radeon/r600_dpm.h
index 46b9d2a03018..bd499d749bc9 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.h
+++ b/drivers/gpu/drm/radeon/r600_dpm.h
@@ -96,6 +96,9 @@
#define R600_TEMP_RANGE_MIN (90 * 1000)
#define R600_TEMP_RANGE_MAX (120 * 1000)
+#define FDO_PWM_MODE_STATIC 1
+#define FDO_PWM_MODE_STATIC_RPM 5
+
enum r600_power_level {
R600_POWER_LEVEL_LOW = 0,
R600_POWER_LEVEL_MEDIUM = 1,
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index a9717b3fbf1b..54529b837afa 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -150,9 +150,6 @@ extern int radeon_backlight;
/* number of hw syncs before falling back on blocking */
#define RADEON_NUM_SYNCS 4
-/* number of hw syncs before falling back on blocking */
-#define RADEON_NUM_SYNCS 4
-
/* hardcode those limit for now */
#define RADEON_VA_IB_OFFSET (1 << 20)
#define RADEON_VA_RESERVED_SIZE (8 << 20)
@@ -363,14 +360,15 @@ struct radeon_fence_driver {
};
struct radeon_fence {
- struct fence base;
+ struct fence base;
- struct radeon_device *rdev;
- uint64_t seq;
+ struct radeon_device *rdev;
+ uint64_t seq;
/* RB, DMA, etc. */
- unsigned ring;
+ unsigned ring;
+ bool is_vm_update;
- wait_queue_t fence_wake;
+ wait_queue_t fence_wake;
};
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
@@ -452,12 +450,22 @@ struct radeon_mman {
#endif
};
+struct radeon_bo_list {
+ struct radeon_bo *robj;
+ struct ttm_validate_buffer tv;
+ uint64_t gpu_offset;
+ unsigned prefered_domains;
+ unsigned allowed_domains;
+ uint32_t tiling_flags;
+};
+
/* bo virtual address in a specific vm */
struct radeon_bo_va {
/* protected by bo being reserved */
struct list_head bo_list;
uint32_t flags;
uint64_t addr;
+ struct radeon_fence *last_pt_update;
unsigned ref_count;
/* protected by vm mutex */
@@ -474,7 +482,7 @@ struct radeon_bo {
struct list_head list;
/* Protected by tbo.reserved */
u32 initial_domain;
- struct ttm_place placements[3];
+ struct ttm_place placements[4];
struct ttm_placement placement;
struct ttm_buffer_object tbo;
struct ttm_bo_kmap_obj kmap;
@@ -576,10 +584,9 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
* Semaphores.
*/
struct radeon_semaphore {
- struct radeon_sa_bo *sa_bo;
- signed waiters;
- uint64_t gpu_addr;
- struct radeon_fence *sync_to[RADEON_NUM_RINGS];
+ struct radeon_sa_bo *sa_bo;
+ signed waiters;
+ uint64_t gpu_addr;
};
int radeon_semaphore_create(struct radeon_device *rdev,
@@ -588,20 +595,33 @@ bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
struct radeon_semaphore *semaphore);
bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
struct radeon_semaphore *semaphore);
-void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
- struct radeon_fence *fence);
-int radeon_semaphore_sync_resv(struct radeon_device *rdev,
- struct radeon_semaphore *semaphore,
- struct reservation_object *resv,
- bool shared);
-int radeon_semaphore_sync_rings(struct radeon_device *rdev,
- struct radeon_semaphore *semaphore,
- int waiting_ring);
void radeon_semaphore_free(struct radeon_device *rdev,
struct radeon_semaphore **semaphore,
struct radeon_fence *fence);
/*
+ * Synchronization
+ */
+struct radeon_sync {
+ struct radeon_semaphore *semaphores[RADEON_NUM_SYNCS];
+ struct radeon_fence *sync_to[RADEON_NUM_RINGS];
+ struct radeon_fence *last_vm_update;
+};
+
+void radeon_sync_create(struct radeon_sync *sync);
+void radeon_sync_fence(struct radeon_sync *sync,
+ struct radeon_fence *fence);
+int radeon_sync_resv(struct radeon_device *rdev,
+ struct radeon_sync *sync,
+ struct reservation_object *resv,
+ bool shared);
+int radeon_sync_rings(struct radeon_device *rdev,
+ struct radeon_sync *sync,
+ int waiting_ring);
+void radeon_sync_free(struct radeon_device *rdev, struct radeon_sync *sync,
+ struct radeon_fence *fence);
+
+/*
* GART structures, functions & helpers
*/
struct radeon_mc;
@@ -701,6 +721,10 @@ struct radeon_doorbell {
int radeon_doorbell_get(struct radeon_device *rdev, u32 *page);
void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell);
+void radeon_doorbell_get_kfd_info(struct radeon_device *rdev,
+ phys_addr_t *aperture_base,
+ size_t *aperture_size,
+ size_t *start_offset);
/*
* IRQS.
@@ -814,7 +838,7 @@ struct radeon_ib {
struct radeon_fence *fence;
struct radeon_vm *vm;
bool is_const_ib;
- struct radeon_semaphore *semaphore;
+ struct radeon_sync sync;
};
struct radeon_ring {
@@ -891,33 +915,40 @@ struct radeon_vm_pt {
uint64_t addr;
};
+struct radeon_vm_id {
+ unsigned id;
+ uint64_t pd_gpu_addr;
+ /* last flushed PD/PT update */
+ struct radeon_fence *flushed_updates;
+ /* last use of vmid */
+ struct radeon_fence *last_id_use;
+};
+
struct radeon_vm {
- struct rb_root va;
- unsigned id;
+ struct mutex mutex;
+
+ struct rb_root va;
+
+ /* protecting invalidated and freed */
+ spinlock_t status_lock;
/* BOs moved, but not yet updated in the PT */
- struct list_head invalidated;
+ struct list_head invalidated;
/* BOs freed, but not yet updated in the PT */
- struct list_head freed;
+ struct list_head freed;
/* contains the page directory */
- struct radeon_bo *page_directory;
- uint64_t pd_gpu_addr;
- unsigned max_pde_used;
+ struct radeon_bo *page_directory;
+ unsigned max_pde_used;
/* array of page tables, one for each page directory entry */
- struct radeon_vm_pt *page_tables;
+ struct radeon_vm_pt *page_tables;
- struct radeon_bo_va *ib_bo_va;
+ struct radeon_bo_va *ib_bo_va;
- struct mutex mutex;
- /* last fence for cs using this vm */
- struct radeon_fence *fence;
- /* last flush or NULL if we still need to flush */
- struct radeon_fence *last_flush;
- /* last use of vmid */
- struct radeon_fence *last_id_use;
+ /* for id and flush management per ring */
+ struct radeon_vm_id ids[RADEON_NUM_RINGS];
};
struct radeon_vm_manager {
@@ -1025,19 +1056,7 @@ void cayman_dma_fini(struct radeon_device *rdev);
/*
* CS.
*/
-struct radeon_cs_reloc {
- struct drm_gem_object *gobj;
- struct radeon_bo *robj;
- struct ttm_validate_buffer tv;
- uint64_t gpu_offset;
- unsigned prefered_domains;
- unsigned allowed_domains;
- uint32_t tiling_flags;
- uint32_t handle;
-};
-
struct radeon_cs_chunk {
- uint32_t chunk_id;
uint32_t length_dw;
uint32_t *kdata;
void __user *user_ptr;
@@ -1055,16 +1074,15 @@ struct radeon_cs_parser {
unsigned idx;
/* relocations */
unsigned nrelocs;
- struct radeon_cs_reloc *relocs;
- struct radeon_cs_reloc **relocs_ptr;
- struct radeon_cs_reloc *vm_bos;
+ struct radeon_bo_list *relocs;
+ struct radeon_bo_list *vm_bos;
struct list_head validated;
unsigned dma_reloc_idx;
/* indices of various chunks */
- int chunk_ib_idx;
- int chunk_relocs_idx;
- int chunk_flags_idx;
- int chunk_const_ib_idx;
+ struct radeon_cs_chunk *chunk_ib;
+ struct radeon_cs_chunk *chunk_relocs;
+ struct radeon_cs_chunk *chunk_flags;
+ struct radeon_cs_chunk *chunk_const_ib;
struct radeon_ib ib;
struct radeon_ib const_ib;
void *track;
@@ -1078,7 +1096,7 @@ struct radeon_cs_parser {
static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
{
- struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
+ struct radeon_cs_chunk *ibc = p->chunk_ib;
if (ibc->kdata)
return ibc->kdata[idx];
@@ -1490,6 +1508,10 @@ struct radeon_dpm_fan {
u8 t_hyst;
u32 cycle_delay;
u16 t_max;
+ u8 control_mode;
+ u16 default_max_fan_pwm;
+ u16 default_fan_output_sensitivity;
+ u16 fan_output_sensitivity;
bool ucode_fan_control;
};
@@ -1623,6 +1645,11 @@ struct radeon_pm {
/* internal thermal controller on rv6xx+ */
enum radeon_int_thermal_type int_thermal_type;
struct device *int_hwmon_dev;
+ /* fan control parameters */
+ bool no_fan;
+ u8 fan_pulses_per_revolution;
+ u8 fan_min_rpm;
+ u8 fan_max_rpm;
/* dpm */
bool dpm_enabled;
struct radeon_dpm dpm;
@@ -1785,7 +1812,8 @@ struct radeon_asic_ring {
void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring);
bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
struct radeon_semaphore *semaphore, bool emit_wait);
- void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+ void (*vm_flush)(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr);
/* testing functions */
int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
@@ -2388,6 +2416,8 @@ struct radeon_device {
struct radeon_atcs atcs;
/* srbm instance registers */
struct mutex srbm_mutex;
+ /* GRBM index mutex. Protects concurrents access to GRBM index */
+ struct mutex grbm_idx_mutex;
/* clock, powergating flags */
u32 cg_flags;
u32 pg_flags;
@@ -2400,6 +2430,10 @@ struct radeon_device {
u64 vram_pin_size;
u64 gart_pin_size;
+ /* amdkfd interface */
+ struct kfd_dev *kfd;
+ struct radeon_sa_manager kfd_bo;
+
struct mutex mn_lock;
DECLARE_HASHTABLE(mn_hash, 7);
};
@@ -2831,7 +2865,7 @@ static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
#define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_execute((rdev), (ib))
#define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_parse((rdev), (ib))
#define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)]->is_lockup((rdev), (cp))
-#define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)]->vm_flush((rdev), (r), (vm))
+#define radeon_ring_vm_flush(rdev, r, vm_id, pd_addr) (rdev)->asic->ring[(r)->idx]->vm_flush((rdev), (r), (vm_id), (pd_addr))
#define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_rptr((rdev), (r))
#define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_wptr((rdev), (r))
#define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->set_wptr((rdev), (r))
@@ -2940,14 +2974,14 @@ int radeon_vm_manager_init(struct radeon_device *rdev);
void radeon_vm_manager_fini(struct radeon_device *rdev);
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
-struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
+struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
struct radeon_vm *vm,
struct list_head *head);
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
struct radeon_vm *vm, int ring);
void radeon_vm_flush(struct radeon_device *rdev,
struct radeon_vm *vm,
- int ring);
+ int ring, struct radeon_fence *fence);
void radeon_vm_fence(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_fence *fence);
@@ -3054,7 +3088,7 @@ bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p);
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt);
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
- struct radeon_cs_reloc **cs_reloc,
+ struct radeon_bo_list **cs_reloc,
int nomm);
int r600_cs_common_vline_parse(struct radeon_cs_parser *p,
uint32_t *vline_start_end,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index d8ace5b28a5b..2a45d548d5ec 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -599,7 +599,8 @@ int cayman_asic_reset(struct radeon_device *rdev);
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int cayman_vm_init(struct radeon_device *rdev);
void cayman_vm_fini(struct radeon_device *rdev);
-void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr);
uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -624,7 +625,8 @@ void cayman_dma_vm_set_pages(struct radeon_device *rdev,
uint32_t incr, uint32_t flags);
void cayman_dma_vm_pad_ib(struct radeon_ib *ib);
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr);
u32 cayman_gfx_get_rptr(struct radeon_device *rdev,
struct radeon_ring *ring);
@@ -699,7 +701,8 @@ int si_irq_set(struct radeon_device *rdev);
int si_irq_process(struct radeon_device *rdev);
int si_vm_init(struct radeon_device *rdev);
void si_vm_fini(struct radeon_device *rdev);
-void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr);
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
@@ -721,7 +724,8 @@ void si_dma_vm_set_pages(struct radeon_device *rdev,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr);
u32 si_get_xclk(struct radeon_device *rdev);
uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev);
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
@@ -793,7 +797,8 @@ int cik_irq_set(struct radeon_device *rdev);
int cik_irq_process(struct radeon_device *rdev);
int cik_vm_init(struct radeon_device *rdev);
void cik_vm_fini(struct radeon_device *rdev);
-void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr);
void cik_sdma_vm_copy_pages(struct radeon_device *rdev,
struct radeon_ib *ib,
@@ -811,7 +816,8 @@ void cik_sdma_vm_set_pages(struct radeon_device *rdev,
uint32_t incr, uint32_t flags);
void cik_sdma_vm_pad_ib(struct radeon_ib *ib);
-void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr);
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
u32 cik_gfx_get_rptr(struct radeon_device *rdev,
struct radeon_ring *ring);
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index df69b92ba164..dbc94f300297 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -196,8 +196,8 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev)
}
}
-static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
- u8 id)
+struct radeon_gpio_rec radeon_atombios_lookup_gpio(struct radeon_device *rdev,
+ u8 id)
{
struct atom_context *ctx = rdev->mode_info.atom_context;
struct radeon_gpio_rec gpio;
@@ -221,6 +221,7 @@ static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
if (id == pin->ucGPIO_ID) {
gpio.id = pin->ucGPIO_ID;
gpio.reg = le16_to_cpu(pin->usGpioPin_AIndex) * 4;
+ gpio.shift = pin->ucGpioPinBitShift;
gpio.mask = (1 << pin->ucGpioPinBitShift);
gpio.valid = true;
break;
@@ -801,7 +802,7 @@ bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev)
hpd_record =
(ATOM_HPD_INT_RECORD *)
record;
- gpio = radeon_lookup_gpio(rdev,
+ gpio = radeon_atombios_lookup_gpio(rdev,
hpd_record->ucHPDIntGPIOID);
hpd = radeon_atom_get_hpd_info_from_gpio(rdev, &gpio);
hpd.plugged_state = hpd_record->ucPlugged_PinState;
@@ -2128,7 +2129,7 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
rdev->pm.power_state[state_index].clock_info[0].voltage.type =
VOLTAGE_GPIO;
rdev->pm.power_state[state_index].clock_info[0].voltage.gpio =
- radeon_lookup_gpio(rdev,
+ radeon_atombios_lookup_gpio(rdev,
power_info->info.asPowerPlayInfo[i].ucVoltageDropIndex);
if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_ACTIVE_HIGH)
rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
@@ -2164,7 +2165,7 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
rdev->pm.power_state[state_index].clock_info[0].voltage.type =
VOLTAGE_GPIO;
rdev->pm.power_state[state_index].clock_info[0].voltage.gpio =
- radeon_lookup_gpio(rdev,
+ radeon_atombios_lookup_gpio(rdev,
power_info->info_2.asPowerPlayInfo[i].ucVoltageDropIndex);
if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_ACTIVE_HIGH)
rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
@@ -2200,7 +2201,7 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
rdev->pm.power_state[state_index].clock_info[0].voltage.type =
VOLTAGE_GPIO;
rdev->pm.power_state[state_index].clock_info[0].voltage.gpio =
- radeon_lookup_gpio(rdev,
+ radeon_atombios_lookup_gpio(rdev,
power_info->info_3.asPowerPlayInfo[i].ucVoltageDropIndex);
if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_ACTIVE_HIGH)
rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
@@ -2248,6 +2249,14 @@ static void radeon_atombios_add_pplib_thermal_controller(struct radeon_device *r
/* add the i2c bus for thermal/fan chip */
if (controller->ucType > 0) {
+ if (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN)
+ rdev->pm.no_fan = true;
+ rdev->pm.fan_pulses_per_revolution =
+ controller->ucFanParameters & ATOM_PP_FANPARAMETERS_TACHOMETER_PULSES_PER_REVOLUTION_MASK;
+ if (rdev->pm.fan_pulses_per_revolution) {
+ rdev->pm.fan_min_rpm = controller->ucFanMinRPM;
+ rdev->pm.fan_max_rpm = controller->ucFanMaxRPM;
+ }
if (controller->ucType == ATOM_PP_THERMALCONTROLLER_RV6xx) {
DRM_INFO("Internal thermal controller %s fan control\n",
(controller->ucFanParameters &
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 6f377de099f9..c830863bc98a 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -77,22 +77,18 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
struct drm_device *ddev = p->rdev->ddev;
struct radeon_cs_chunk *chunk;
struct radeon_cs_buckets buckets;
- unsigned i, j;
- bool duplicate, need_mmap_lock = false;
+ unsigned i;
+ bool need_mmap_lock = false;
int r;
- if (p->chunk_relocs_idx == -1) {
+ if (p->chunk_relocs == NULL) {
return 0;
}
- chunk = &p->chunks[p->chunk_relocs_idx];
+ chunk = p->chunk_relocs;
p->dma_reloc_idx = 0;
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
- p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
- if (p->relocs_ptr == NULL) {
- return -ENOMEM;
- }
- p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
+ p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_bo_list), GFP_KERNEL);
if (p->relocs == NULL) {
return -ENOMEM;
}
@@ -101,31 +97,17 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
for (i = 0; i < p->nrelocs; i++) {
struct drm_radeon_cs_reloc *r;
+ struct drm_gem_object *gobj;
unsigned priority;
- duplicate = false;
r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
- for (j = 0; j < i; j++) {
- if (r->handle == p->relocs[j].handle) {
- p->relocs_ptr[i] = &p->relocs[j];
- duplicate = true;
- break;
- }
- }
- if (duplicate) {
- p->relocs[i].handle = 0;
- continue;
- }
-
- p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
- r->handle);
- if (p->relocs[i].gobj == NULL) {
+ gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
+ if (gobj == NULL) {
DRM_ERROR("gem object lookup failed 0x%x\n",
r->handle);
return -ENOENT;
}
- p->relocs_ptr[i] = &p->relocs[i];
- p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
+ p->relocs[i].robj = gem_to_radeon_bo(gobj);
/* The userspace buffer priorities are from 0 to 15. A higher
* number means the buffer is more important.
@@ -184,7 +166,6 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
p->relocs[i].tv.shared = !r->write_domain;
- p->relocs[i].handle = r->handle;
radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
priority);
@@ -251,15 +232,15 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
int r;
list_for_each_entry(reloc, &p->validated, tv.head) {
struct reservation_object *resv;
resv = reloc->robj->tbo.resv;
- r = radeon_semaphore_sync_resv(p->rdev, p->ib.semaphore, resv,
- reloc->tv.shared);
+ r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
+ reloc->tv.shared);
if (r)
return r;
}
@@ -282,13 +263,11 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
INIT_LIST_HEAD(&p->validated);
p->idx = 0;
p->ib.sa_bo = NULL;
- p->ib.semaphore = NULL;
p->const_ib.sa_bo = NULL;
- p->const_ib.semaphore = NULL;
- p->chunk_ib_idx = -1;
- p->chunk_relocs_idx = -1;
- p->chunk_flags_idx = -1;
- p->chunk_const_ib_idx = -1;
+ p->chunk_ib = NULL;
+ p->chunk_relocs = NULL;
+ p->chunk_flags = NULL;
+ p->chunk_const_ib = NULL;
p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
if (p->chunks_array == NULL) {
return -ENOMEM;
@@ -315,24 +294,23 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
return -EFAULT;
}
p->chunks[i].length_dw = user_chunk.length_dw;
- p->chunks[i].chunk_id = user_chunk.chunk_id;
- if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
- p->chunk_relocs_idx = i;
+ if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
+ p->chunk_relocs = &p->chunks[i];
}
- if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
- p->chunk_ib_idx = i;
+ if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
+ p->chunk_ib = &p->chunks[i];
/* zero length IB isn't useful */
if (p->chunks[i].length_dw == 0)
return -EINVAL;
}
- if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
- p->chunk_const_ib_idx = i;
+ if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
+ p->chunk_const_ib = &p->chunks[i];
/* zero length CONST IB isn't useful */
if (p->chunks[i].length_dw == 0)
return -EINVAL;
}
- if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
- p->chunk_flags_idx = i;
+ if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
+ p->chunk_flags = &p->chunks[i];
/* zero length flags aren't useful */
if (p->chunks[i].length_dw == 0)
return -EINVAL;
@@ -341,10 +319,10 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
size = p->chunks[i].length_dw;
cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
p->chunks[i].user_ptr = cdata;
- if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
+ if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
continue;
- if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
+ if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
continue;
}
@@ -357,7 +335,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
return -EFAULT;
}
- if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
+ if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
p->cs_flags = p->chunks[i].kdata[0];
if (p->chunks[i].length_dw > 1)
ring = p->chunks[i].kdata[1];
@@ -398,8 +376,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
static int cmp_size_smaller_first(void *priv, struct list_head *a,
struct list_head *b)
{
- struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
- struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);
+ struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
+ struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);
/* Sort A before B if A is smaller. */
return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
@@ -440,13 +418,15 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
if (parser->relocs != NULL) {
for (i = 0; i < parser->nrelocs; i++) {
- if (parser->relocs[i].gobj)
- drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
+ struct radeon_bo *bo = parser->relocs[i].robj;
+ if (bo == NULL)
+ continue;
+
+ drm_gem_object_unreference_unlocked(&bo->gem_base);
}
}
kfree(parser->track);
kfree(parser->relocs);
- kfree(parser->relocs_ptr);
drm_free_large(parser->vm_bos);
for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata);
@@ -461,7 +441,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
{
int r;
- if (parser->chunk_ib_idx == -1)
+ if (parser->chunk_ib == NULL)
return 0;
if (parser->cs_flags & RADEON_CS_USE_VM)
@@ -521,10 +501,6 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
for (i = 0; i < p->nrelocs; i++) {
struct radeon_bo *bo;
- /* ignore duplicates */
- if (p->relocs_ptr[i] != &p->relocs[i])
- continue;
-
bo = p->relocs[i].robj;
bo_va = radeon_vm_bo_find(vm, bo);
if (bo_va == NULL) {
@@ -535,6 +511,8 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
if (r)
return r;
+
+ radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
}
return radeon_vm_clear_invalids(rdev, vm);
@@ -547,7 +525,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
struct radeon_vm *vm = &fpriv->vm;
int r;
- if (parser->chunk_ib_idx == -1)
+ if (parser->chunk_ib == NULL)
return 0;
if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
return 0;
@@ -579,10 +557,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
DRM_ERROR("Failed to sync rings: %i\n", r);
goto out;
}
- radeon_semaphore_sync_fence(parser->ib.semaphore, vm->fence);
if ((rdev->family >= CHIP_TAHITI) &&
- (parser->chunk_const_ib_idx != -1)) {
+ (parser->chunk_const_ib != NULL)) {
r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
} else {
r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
@@ -609,7 +586,7 @@ static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser
struct radeon_vm *vm = NULL;
int r;
- if (parser->chunk_ib_idx == -1)
+ if (parser->chunk_ib == NULL)
return 0;
if (parser->cs_flags & RADEON_CS_USE_VM) {
@@ -617,8 +594,8 @@ static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser
vm = &fpriv->vm;
if ((rdev->family >= CHIP_TAHITI) &&
- (parser->chunk_const_ib_idx != -1)) {
- ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
+ (parser->chunk_const_ib != NULL)) {
+ ib_chunk = parser->chunk_const_ib;
if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
return -EINVAL;
@@ -637,13 +614,13 @@ static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser
return -EFAULT;
}
- ib_chunk = &parser->chunks[parser->chunk_ib_idx];
+ ib_chunk = parser->chunk_ib;
if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
return -EINVAL;
}
}
- ib_chunk = &parser->chunks[parser->chunk_ib_idx];
+ ib_chunk = parser->chunk_ib;
r = radeon_ib_get(rdev, parser->ring, &parser->ib,
vm, ib_chunk->length_dw * 4);
@@ -735,7 +712,7 @@ int radeon_cs_packet_parse(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx)
{
- struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+ struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
struct radeon_device *rdev = p->rdev;
uint32_t header;
@@ -829,7 +806,7 @@ void radeon_cs_dump_packet(struct radeon_cs_parser *p,
* GPU offset using the provided start.
**/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
- struct radeon_cs_reloc **cs_reloc,
+ struct radeon_bo_list **cs_reloc,
int nomm)
{
struct radeon_cs_chunk *relocs_chunk;
@@ -837,12 +814,12 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
unsigned idx;
int r;
- if (p->chunk_relocs_idx == -1) {
+ if (p->chunk_relocs == NULL) {
DRM_ERROR("No relocation chunk !\n");
return -EINVAL;
}
*cs_reloc = NULL;
- relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+ relocs_chunk = p->chunk_relocs;
r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
if (r)
return r;
@@ -868,6 +845,6 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
(u64)relocs_chunk->kdata[idx + 3] << 32;
(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
} else
- *cs_reloc = p->relocs_ptr[(idx / 4)];
+ *cs_reloc = &p->relocs[(idx / 4)];
return 0;
}
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 9630e8d95fb4..45e54060ee97 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -117,106 +117,7 @@ static void radeon_show_cursor(struct drm_crtc *crtc)
}
}
-static void radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj,
- uint64_t gpu_addr)
-{
- struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
- struct radeon_device *rdev = crtc->dev->dev_private;
-
- if (ASIC_IS_DCE4(rdev)) {
- WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset,
- upper_32_bits(gpu_addr));
- WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
- gpu_addr & 0xffffffff);
- } else if (ASIC_IS_AVIVO(rdev)) {
- if (rdev->family >= CHIP_RV770) {
- if (radeon_crtc->crtc_id)
- WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr));
- else
- WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr));
- }
- WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
- gpu_addr & 0xffffffff);
- } else {
- radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr;
- /* offset is from DISP(2)_BASE_ADDRESS */
- WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset);
- }
-}
-
-int radeon_crtc_cursor_set(struct drm_crtc *crtc,
- struct drm_file *file_priv,
- uint32_t handle,
- uint32_t width,
- uint32_t height)
-{
- struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
- struct radeon_device *rdev = crtc->dev->dev_private;
- struct drm_gem_object *obj;
- struct radeon_bo *robj;
- uint64_t gpu_addr;
- int ret;
-
- if (!handle) {
- /* turn off cursor */
- radeon_hide_cursor(crtc);
- obj = NULL;
- goto unpin;
- }
-
- if ((width > radeon_crtc->max_cursor_width) ||
- (height > radeon_crtc->max_cursor_height)) {
- DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
- return -EINVAL;
- }
-
- obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
- if (!obj) {
- DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, radeon_crtc->crtc_id);
- return -ENOENT;
- }
-
- robj = gem_to_radeon_bo(obj);
- ret = radeon_bo_reserve(robj, false);
- if (unlikely(ret != 0))
- goto fail;
- /* Only 27 bit offset for legacy cursor */
- ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM,
- ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27,
- &gpu_addr);
- radeon_bo_unreserve(robj);
- if (ret)
- goto fail;
-
- radeon_crtc->cursor_width = width;
- radeon_crtc->cursor_height = height;
-
- radeon_lock_cursor(crtc, true);
- radeon_set_cursor(crtc, obj, gpu_addr);
- radeon_show_cursor(crtc);
- radeon_lock_cursor(crtc, false);
-
-unpin:
- if (radeon_crtc->cursor_bo) {
- robj = gem_to_radeon_bo(radeon_crtc->cursor_bo);
- ret = radeon_bo_reserve(robj, false);
- if (likely(ret == 0)) {
- radeon_bo_unpin(robj);
- radeon_bo_unreserve(robj);
- }
- drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo);
- }
-
- radeon_crtc->cursor_bo = obj;
- return 0;
-fail:
- drm_gem_object_unreference_unlocked(obj);
-
- return ret;
-}
-
-int radeon_crtc_cursor_move(struct drm_crtc *crtc,
- int x, int y)
+static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y)
{
struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
struct radeon_device *rdev = crtc->dev->dev_private;
@@ -281,7 +182,6 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
}
}
- radeon_lock_cursor(crtc, true);
if (ASIC_IS_DCE4(rdev)) {
WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, (x << 16) | y);
WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin);
@@ -308,7 +208,173 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset +
(yorigin * 256)));
}
+
+ radeon_crtc->cursor_x = x;
+ radeon_crtc->cursor_y = y;
+
+ return 0;
+}
+
+int radeon_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y)
+{
+ int ret;
+
+ radeon_lock_cursor(crtc, true);
+ ret = radeon_cursor_move_locked(crtc, x, y);
radeon_lock_cursor(crtc, false);
+ return ret;
+}
+
+static int radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj)
+{
+ struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+ struct radeon_device *rdev = crtc->dev->dev_private;
+ struct radeon_bo *robj = gem_to_radeon_bo(obj);
+ uint64_t gpu_addr;
+ int ret;
+
+ ret = radeon_bo_reserve(robj, false);
+ if (unlikely(ret != 0))
+ goto fail;
+ /* Only 27 bit offset for legacy cursor */
+ ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM,
+ ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27,
+ &gpu_addr);
+ radeon_bo_unreserve(robj);
+ if (ret)
+ goto fail;
+
+ if (ASIC_IS_DCE4(rdev)) {
+ WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset,
+ upper_32_bits(gpu_addr));
+ WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
+ gpu_addr & 0xffffffff);
+ } else if (ASIC_IS_AVIVO(rdev)) {
+ if (rdev->family >= CHIP_RV770) {
+ if (radeon_crtc->crtc_id)
+ WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr));
+ else
+ WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr));
+ }
+ WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
+ gpu_addr & 0xffffffff);
+ } else {
+ radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr;
+ /* offset is from DISP(2)_BASE_ADDRESS */
+ WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset);
+ }
+
return 0;
+
+fail:
+ drm_gem_object_unreference_unlocked(obj);
+
+ return ret;
+}
+
+int radeon_crtc_cursor_set2(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height,
+ int32_t hot_x,
+ int32_t hot_y)
+{
+ struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+ struct drm_gem_object *obj;
+ int ret;
+
+ if (!handle) {
+ /* turn off cursor */
+ radeon_hide_cursor(crtc);
+ obj = NULL;
+ goto unpin;
+ }
+
+ if ((width > radeon_crtc->max_cursor_width) ||
+ (height > radeon_crtc->max_cursor_height)) {
+ DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+ if (!obj) {
+ DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, radeon_crtc->crtc_id);
+ return -ENOENT;
+ }
+
+ radeon_crtc->cursor_width = width;
+ radeon_crtc->cursor_height = height;
+
+ radeon_lock_cursor(crtc, true);
+
+ if (hot_x != radeon_crtc->cursor_hot_x ||
+ hot_y != radeon_crtc->cursor_hot_y) {
+ int x, y;
+
+ x = radeon_crtc->cursor_x + radeon_crtc->cursor_hot_x - hot_x;
+ y = radeon_crtc->cursor_y + radeon_crtc->cursor_hot_y - hot_y;
+
+ radeon_cursor_move_locked(crtc, x, y);
+
+ radeon_crtc->cursor_hot_x = hot_x;
+ radeon_crtc->cursor_hot_y = hot_y;
+ }
+
+ ret = radeon_set_cursor(crtc, obj);
+
+ if (ret)
+ DRM_ERROR("radeon_set_cursor returned %d, not changing cursor\n",
+ ret);
+ else
+ radeon_show_cursor(crtc);
+
+ radeon_lock_cursor(crtc, false);
+
+unpin:
+ if (radeon_crtc->cursor_bo) {
+ struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo);
+ ret = radeon_bo_reserve(robj, false);
+ if (likely(ret == 0)) {
+ radeon_bo_unpin(robj);
+ radeon_bo_unreserve(robj);
+ }
+ if (radeon_crtc->cursor_bo != obj)
+ drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo);
+ }
+
+ radeon_crtc->cursor_bo = obj;
+ return 0;
+}
+
+/**
+ * radeon_cursor_reset - Re-set the current cursor, if any.
+ *
+ * @crtc: drm crtc
+ *
+ * If the CRTC passed in currently has a cursor assigned, this function
+ * makes sure it's visible.
+ */
+void radeon_cursor_reset(struct drm_crtc *crtc)
+{
+ struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+ int ret;
+
+ if (radeon_crtc->cursor_bo) {
+ radeon_lock_cursor(crtc, true);
+
+ radeon_cursor_move_locked(crtc, radeon_crtc->cursor_x,
+ radeon_crtc->cursor_y);
+
+ ret = radeon_set_cursor(crtc, radeon_crtc->cursor_bo);
+ if (ret)
+ DRM_ERROR("radeon_set_cursor returned %d, not showing "
+ "cursor\n", ret);
+ else
+ radeon_show_cursor(crtc);
+
+ radeon_lock_cursor(crtc, false);
+ }
}
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 995a8b1770dd..0ec65168f331 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -377,6 +377,37 @@ void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell)
__clear_bit(doorbell, rdev->doorbell.used);
}
+/**
+ * radeon_doorbell_get_kfd_info - Report doorbell configuration required to
+ * setup KFD
+ *
+ * @rdev: radeon_device pointer
+ * @aperture_base: output returning doorbell aperture base physical address
+ * @aperture_size: output returning doorbell aperture size in bytes
+ * @start_offset: output returning # of doorbell bytes reserved for radeon.
+ *
+ * Radeon and the KFD share the doorbell aperture. Radeon sets it up,
+ * takes doorbells required for its own rings and reports the setup to KFD.
+ * Radeon reserved doorbells are at the start of the doorbell aperture.
+ */
+void radeon_doorbell_get_kfd_info(struct radeon_device *rdev,
+ phys_addr_t *aperture_base,
+ size_t *aperture_size,
+ size_t *start_offset)
+{
+ /* The first num_doorbells are used by radeon.
+ * KFD takes whatever's left in the aperture. */
+ if (rdev->doorbell.size > rdev->doorbell.num_doorbells * sizeof(u32)) {
+ *aperture_base = rdev->doorbell.base;
+ *aperture_size = rdev->doorbell.size;
+ *start_offset = rdev->doorbell.num_doorbells * sizeof(u32);
+ } else {
+ *aperture_base = 0;
+ *aperture_size = 0;
+ *start_offset = 0;
+ }
+}
+
/*
* radeon_wb_*()
* Writeback is the the method by which the the GPU updates special pages
@@ -1273,6 +1304,7 @@ int radeon_device_init(struct radeon_device *rdev,
mutex_init(&rdev->pm.mutex);
mutex_init(&rdev->gpu_clock_mutex);
mutex_init(&rdev->srbm_mutex);
+ mutex_init(&rdev->grbm_idx_mutex);
init_rwsem(&rdev->pm.mclk_lock);
init_rwsem(&rdev->exclusive_lock);
init_waitqueue_head(&rdev->irq.vblank_queue);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 00ead8c2758a..102116902a07 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -32,6 +32,7 @@
#include <linux/pm_runtime.h>
#include <drm/drm_crtc_helper.h>
+#include <drm/drm_plane_helper.h>
#include <drm/drm_edid.h>
#include <linux/gcd.h>
@@ -634,7 +635,7 @@ radeon_crtc_set_config(struct drm_mode_set *set)
return ret;
}
static const struct drm_crtc_funcs radeon_crtc_funcs = {
- .cursor_set = radeon_crtc_cursor_set,
+ .cursor_set2 = radeon_crtc_cursor_set2,
.cursor_move = radeon_crtc_cursor_move,
.gamma_set = radeon_crtc_gamma_set,
.set_config = radeon_crtc_set_config,
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index dcffa30ee2db..4f50fb0e3d93 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -41,6 +41,8 @@
#include <drm/drm_gem.h>
#include "drm_crtc_helper.h"
+#include "radeon_kfd.h"
+
/*
* KMS wrapper.
* - 2.0.0 - initial interface
@@ -654,12 +656,15 @@ static int __init radeon_init(void)
#endif
}
+ radeon_kfd_init();
+
/* let modprobe override vga console setting */
return drm_pci_init(driver, pdriver);
}
static void __exit radeon_exit(void)
{
+ radeon_kfd_fini();
drm_pci_exit(driver, pdriver);
radeon_unregister_atpx_handler();
}
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 0ea1db83d573..29b9220ec399 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -48,10 +48,40 @@ struct radeon_fbdev {
struct radeon_device *rdev;
};
+/**
+ * radeon_fb_helper_set_par - Hide cursor on CRTCs used by fbdev.
+ *
+ * @info: fbdev info
+ *
+ * This function hides the cursor on all CRTCs used by fbdev.
+ */
+static int radeon_fb_helper_set_par(struct fb_info *info)
+{
+ int ret;
+
+ ret = drm_fb_helper_set_par(info);
+
+ /* XXX: with universal plane support fbdev will automatically disable
+ * all non-primary planes (including the cursor)
+ */
+ if (ret == 0) {
+ struct drm_fb_helper *fb_helper = info->par;
+ int i;
+
+ for (i = 0; i < fb_helper->crtc_count; i++) {
+ struct drm_crtc *crtc = fb_helper->crtc_info[i].mode_set.crtc;
+
+ radeon_crtc_cursor_set2(crtc, NULL, 0, 0, 0, 0, 0);
+ }
+ }
+
+ return ret;
+}
+
static struct fb_ops radeonfb_ops = {
.owner = THIS_MODULE,
.fb_check_var = drm_fb_helper_check_var,
- .fb_set_par = drm_fb_helper_set_par,
+ .fb_set_par = radeon_fb_helper_set_par,
.fb_fillrect = cfb_fillrect,
.fb_copyarea = cfb_copyarea,
.fb_imageblit = cfb_imageblit,
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 995167025282..d13d1b5a859f 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -140,6 +140,7 @@ int radeon_fence_emit(struct radeon_device *rdev,
(*fence)->rdev = rdev;
(*fence)->seq = seq;
(*fence)->ring = ring;
+ (*fence)->is_vm_update = false;
fence_init(&(*fence)->base, &radeon_fence_ops,
&rdev->fence_queue.lock, rdev->fence_context + ring, seq);
radeon_fence_ring_emit(rdev, ring, *fence);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index c194497aa586..fe48f229043e 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -394,9 +394,10 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
return r;
}
-int radeon_mode_dumb_mmap(struct drm_file *filp,
- struct drm_device *dev,
- uint32_t handle, uint64_t *offset_p)
+static int radeon_mode_mmap(struct drm_file *filp,
+ struct drm_device *dev,
+ uint32_t handle, bool dumb,
+ uint64_t *offset_p)
{
struct drm_gem_object *gobj;
struct radeon_bo *robj;
@@ -405,6 +406,14 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
if (gobj == NULL) {
return -ENOENT;
}
+
+ /*
+ * We don't allow dumb mmaps on objects created using another
+ * interface.
+ */
+ WARN_ONCE(dumb && !(gobj->dumb || gobj->import_attach),
+ "Illegal dumb map of GPU buffer.\n");
+
robj = gem_to_radeon_bo(gobj);
if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) {
drm_gem_object_unreference_unlocked(gobj);
@@ -415,12 +424,20 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
return 0;
}
+int radeon_mode_dumb_mmap(struct drm_file *filp,
+ struct drm_device *dev,
+ uint32_t handle, uint64_t *offset_p)
+{
+ return radeon_mode_mmap(filp, dev, handle, true, offset_p);
+}
+
int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
struct drm_radeon_gem_mmap *args = data;
- return radeon_mode_dumb_mmap(filp, dev, args->handle, &args->addr_ptr);
+ return radeon_mode_mmap(filp, dev, args->handle, false,
+ &args->addr_ptr);
}
int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
@@ -518,6 +535,68 @@ out:
return r;
}
+/**
+ * radeon_gem_va_update_vm -update the bo_va in its VM
+ *
+ * @rdev: radeon_device pointer
+ * @bo_va: bo_va to update
+ *
+ * Update the bo_va directly after setting it's address. Errors are not
+ * vital here, so they are not reported back to userspace.
+ */
+static void radeon_gem_va_update_vm(struct radeon_device *rdev,
+ struct radeon_bo_va *bo_va)
+{
+ struct ttm_validate_buffer tv, *entry;
+ struct radeon_bo_list *vm_bos;
+ struct ww_acquire_ctx ticket;
+ struct list_head list;
+ unsigned domain;
+ int r;
+
+ INIT_LIST_HEAD(&list);
+
+ tv.bo = &bo_va->bo->tbo;
+ tv.shared = true;
+ list_add(&tv.head, &list);
+
+ vm_bos = radeon_vm_get_bos(rdev, bo_va->vm, &list);
+ if (!vm_bos)
+ return;
+
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+ if (r)
+ goto error_free;
+
+ list_for_each_entry(entry, &list, head) {
+ domain = radeon_mem_type_to_domain(entry->bo->mem.mem_type);
+ /* if anything is swapped out don't swap it in here,
+ just abort and wait for the next CS */
+ if (domain == RADEON_GEM_DOMAIN_CPU)
+ goto error_unreserve;
+ }
+
+ mutex_lock(&bo_va->vm->mutex);
+ r = radeon_vm_clear_freed(rdev, bo_va->vm);
+ if (r)
+ goto error_unlock;
+
+ if (bo_va->it.start)
+ r = radeon_vm_bo_update(rdev, bo_va, &bo_va->bo->tbo.mem);
+
+error_unlock:
+ mutex_unlock(&bo_va->vm->mutex);
+
+error_unreserve:
+ ttm_eu_backoff_reservation(&ticket, &list);
+
+error_free:
+ drm_free_large(vm_bos);
+
+ if (r)
+ DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
+}
+
int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
@@ -601,6 +680,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
if (bo_va->it.start) {
args->operation = RADEON_VA_RESULT_VA_EXIST;
args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE;
+ radeon_bo_unreserve(rbo);
goto out;
}
r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);
@@ -611,12 +691,13 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
default:
break;
}
+ if (!r)
+ radeon_gem_va_update_vm(rdev, bo_va);
args->operation = RADEON_VA_RESULT_OK;
if (r) {
args->operation = RADEON_VA_RESULT_ERROR;
}
out:
- radeon_bo_unreserve(rbo);
drm_gem_object_unreference_unlocked(gobj);
return r;
}
@@ -682,6 +763,7 @@ int radeon_mode_dumb_create(struct drm_file *file_priv,
return -ENOMEM;
r = drm_gem_handle_create(file_priv, gobj, &handle);
+ gobj->dumb = true;
/* drop reference from allocate - handle holds it now */
drm_gem_object_unreference_unlocked(gobj);
if (r) {
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index 3f39fcca4d07..c39ce1f05703 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -64,10 +64,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
return r;
}
- r = radeon_semaphore_create(rdev, &ib->semaphore);
- if (r) {
- return r;
- }
+ radeon_sync_create(&ib->sync);
ib->ring = ring;
ib->fence = NULL;
@@ -96,7 +93,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
*/
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
{
- radeon_semaphore_free(rdev, &ib->semaphore, ib->fence);
+ radeon_sync_free(rdev, &ib->sync, ib->fence);
radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence);
radeon_fence_unref(&ib->fence);
}
@@ -145,11 +142,11 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
if (ib->vm) {
struct radeon_fence *vm_id_fence;
vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
- radeon_semaphore_sync_fence(ib->semaphore, vm_id_fence);
+ radeon_sync_fence(&ib->sync, vm_id_fence);
}
/* sync with other rings */
- r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring);
+ r = radeon_sync_rings(rdev, &ib->sync, ib->ring);
if (r) {
dev_err(rdev->dev, "failed to sync rings (%d)\n", r);
radeon_ring_unlock_undo(rdev, ring);
@@ -157,11 +154,12 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
}
if (ib->vm)
- radeon_vm_flush(rdev, ib->vm, ib->ring);
+ radeon_vm_flush(rdev, ib->vm, ib->ring,
+ ib->sync.last_vm_update);
if (const_ib) {
radeon_ring_ib_execute(rdev, const_ib->ring, const_ib);
- radeon_semaphore_free(rdev, &const_ib->semaphore, NULL);
+ radeon_sync_free(rdev, &const_ib->sync, NULL);
}
radeon_ring_ib_execute(rdev, ib->ring, ib);
r = radeon_fence_emit(rdev, &ib->fence, ib->ring);
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
new file mode 100644
index 000000000000..065d02068ec3
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -0,0 +1,563 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "cikd.h"
+#include "cik_reg.h"
+#include "radeon_kfd.h"
+
+#define CIK_PIPE_PER_MEC (4)
+
+struct kgd_mem {
+ struct radeon_sa_bo *sa_bo;
+ uint64_t gpu_addr;
+ void *ptr;
+};
+
+static int init_sa_manager(struct kgd_dev *kgd, unsigned int size);
+static void fini_sa_manager(struct kgd_dev *kgd);
+
+static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment,
+ enum kgd_memory_pool pool, struct kgd_mem **mem);
+
+static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem);
+
+static uint64_t get_vmem_size(struct kgd_dev *kgd);
+static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
+
+static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid);
+
+static int kgd_init_memory(struct kgd_dev *kgd);
+
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t hpd_size, uint64_t hpd_gpu_addr);
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr);
+
+static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id);
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id);
+
+static const struct kfd2kgd_calls kfd2kgd = {
+ .init_sa_manager = init_sa_manager,
+ .fini_sa_manager = fini_sa_manager,
+ .allocate_mem = allocate_mem,
+ .free_mem = free_mem,
+ .get_vmem_size = get_vmem_size,
+ .get_gpu_clock_counter = get_gpu_clock_counter,
+ .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_memory = kgd_init_memory,
+ .init_pipeline = kgd_init_pipeline,
+ .hqd_load = kgd_hqd_load,
+ .hqd_is_occupies = kgd_hqd_is_occupies,
+ .hqd_destroy = kgd_hqd_destroy,
+};
+
+static const struct kgd2kfd_calls *kgd2kfd;
+
+bool radeon_kfd_init(void)
+{
+ bool (*kgd2kfd_init_p)(unsigned, const struct kfd2kgd_calls*,
+ const struct kgd2kfd_calls**);
+
+ kgd2kfd_init_p = symbol_request(kgd2kfd_init);
+
+ if (kgd2kfd_init_p == NULL)
+ return false;
+
+ if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kfd2kgd, &kgd2kfd)) {
+ symbol_put(kgd2kfd_init);
+ kgd2kfd = NULL;
+
+ return false;
+ }
+
+ return true;
+}
+
+void radeon_kfd_fini(void)
+{
+ if (kgd2kfd) {
+ kgd2kfd->exit();
+ symbol_put(kgd2kfd_init);
+ }
+}
+
+void radeon_kfd_device_probe(struct radeon_device *rdev)
+{
+ if (kgd2kfd)
+ rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, rdev->pdev);
+}
+
+void radeon_kfd_device_init(struct radeon_device *rdev)
+{
+ if (rdev->kfd) {
+ struct kgd2kfd_shared_resources gpu_resources = {
+ .compute_vmid_bitmap = 0xFF00,
+
+ .first_compute_pipe = 1,
+ .compute_pipe_count = 8 - 1,
+ };
+
+ radeon_doorbell_get_kfd_info(rdev,
+ &gpu_resources.doorbell_physical_address,
+ &gpu_resources.doorbell_aperture_size,
+ &gpu_resources.doorbell_start_offset);
+
+ kgd2kfd->device_init(rdev->kfd, &gpu_resources);
+ }
+}
+
+void radeon_kfd_device_fini(struct radeon_device *rdev)
+{
+ if (rdev->kfd) {
+ kgd2kfd->device_exit(rdev->kfd);
+ rdev->kfd = NULL;
+ }
+}
+
+void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry)
+{
+ if (rdev->kfd)
+ kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+}
+
+void radeon_kfd_suspend(struct radeon_device *rdev)
+{
+ if (rdev->kfd)
+ kgd2kfd->suspend(rdev->kfd);
+}
+
+int radeon_kfd_resume(struct radeon_device *rdev)
+{
+ int r = 0;
+
+ if (rdev->kfd)
+ r = kgd2kfd->resume(rdev->kfd);
+
+ return r;
+}
+
+static u32 pool_to_domain(enum kgd_memory_pool p)
+{
+ switch (p) {
+ case KGD_POOL_FRAMEBUFFER: return RADEON_GEM_DOMAIN_VRAM;
+ default: return RADEON_GEM_DOMAIN_GTT;
+ }
+}
+
+static int init_sa_manager(struct kgd_dev *kgd, unsigned int size)
+{
+ struct radeon_device *rdev = (struct radeon_device *)kgd;
+ int r;
+
+ BUG_ON(kgd == NULL);
+
+ r = radeon_sa_bo_manager_init(rdev, &rdev->kfd_bo,
+ size,
+ RADEON_GPU_PAGE_SIZE,
+ RADEON_GEM_DOMAIN_GTT,
+ RADEON_GEM_GTT_WC);
+
+ if (r)
+ return r;
+
+ r = radeon_sa_bo_manager_start(rdev, &rdev->kfd_bo);
+ if (r)
+ radeon_sa_bo_manager_fini(rdev, &rdev->kfd_bo);
+
+ return r;
+}
+
+static void fini_sa_manager(struct kgd_dev *kgd)
+{
+ struct radeon_device *rdev = (struct radeon_device *)kgd;
+
+ BUG_ON(kgd == NULL);
+
+ radeon_sa_bo_manager_suspend(rdev, &rdev->kfd_bo);
+ radeon_sa_bo_manager_fini(rdev, &rdev->kfd_bo);
+}
+
+static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment,
+ enum kgd_memory_pool pool, struct kgd_mem **mem)
+{
+ struct radeon_device *rdev = (struct radeon_device *)kgd;
+ u32 domain;
+ int r;
+
+ BUG_ON(kgd == NULL);
+
+ domain = pool_to_domain(pool);
+ if (domain != RADEON_GEM_DOMAIN_GTT) {
+ dev_err(rdev->dev,
+ "Only allowed to allocate gart memory for kfd\n");
+ return -EINVAL;
+ }
+
+ *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if ((*mem) == NULL)
+ return -ENOMEM;
+
+ r = radeon_sa_bo_new(rdev, &rdev->kfd_bo, &(*mem)->sa_bo, size,
+ alignment);
+ if (r) {
+ dev_err(rdev->dev, "failed to get memory for kfd (%d)\n", r);
+ return r;
+ }
+
+ (*mem)->ptr = radeon_sa_bo_cpu_addr((*mem)->sa_bo);
+ (*mem)->gpu_addr = radeon_sa_bo_gpu_addr((*mem)->sa_bo);
+
+ return 0;
+}
+
+static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem)
+{
+ struct radeon_device *rdev = (struct radeon_device *)kgd;
+
+ BUG_ON(kgd == NULL);
+
+ radeon_sa_bo_free(rdev, &mem->sa_bo, NULL);
+ kfree(mem);
+}
+
+static uint64_t get_vmem_size(struct kgd_dev *kgd)
+{
+ struct radeon_device *rdev = (struct radeon_device *)kgd;
+
+ BUG_ON(kgd == NULL);
+
+ return rdev->mc.real_vram_size;
+}
+
+static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
+{
+ struct radeon_device *rdev = (struct radeon_device *)kgd;
+
+ return rdev->asic->get_gpu_clock_counter(rdev);
+}
+
+static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
+{
+ struct radeon_device *rdev = (struct radeon_device *)kgd;
+
+ /* The sclk is in quantas of 10kHz */
+ return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+}
+
+static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd)
+{
+ return (struct radeon_device *)kgd;
+}
+
+static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value)
+{
+ struct radeon_device *rdev = get_radeon_device(kgd);
+
+ writel(value, (void __iomem *)(rdev->rmmio + offset));
+}
+
+static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset)
+{
+ struct radeon_device *rdev = get_radeon_device(kgd);
+
+ return readl((void __iomem *)(rdev->rmmio + offset));
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ struct radeon_device *rdev = get_radeon_device(kgd);
+ uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
+
+ mutex_lock(&rdev->srbm_mutex);
+ write_register(kgd, SRBM_GFX_CNTL, value);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+ struct radeon_device *rdev = get_radeon_device(kgd);
+
+ write_register(kgd, SRBM_GFX_CNTL, 0);
+ mutex_unlock(&rdev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
+ uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+
+ lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+ unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases)
+{
+ lock_srbm(kgd, 0, 0, 0, vmid);
+
+ write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
+ write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base);
+ write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
+ write_register(kgd, SH_MEM_BASES, sh_mem_bases);
+
+ unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid)
+{
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0
+ * because a mapping is in progress or because a mapping finished and
+ * the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 :
+ (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID;
+
+ write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
+ pasid_mapping);
+
+ while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) &
+ (1U << vmid)))
+ cpu_relax();
+ write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
+
+ return 0;
+}
+
+static int kgd_init_memory(struct kgd_dev *kgd)
+{
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ int i;
+ uint32_t sh_mem_bases = PRIVATE_BASE(0x6000) | SHARED_BASE(0x6000);
+
+ for (i = 8; i < 16; i++) {
+ uint32_t sh_mem_config;
+
+ lock_srbm(kgd, 0, 0, 0, i);
+
+ sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
+
+ write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
+
+ write_register(kgd, SH_MEM_BASES, sh_mem_bases);
+
+ /* Scratch aperture is not supported for now. */
+ write_register(kgd, SH_STATIC_MEM_CONFIG, 0);
+
+ /* APE1 disabled for now. */
+ write_register(kgd, SH_MEM_APE1_BASE, 1);
+ write_register(kgd, SH_MEM_APE1_LIMIT, 0);
+
+ unlock_srbm(kgd);
+ }
+
+ return 0;
+}
+
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t hpd_size, uint64_t hpd_gpu_addr)
+{
+ uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
+ uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+
+ lock_srbm(kgd, mec, pipe, 0, 0);
+ write_register(kgd, CP_HPD_EOP_BASE_ADDR,
+ lower_32_bits(hpd_gpu_addr >> 8));
+ write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI,
+ upper_32_bits(hpd_gpu_addr >> 8));
+ write_register(kgd, CP_HPD_EOP_VMID, 0);
+ write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size);
+ unlock_srbm(kgd);
+
+ return 0;
+}
+
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+ return (struct cik_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr)
+{
+ uint32_t wptr_shadow, is_wptr_shadow_valid;
+ struct cik_mqd *m;
+
+ m = get_mqd(mqd);
+
+ is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
+ write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
+ write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control);
+
+ write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
+ write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
+ write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
+
+ write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
+ write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
+ write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
+
+ write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
+
+ write_register(kgd, CP_HQD_PERSISTENT_STATE,
+ m->cp_hqd_persistent_state);
+ write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
+ write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
+
+ write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO,
+ m->cp_hqd_atomic0_preop_lo);
+
+ write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI,
+ m->cp_hqd_atomic0_preop_hi);
+
+ write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO,
+ m->cp_hqd_atomic1_preop_lo);
+
+ write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI,
+ m->cp_hqd_atomic1_preop_hi);
+
+ write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR,
+ m->cp_hqd_pq_rptr_report_addr_lo);
+
+ write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ m->cp_hqd_pq_rptr_report_addr_hi);
+
+ write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
+
+ write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR,
+ m->cp_hqd_pq_wptr_poll_addr_lo);
+
+ write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ m->cp_hqd_pq_wptr_poll_addr_hi);
+
+ write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL,
+ m->cp_hqd_pq_doorbell_control);
+
+ write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid);
+
+ write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum);
+
+ write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
+ write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
+
+ write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
+
+ if (is_wptr_shadow_valid)
+ write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow);
+
+ write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active);
+ release_queue(kgd);
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ act = read_register(kgd, CP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == read_register(kgd, CP_HQD_PQ_BASE) &&
+ high == read_register(kgd, CP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(kgd);
+ return retval;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t temp;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type);
+
+ while (true) {
+ temp = read_register(kgd, CP_HQD_ACTIVE);
+ if (temp & 0x1)
+ break;
+ if (timeout == 0) {
+ pr_err("kfd: cp queue preemption time out (%dms)\n",
+ temp);
+ return -ETIME;
+ }
+ msleep(20);
+ timeout -= 20;
+ }
+
+ release_queue(kgd);
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.h b/drivers/gpu/drm/radeon/radeon_kfd.h
new file mode 100644
index 000000000000..f90e161ca507
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_kfd.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * radeon_kfd.h defines the private interface between the
+ * AMD kernel graphics drivers and the AMD KFD.
+ */
+
+#ifndef RADEON_KFD_H_INCLUDED
+#define RADEON_KFD_H_INCLUDED
+
+#include <linux/types.h>
+#include "../amd/include/kgd_kfd_interface.h"
+
+struct radeon_device;
+
+bool radeon_kfd_init(void);
+void radeon_kfd_fini(void);
+
+void radeon_kfd_suspend(struct radeon_device *rdev);
+int radeon_kfd_resume(struct radeon_device *rdev);
+void radeon_kfd_interrupt(struct radeon_device *rdev,
+ const void *ih_ring_entry);
+void radeon_kfd_device_probe(struct radeon_device *rdev);
+void radeon_kfd_device_init(struct radeon_device *rdev);
+void radeon_kfd_device_fini(struct radeon_device *rdev);
+
+#endif /* RADEON_KFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 03586763ee86..3cf9c1fa6475 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -34,6 +34,8 @@
#include <linux/slab.h>
#include <linux/pm_runtime.h>
+#include "radeon_kfd.h"
+
#if defined(CONFIG_VGA_SWITCHEROO)
bool radeon_has_atpx(void);
#else
@@ -63,6 +65,8 @@ int radeon_driver_unload_kms(struct drm_device *dev)
pm_runtime_get_sync(dev->dev);
+ radeon_kfd_device_fini(rdev);
+
radeon_acpi_fini(rdev);
radeon_modeset_fini(rdev);
@@ -142,6 +146,9 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
"Error during ACPI methods call\n");
}
+ radeon_kfd_device_probe(rdev);
+ radeon_kfd_device_init(rdev);
+
if (radeon_is_px(dev)) {
pm_runtime_use_autosuspend(dev->dev);
pm_runtime_set_autosuspend_delay(dev->dev, 5000);
@@ -621,8 +628,6 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
RADEON_VA_IB_OFFSET,
RADEON_VM_PAGE_READABLE |
RADEON_VM_PAGE_SNOOPED);
-
- radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
if (r) {
radeon_vm_fini(rdev, vm);
kfree(fpriv);
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index cafb1ccf2ec3..678b4386540d 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -1054,6 +1054,7 @@ static int radeon_crtc_mode_set(struct drm_crtc *crtc,
DRM_ERROR("Mode need scaling but only first crtc can do that.\n");
}
}
+ radeon_cursor_reset(crtc);
return 0;
}
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 04db2fdd8692..390db897f322 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -321,6 +321,10 @@ struct radeon_crtc {
uint32_t crtc_offset;
struct drm_gem_object *cursor_bo;
uint64_t cursor_addr;
+ int cursor_x;
+ int cursor_y;
+ int cursor_hot_x;
+ int cursor_hot_y;
int cursor_width;
int cursor_height;
int max_cursor_width;
@@ -462,6 +466,7 @@ struct radeon_gpio_rec {
u8 id;
u32 reg;
u32 mask;
+ u32 shift;
};
struct radeon_hpd {
@@ -748,6 +753,8 @@ extern bool radeon_atombios_get_ppll_ss_info(struct radeon_device *rdev,
extern bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev,
struct radeon_atom_ss *ss,
int id, u32 clock);
+extern struct radeon_gpio_rec radeon_atombios_lookup_gpio(struct radeon_device *rdev,
+ u8 id);
extern void radeon_compute_pll_legacy(struct radeon_pll *pll,
uint64_t freq,
@@ -802,13 +809,16 @@ extern int radeon_crtc_set_base_atomic(struct drm_crtc *crtc,
extern int radeon_crtc_do_set_base(struct drm_crtc *crtc,
struct drm_framebuffer *fb,
int x, int y, int atomic);
-extern int radeon_crtc_cursor_set(struct drm_crtc *crtc,
- struct drm_file *file_priv,
- uint32_t handle,
- uint32_t width,
- uint32_t height);
+extern int radeon_crtc_cursor_set2(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height,
+ int32_t hot_x,
+ int32_t hot_y);
extern int radeon_crtc_cursor_move(struct drm_crtc *crtc,
int x, int y);
+extern void radeon_cursor_reset(struct drm_crtc *crtc);
extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc,
unsigned int flags,
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 4c0d786d5c7a..7d68223eb469 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -99,22 +99,39 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
rbo->placement.placement = rbo->placements;
rbo->placement.busy_placement = rbo->placements;
- if (domain & RADEON_GEM_DOMAIN_VRAM)
+ if (domain & RADEON_GEM_DOMAIN_VRAM) {
+ /* Try placing BOs which don't need CPU access outside of the
+ * CPU accessible part of VRAM
+ */
+ if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
+ rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
+ rbo->placements[c].fpfn =
+ rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+ rbo->placements[c++].flags = TTM_PL_FLAG_WC |
+ TTM_PL_FLAG_UNCACHED |
+ TTM_PL_FLAG_VRAM;
+ }
+
+ rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_WC |
TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_VRAM;
+ }
if (domain & RADEON_GEM_DOMAIN_GTT) {
if (rbo->flags & RADEON_GEM_GTT_UC) {
+ rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_TT;
} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
(rbo->rdev->flags & RADEON_IS_AGP)) {
+ rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_WC |
TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_TT;
} else {
+ rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
TTM_PL_FLAG_TT;
}
@@ -122,30 +139,35 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
if (domain & RADEON_GEM_DOMAIN_CPU) {
if (rbo->flags & RADEON_GEM_GTT_UC) {
+ rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_SYSTEM;
} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
rbo->rdev->flags & RADEON_IS_AGP) {
+ rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_WC |
TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_SYSTEM;
} else {
+ rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
TTM_PL_FLAG_SYSTEM;
}
}
- if (!c)
+ if (!c) {
+ rbo->placements[c].fpfn = 0;
rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
TTM_PL_FLAG_SYSTEM;
+ }
rbo->placement.num_placement = c;
rbo->placement.num_busy_placement = c;
for (i = 0; i < c; ++i) {
- rbo->placements[i].fpfn = 0;
if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
- (rbo->placements[i].flags & TTM_PL_FLAG_VRAM))
+ (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
+ !rbo->placements[i].fpfn)
rbo->placements[i].lpfn =
rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
else
@@ -157,9 +179,7 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
* improve fragmentation quality.
* 512kb was measured as the most optimal number.
*/
- if (!((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
- (rbo->placements[i].flags & TTM_PL_FLAG_VRAM)) &&
- rbo->tbo.mem.size > 512 * 1024) {
+ if (rbo->tbo.mem.size > 512 * 1024) {
for (i = 0; i < c; i++) {
rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
}
@@ -489,25 +509,29 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
struct ww_acquire_ctx *ticket,
struct list_head *head, int ring)
{
- struct radeon_cs_reloc *lobj;
- struct radeon_bo *bo;
+ struct radeon_bo_list *lobj;
+ struct list_head duplicates;
int r;
u64 bytes_moved = 0, initial_bytes_moved;
u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
- r = ttm_eu_reserve_buffers(ticket, head, true);
+ INIT_LIST_HEAD(&duplicates);
+ r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
if (unlikely(r != 0)) {
return r;
}
list_for_each_entry(lobj, head, tv.head) {
- bo = lobj->robj;
+ struct radeon_bo *bo = lobj->robj;
if (!bo->pin_count) {
u32 domain = lobj->prefered_domains;
u32 allowed = lobj->allowed_domains;
u32 current_domain =
radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
+ WARN_ONCE(bo->gem_base.dumb,
+ "GPU use of dumb buffer is illegal.\n");
+
/* Check if this buffer will be moved and don't move it
* if we have moved too many buffers for this IB already.
*
@@ -546,6 +570,12 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
lobj->gpu_offset = radeon_bo_gpu_offset(bo);
lobj->tiling_flags = bo->tiling_flags;
}
+
+ list_for_each_entry(lobj, &duplicates, tv.head) {
+ lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
+ lobj->tiling_flags = lobj->robj->tiling_flags;
+ }
+
return 0;
}
@@ -750,8 +780,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
struct radeon_device *rdev;
struct radeon_bo *rbo;
- unsigned long offset, size;
- int r;
+ unsigned long offset, size, lpfn;
+ int i, r;
if (!radeon_ttm_bo_is_radeon_bo(bo))
return 0;
@@ -768,7 +798,13 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* hurrah the memory is not visible ! */
radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
- rbo->placements[0].lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
+ lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
+ for (i = 0; i < rbo->placement.num_placement; i++) {
+ /* Force into visible VRAM */
+ if ((rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
+ (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
+ rbo->placements[i].lpfn = lpfn;
+ }
r = ttm_bo_validate(bo, &rbo->placement, false, false);
if (unlikely(r == -ENOMEM)) {
radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
@@ -799,3 +835,22 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
ttm_bo_unreserve(&bo->tbo);
return r;
}
+
+/**
+ * radeon_bo_fence - add fence to buffer object
+ *
+ * @bo: buffer object in question
+ * @fence: fence to add
+ * @shared: true if fence should be added shared
+ *
+ */
+void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
+ bool shared)
+{
+ struct reservation_object *resv = bo->tbo.resv;
+
+ if (shared)
+ reservation_object_add_shared_fence(resv, &fence->base);
+ else
+ reservation_object_add_excl_fence(resv, &fence->base);
+}
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 1b8ec7917154..3b0b377f76cb 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -155,6 +155,8 @@ extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *new_mem);
extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
+extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
+ bool shared);
/*
* sub allocation
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 6deb08f045b7..e6ad54cdfa62 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -34,15 +34,14 @@
int radeon_semaphore_create(struct radeon_device *rdev,
struct radeon_semaphore **semaphore)
{
- uint64_t *cpu_addr;
- int i, r;
+ int r;
*semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL);
if (*semaphore == NULL) {
return -ENOMEM;
}
- r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo,
- 8 * RADEON_NUM_SYNCS, 8);
+ r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo,
+ &(*semaphore)->sa_bo, 8, 8);
if (r) {
kfree(*semaphore);
*semaphore = NULL;
@@ -51,12 +50,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
(*semaphore)->waiters = 0;
(*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo);
- cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo);
- for (i = 0; i < RADEON_NUM_SYNCS; ++i)
- cpu_addr[i] = 0;
-
- for (i = 0; i < RADEON_NUM_RINGS; ++i)
- (*semaphore)->sync_to[i] = NULL;
+ *((uint64_t *)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
return 0;
}
@@ -95,146 +89,6 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx,
return false;
}
-/**
- * radeon_semaphore_sync_fence - use the semaphore to sync to a fence
- *
- * @semaphore: semaphore object to add fence to
- * @fence: fence to sync to
- *
- * Sync to the fence using this semaphore object
- */
-void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
- struct radeon_fence *fence)
-{
- struct radeon_fence *other;
-
- if (!fence)
- return;
-
- other = semaphore->sync_to[fence->ring];
- semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other);
-}
-
-/**
- * radeon_semaphore_sync_to - use the semaphore to sync to a reservation object
- *
- * @sema: semaphore object to add fence from reservation object to
- * @resv: reservation object with embedded fence
- * @shared: true if we should onyl sync to the exclusive fence
- *
- * Sync to the fence using this semaphore object
- */
-int radeon_semaphore_sync_resv(struct radeon_device *rdev,
- struct radeon_semaphore *sema,
- struct reservation_object *resv,
- bool shared)
-{
- struct reservation_object_list *flist;
- struct fence *f;
- struct radeon_fence *fence;
- unsigned i;
- int r = 0;
-
- /* always sync to the exclusive fence */
- f = reservation_object_get_excl(resv);
- fence = f ? to_radeon_fence(f) : NULL;
- if (fence && fence->rdev == rdev)
- radeon_semaphore_sync_fence(sema, fence);
- else if (f)
- r = fence_wait(f, true);
-
- flist = reservation_object_get_list(resv);
- if (shared || !flist || r)
- return r;
-
- for (i = 0; i < flist->shared_count; ++i) {
- f = rcu_dereference_protected(flist->shared[i],
- reservation_object_held(resv));
- fence = to_radeon_fence(f);
- if (fence && fence->rdev == rdev)
- radeon_semaphore_sync_fence(sema, fence);
- else
- r = fence_wait(f, true);
-
- if (r)
- break;
- }
- return r;
-}
-
-/**
- * radeon_semaphore_sync_rings - sync ring to all registered fences
- *
- * @rdev: radeon_device pointer
- * @semaphore: semaphore object to use for sync
- * @ring: ring that needs sync
- *
- * Ensure that all registered fences are signaled before letting
- * the ring continue. The caller must hold the ring lock.
- */
-int radeon_semaphore_sync_rings(struct radeon_device *rdev,
- struct radeon_semaphore *semaphore,
- int ring)
-{
- unsigned count = 0;
- int i, r;
-
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- struct radeon_fence *fence = semaphore->sync_to[i];
-
- /* check if we really need to sync */
- if (!radeon_fence_need_sync(fence, ring))
- continue;
-
- /* prevent GPU deadlocks */
- if (!rdev->ring[i].ready) {
- dev_err(rdev->dev, "Syncing to a disabled ring!");
- return -EINVAL;
- }
-
- if (++count > RADEON_NUM_SYNCS) {
- /* not enough room, wait manually */
- r = radeon_fence_wait(fence, false);
- if (r)
- return r;
- continue;
- }
-
- /* allocate enough space for sync command */
- r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
- if (r) {
- return r;
- }
-
- /* emit the signal semaphore */
- if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
- /* signaling wasn't successful wait manually */
- radeon_ring_undo(&rdev->ring[i]);
- r = radeon_fence_wait(fence, false);
- if (r)
- return r;
- continue;
- }
-
- /* we assume caller has already allocated space on waiters ring */
- if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
- /* waiting wasn't successful wait manually */
- radeon_ring_undo(&rdev->ring[i]);
- r = radeon_fence_wait(fence, false);
- if (r)
- return r;
- continue;
- }
-
- radeon_ring_commit(rdev, &rdev->ring[i], false);
- radeon_fence_note_sync(fence, ring);
-
- semaphore->gpu_addr += 8;
- }
-
- return 0;
-}
-
void radeon_semaphore_free(struct radeon_device *rdev,
struct radeon_semaphore **semaphore,
struct radeon_fence *fence)
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
new file mode 100644
index 000000000000..02ac8a1de4ff
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_sync.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_trace.h"
+
+/**
+ * radeon_sync_create - zero init sync object
+ *
+ * @sync: sync object to initialize
+ *
+ * Just clear the sync object for now.
+ */
+void radeon_sync_create(struct radeon_sync *sync)
+{
+ unsigned i;
+
+ for (i = 0; i < RADEON_NUM_SYNCS; ++i)
+ sync->semaphores[i] = NULL;
+
+ for (i = 0; i < RADEON_NUM_RINGS; ++i)
+ sync->sync_to[i] = NULL;
+
+ sync->last_vm_update = NULL;
+}
+
+/**
+ * radeon_sync_fence - use the semaphore to sync to a fence
+ *
+ * @sync: sync object to add fence to
+ * @fence: fence to sync to
+ *
+ * Sync to the fence using the semaphore objects
+ */
+void radeon_sync_fence(struct radeon_sync *sync,
+ struct radeon_fence *fence)
+{
+ struct radeon_fence *other;
+
+ if (!fence)
+ return;
+
+ other = sync->sync_to[fence->ring];
+ sync->sync_to[fence->ring] = radeon_fence_later(fence, other);
+
+ if (fence->is_vm_update) {
+ other = sync->last_vm_update;
+ sync->last_vm_update = radeon_fence_later(fence, other);
+ }
+}
+
+/**
+ * radeon_sync_resv - use the semaphores to sync to a reservation object
+ *
+ * @sync: sync object to add fences from reservation object to
+ * @resv: reservation object with embedded fence
+ * @shared: true if we should only sync to the exclusive fence
+ *
+ * Sync to the fence using the semaphore objects
+ */
+int radeon_sync_resv(struct radeon_device *rdev,
+ struct radeon_sync *sync,
+ struct reservation_object *resv,
+ bool shared)
+{
+ struct reservation_object_list *flist;
+ struct fence *f;
+ struct radeon_fence *fence;
+ unsigned i;
+ int r = 0;
+
+ /* always sync to the exclusive fence */
+ f = reservation_object_get_excl(resv);
+ fence = f ? to_radeon_fence(f) : NULL;
+ if (fence && fence->rdev == rdev)
+ radeon_sync_fence(sync, fence);
+ else if (f)
+ r = fence_wait(f, true);
+
+ flist = reservation_object_get_list(resv);
+ if (shared || !flist || r)
+ return r;
+
+ for (i = 0; i < flist->shared_count; ++i) {
+ f = rcu_dereference_protected(flist->shared[i],
+ reservation_object_held(resv));
+ fence = to_radeon_fence(f);
+ if (fence && fence->rdev == rdev)
+ radeon_sync_fence(sync, fence);
+ else
+ r = fence_wait(f, true);
+
+ if (r)
+ break;
+ }
+ return r;
+}
+
+/**
+ * radeon_sync_rings - sync ring to all registered fences
+ *
+ * @rdev: radeon_device pointer
+ * @sync: sync object to use
+ * @ring: ring that needs sync
+ *
+ * Ensure that all registered fences are signaled before letting
+ * the ring continue. The caller must hold the ring lock.
+ */
+int radeon_sync_rings(struct radeon_device *rdev,
+ struct radeon_sync *sync,
+ int ring)
+{
+ unsigned count = 0;
+ int i, r;
+
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ struct radeon_fence *fence = sync->sync_to[i];
+ struct radeon_semaphore *semaphore;
+
+ /* check if we really need to sync */
+ if (!radeon_fence_need_sync(fence, ring))
+ continue;
+
+ /* prevent GPU deadlocks */
+ if (!rdev->ring[i].ready) {
+ dev_err(rdev->dev, "Syncing to a disabled ring!");
+ return -EINVAL;
+ }
+
+ if (count >= RADEON_NUM_SYNCS) {
+ /* not enough room, wait manually */
+ r = radeon_fence_wait(fence, false);
+ if (r)
+ return r;
+ continue;
+ }
+ r = radeon_semaphore_create(rdev, &semaphore);
+ if (r)
+ return r;
+
+ sync->semaphores[count++] = semaphore;
+
+ /* allocate enough space for sync command */
+ r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
+ if (r)
+ return r;
+
+ /* emit the signal semaphore */
+ if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
+ /* signaling wasn't successful wait manually */
+ radeon_ring_undo(&rdev->ring[i]);
+ r = radeon_fence_wait(fence, false);
+ if (r)
+ return r;
+ continue;
+ }
+
+ /* we assume caller has already allocated space on waiters ring */
+ if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
+ /* waiting wasn't successful wait manually */
+ radeon_ring_undo(&rdev->ring[i]);
+ r = radeon_fence_wait(fence, false);
+ if (r)
+ return r;
+ continue;
+ }
+
+ radeon_ring_commit(rdev, &rdev->ring[i], false);
+ radeon_fence_note_sync(fence, ring);
+ }
+
+ return 0;
+}
+
+/**
+ * radeon_sync_free - free the sync object
+ *
+ * @rdev: radeon_device pointer
+ * @sync: sync object to use
+ * @fence: fence to use for the free
+ *
+ * Free the sync object by freeing all semaphores in it.
+ */
+void radeon_sync_free(struct radeon_device *rdev,
+ struct radeon_sync *sync,
+ struct radeon_fence *fence)
+{
+ unsigned i;
+
+ for (i = 0; i < RADEON_NUM_SYNCS; ++i)
+ radeon_semaphore_free(rdev, &sync->semaphores[i], fence);
+}
diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h
index 9db74a96ef61..ce075cb08cb2 100644
--- a/drivers/gpu/drm/radeon/radeon_trace.h
+++ b/drivers/gpu/drm/radeon/radeon_trace.h
@@ -38,7 +38,7 @@ TRACE_EVENT(radeon_cs,
TP_fast_assign(
__entry->ring = p->ring;
- __entry->dw = p->chunks[p->chunk_ib_idx].length_dw;
+ __entry->dw = p->chunk_ib->length_dw;
__entry->fences = radeon_fence_count_emitted(
p->rdev, p->ring);
),
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 8624979afb65..d02aa1d0f588 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -196,9 +196,32 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
rbo = container_of(bo, struct radeon_bo, tbo);
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
- if (rbo->rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready == false)
+ if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
- else
+ else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
+ bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
+ unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+ int i;
+
+ /* Try evicting to the CPU inaccessible part of VRAM
+ * first, but only set GTT as busy placement, so this
+ * BO will be evicted to GTT rather than causing other
+ * BOs to be evicted from VRAM
+ */
+ radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
+ RADEON_GEM_DOMAIN_GTT);
+ rbo->placement.num_busy_placement = 0;
+ for (i = 0; i < rbo->placement.num_placement; i++) {
+ if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
+ if (rbo->placements[0].fpfn < fpfn)
+ rbo->placements[0].fpfn = fpfn;
+ } else {
+ rbo->placement.busy_placement =
+ &rbo->placements[i];
+ rbo->placement.num_busy_placement = 1;
+ }
+ }
+ } else
radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
break;
case TTM_PL_TT:
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 11b662469253..c10b2aec6450 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -488,12 +488,12 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
unsigned buf_sizes[], bool *has_msg_cmd)
{
struct radeon_cs_chunk *relocs_chunk;
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
unsigned idx, cmd, offset;
uint64_t start, end;
int r;
- relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+ relocs_chunk = p->chunk_relocs;
offset = radeon_get_ib_value(p, data0);
idx = radeon_get_ib_value(p, data1);
if (idx >= relocs_chunk->length_dw) {
@@ -502,7 +502,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
return -EINVAL;
}
- reloc = p->relocs_ptr[(idx / 4)];
+ reloc = &p->relocs[(idx / 4)];
start = reloc->gpu_offset;
end = start + radeon_bo_size(reloc->robj);
start += offset;
@@ -610,13 +610,13 @@ int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
[0x00000003] = 2048,
};
- if (p->chunks[p->chunk_ib_idx].length_dw % 16) {
+ if (p->chunk_ib->length_dw % 16) {
DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
- p->chunks[p->chunk_ib_idx].length_dw);
+ p->chunk_ib->length_dw);
return -EINVAL;
}
- if (p->chunk_relocs_idx == -1) {
+ if (p->chunk_relocs == NULL) {
DRM_ERROR("No relocation chunk !\n");
return -EINVAL;
}
@@ -640,7 +640,7 @@ int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
DRM_ERROR("Unknown packet type %d !\n", pkt.type);
return -EINVAL;
}
- } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+ } while (p->idx < p->chunk_ib->length_dw);
if (!has_msg_cmd) {
DRM_ERROR("UVD-IBs need a msg command!\n");
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 9e85757d5599..976fe432f4e2 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -453,11 +453,11 @@ int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi,
unsigned size)
{
struct radeon_cs_chunk *relocs_chunk;
- struct radeon_cs_reloc *reloc;
+ struct radeon_bo_list *reloc;
uint64_t start, end, offset;
unsigned idx;
- relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+ relocs_chunk = p->chunk_relocs;
offset = radeon_get_ib_value(p, lo);
idx = radeon_get_ib_value(p, hi);
@@ -467,7 +467,7 @@ int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi,
return -EINVAL;
}
- reloc = p->relocs_ptr[(idx / 4)];
+ reloc = &p->relocs[(idx / 4)];
start = reloc->gpu_offset;
end = start + radeon_bo_size(reloc->robj);
start += offset;
@@ -534,7 +534,7 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p)
uint32_t *size = &tmp;
int i, r;
- while (p->idx < p->chunks[p->chunk_ib_idx].length_dw) {
+ while (p->idx < p->chunk_ib->length_dw) {
uint32_t len = radeon_get_ib_value(p, p->idx);
uint32_t cmd = radeon_get_ib_value(p, p->idx + 1);
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index dfde266529e2..cde48c42b30a 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -125,41 +125,37 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
* Add the page directory to the list of BOs to
* validate for command submission (cayman+).
*/
-struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
+struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
struct radeon_vm *vm,
struct list_head *head)
{
- struct radeon_cs_reloc *list;
+ struct radeon_bo_list *list;
unsigned i, idx;
list = drm_malloc_ab(vm->max_pde_used + 2,
- sizeof(struct radeon_cs_reloc));
+ sizeof(struct radeon_bo_list));
if (!list)
return NULL;
/* add the vm page table to the list */
- list[0].gobj = NULL;
list[0].robj = vm->page_directory;
list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
list[0].tv.bo = &vm->page_directory->tbo;
- list[0].tv.shared = false;
+ list[0].tv.shared = true;
list[0].tiling_flags = 0;
- list[0].handle = 0;
list_add(&list[0].tv.head, head);
for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
if (!vm->page_tables[i].bo)
continue;
- list[idx].gobj = NULL;
list[idx].robj = vm->page_tables[i].bo;
list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
list[idx].tv.bo = &list[idx].robj->tbo;
- list[idx].tv.shared = false;
+ list[idx].tv.shared = true;
list[idx].tiling_flags = 0;
- list[idx].handle = 0;
list_add(&list[idx++].tv.head, head);
}
@@ -182,15 +178,18 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
struct radeon_vm *vm, int ring)
{
struct radeon_fence *best[RADEON_NUM_RINGS] = {};
+ struct radeon_vm_id *vm_id = &vm->ids[ring];
+
unsigned choices[2] = {};
unsigned i;
/* check if the id is still valid */
- if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
+ if (vm_id->id && vm_id->last_id_use &&
+ vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
return NULL;
/* we definately need to flush */
- radeon_fence_unref(&vm->last_flush);
+ vm_id->pd_gpu_addr = ~0ll;
/* skip over VMID 0, since it is the system VM */
for (i = 1; i < rdev->vm_manager.nvm; ++i) {
@@ -198,8 +197,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
if (fence == NULL) {
/* found a free one */
- vm->id = i;
- trace_radeon_vm_grab_id(vm->id, ring);
+ vm_id->id = i;
+ trace_radeon_vm_grab_id(i, ring);
return NULL;
}
@@ -211,8 +210,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
for (i = 0; i < 2; ++i) {
if (choices[i]) {
- vm->id = choices[i];
- trace_radeon_vm_grab_id(vm->id, ring);
+ vm_id->id = choices[i];
+ trace_radeon_vm_grab_id(choices[i], ring);
return rdev->vm_manager.active[choices[i]];
}
}
@@ -228,6 +227,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
* @rdev: radeon_device pointer
* @vm: vm we want to flush
* @ring: ring to use for flush
+ * @updates: last vm update that is waited for
*
* Flush the vm (cayman+).
*
@@ -235,15 +235,21 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
*/
void radeon_vm_flush(struct radeon_device *rdev,
struct radeon_vm *vm,
- int ring)
+ int ring, struct radeon_fence *updates)
{
uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
+ struct radeon_vm_id *vm_id = &vm->ids[ring];
+
+ if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
+ radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
+
+ trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
+ radeon_fence_unref(&vm_id->flushed_updates);
+ vm_id->flushed_updates = radeon_fence_ref(updates);
+ vm_id->pd_gpu_addr = pd_addr;
+ radeon_ring_vm_flush(rdev, &rdev->ring[ring],
+ vm_id->id, vm_id->pd_gpu_addr);
- /* if we can't remember our last VM flush then flush now! */
- if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) {
- trace_radeon_vm_flush(pd_addr, ring, vm->id);
- vm->pd_gpu_addr = pd_addr;
- radeon_ring_vm_flush(rdev, ring, vm);
}
}
@@ -263,18 +269,13 @@ void radeon_vm_fence(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_fence *fence)
{
- radeon_fence_unref(&vm->fence);
- vm->fence = radeon_fence_ref(fence);
-
- radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
- rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
+ unsigned vm_id = vm->ids[fence->ring].id;
- radeon_fence_unref(&vm->last_id_use);
- vm->last_id_use = radeon_fence_ref(fence);
+ radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
+ rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
- /* we just flushed the VM, remember that */
- if (!vm->last_flush)
- vm->last_flush = radeon_fence_ref(fence);
+ radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
+ vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
}
/**
@@ -387,35 +388,25 @@ static void radeon_vm_set_pages(struct radeon_device *rdev,
static int radeon_vm_clear_bo(struct radeon_device *rdev,
struct radeon_bo *bo)
{
- struct ttm_validate_buffer tv;
- struct ww_acquire_ctx ticket;
- struct list_head head;
struct radeon_ib ib;
unsigned entries;
uint64_t addr;
int r;
- memset(&tv, 0, sizeof(tv));
- tv.bo = &bo->tbo;
- tv.shared = false;
-
- INIT_LIST_HEAD(&head);
- list_add(&tv.head, &head);
-
- r = ttm_eu_reserve_buffers(&ticket, &head, true);
- if (r)
+ r = radeon_bo_reserve(bo, false);
+ if (r)
return r;
- r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
- if (r)
- goto error;
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+ if (r)
+ goto error_unreserve;
addr = radeon_bo_gpu_offset(bo);
entries = radeon_bo_size(bo) / 8;
r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
if (r)
- goto error;
+ goto error_unreserve;
ib.length_dw = 0;
@@ -425,15 +416,16 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
r = radeon_ib_schedule(rdev, &ib, NULL, false);
if (r)
- goto error;
+ goto error_free;
- ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base);
- radeon_ib_free(rdev, &ib);
+ ib.fence->is_vm_update = true;
+ radeon_bo_fence(bo, ib.fence, false);
- return 0;
+error_free:
+ radeon_ib_free(rdev, &ib);
-error:
- ttm_eu_backoff_reservation(&ticket, &head);
+error_unreserve:
+ radeon_bo_unreserve(bo);
return r;
}
@@ -449,7 +441,7 @@ error:
* Validate and set the offset requested within the vm address space.
* Returns 0 for success, error for failure.
*
- * Object has to be reserved!
+ * Object has to be reserved and gets unreserved by this function!
*/
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
struct radeon_bo_va *bo_va,
@@ -495,7 +487,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
tmp->vm = vm;
tmp->addr = bo_va->addr;
tmp->bo = radeon_bo_ref(bo_va->bo);
+ spin_lock(&vm->status_lock);
list_add(&tmp->vm_status, &vm->freed);
+ spin_unlock(&vm->status_lock);
}
interval_tree_remove(&bo_va->it, &vm->va);
@@ -575,7 +569,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
}
mutex_unlock(&vm->mutex);
- return radeon_bo_reserve(bo_va->bo, false);
+ return 0;
}
/**
@@ -699,17 +693,15 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
if (ib.length_dw != 0) {
radeon_asic_vm_pad_ib(rdev, &ib);
- radeon_semaphore_sync_resv(rdev, ib.semaphore, pd->tbo.resv, false);
- radeon_semaphore_sync_fence(ib.semaphore, vm->last_id_use);
+ radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, true);
WARN_ON(ib.length_dw > ndw);
r = radeon_ib_schedule(rdev, &ib, NULL, false);
if (r) {
radeon_ib_free(rdev, &ib);
return r;
}
- radeon_fence_unref(&vm->fence);
- vm->fence = radeon_fence_ref(ib.fence);
- radeon_fence_unref(&vm->last_flush);
+ ib.fence->is_vm_update = true;
+ radeon_bo_fence(pd, ib.fence, false);
}
radeon_ib_free(rdev, &ib);
@@ -808,11 +800,11 @@ static void radeon_vm_frag_ptes(struct radeon_device *rdev,
*
* Global and local mutex must be locked!
*/
-static void radeon_vm_update_ptes(struct radeon_device *rdev,
- struct radeon_vm *vm,
- struct radeon_ib *ib,
- uint64_t start, uint64_t end,
- uint64_t dst, uint32_t flags)
+static int radeon_vm_update_ptes(struct radeon_device *rdev,
+ struct radeon_vm *vm,
+ struct radeon_ib *ib,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint32_t flags)
{
uint64_t mask = RADEON_VM_PTE_COUNT - 1;
uint64_t last_pte = ~0, last_dst = ~0;
@@ -825,8 +817,12 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
unsigned nptes;
uint64_t pte;
+ int r;
- radeon_semaphore_sync_resv(rdev, ib->semaphore, pt->tbo.resv, false);
+ radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, true);
+ r = reservation_object_reserve_shared(pt->tbo.resv);
+ if (r)
+ return r;
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
@@ -860,6 +856,33 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
last_pte + 8 * count,
last_dst, flags);
}
+
+ return 0;
+}
+
+/**
+ * radeon_vm_fence_pts - fence page tables after an update
+ *
+ * @vm: requested vm
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @fence: fence to use
+ *
+ * Fence the page tables in the range @start - @end (cayman+).
+ *
+ * Global and local mutex must be locked!
+ */
+static void radeon_vm_fence_pts(struct radeon_vm *vm,
+ uint64_t start, uint64_t end,
+ struct radeon_fence *fence)
+{
+ unsigned i;
+
+ start >>= radeon_vm_block_size;
+ end >>= radeon_vm_block_size;
+
+ for (i = start; i <= end; ++i)
+ radeon_bo_fence(vm->page_tables[i].bo, fence, true);
}
/**
@@ -892,7 +915,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
return -EINVAL;
}
+ spin_lock(&vm->status_lock);
list_del_init(&bo_va->vm_status);
+ spin_unlock(&vm->status_lock);
bo_va->flags &= ~RADEON_VM_PAGE_VALID;
bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
@@ -961,23 +986,34 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
return r;
ib.length_dw = 0;
- radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
- bo_va->it.last + 1, addr,
- radeon_vm_page_flags(bo_va->flags));
+ if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) {
+ unsigned i;
+
+ for (i = 0; i < RADEON_NUM_RINGS; ++i)
+ radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use);
+ }
+
+ r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
+ bo_va->it.last + 1, addr,
+ radeon_vm_page_flags(bo_va->flags));
+ if (r) {
+ radeon_ib_free(rdev, &ib);
+ return r;
+ }
radeon_asic_vm_pad_ib(rdev, &ib);
WARN_ON(ib.length_dw > ndw);
- radeon_semaphore_sync_fence(ib.semaphore, vm->fence);
r = radeon_ib_schedule(rdev, &ib, NULL, false);
if (r) {
radeon_ib_free(rdev, &ib);
return r;
}
- radeon_fence_unref(&vm->fence);
- vm->fence = radeon_fence_ref(ib.fence);
+ ib.fence->is_vm_update = true;
+ radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
+ radeon_fence_unref(&bo_va->last_pt_update);
+ bo_va->last_pt_update = radeon_fence_ref(ib.fence);
radeon_ib_free(rdev, &ib);
- radeon_fence_unref(&vm->last_flush);
return 0;
}
@@ -996,16 +1032,25 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
int radeon_vm_clear_freed(struct radeon_device *rdev,
struct radeon_vm *vm)
{
- struct radeon_bo_va *bo_va, *tmp;
+ struct radeon_bo_va *bo_va;
int r;
- list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->freed)) {
+ bo_va = list_first_entry(&vm->freed,
+ struct radeon_bo_va, vm_status);
+ spin_unlock(&vm->status_lock);
+
r = radeon_vm_bo_update(rdev, bo_va, NULL);
radeon_bo_unref(&bo_va->bo);
+ radeon_fence_unref(&bo_va->last_pt_update);
kfree(bo_va);
if (r)
return r;
+
+ spin_lock(&vm->status_lock);
}
+ spin_unlock(&vm->status_lock);
return 0;
}
@@ -1024,14 +1069,23 @@ int radeon_vm_clear_freed(struct radeon_device *rdev,
int radeon_vm_clear_invalids(struct radeon_device *rdev,
struct radeon_vm *vm)
{
- struct radeon_bo_va *bo_va, *tmp;
+ struct radeon_bo_va *bo_va;
int r;
- list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, vm_status) {
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->invalidated)) {
+ bo_va = list_first_entry(&vm->invalidated,
+ struct radeon_bo_va, vm_status);
+ spin_unlock(&vm->status_lock);
+
r = radeon_vm_bo_update(rdev, bo_va, NULL);
if (r)
return r;
+
+ spin_lock(&vm->status_lock);
}
+ spin_unlock(&vm->status_lock);
+
return 0;
}
@@ -1054,14 +1108,17 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
mutex_lock(&vm->mutex);
interval_tree_remove(&bo_va->it, &vm->va);
+ spin_lock(&vm->status_lock);
list_del(&bo_va->vm_status);
if (bo_va->addr) {
bo_va->bo = radeon_bo_ref(bo_va->bo);
list_add(&bo_va->vm_status, &vm->freed);
} else {
+ radeon_fence_unref(&bo_va->last_pt_update);
kfree(bo_va);
}
+ spin_unlock(&vm->status_lock);
mutex_unlock(&vm->mutex);
}
@@ -1082,10 +1139,10 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
list_for_each_entry(bo_va, &bo->va, bo_list) {
if (bo_va->addr) {
- mutex_lock(&bo_va->vm->mutex);
+ spin_lock(&bo_va->vm->status_lock);
list_del(&bo_va->vm_status);
list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
- mutex_unlock(&bo_va->vm->mutex);
+ spin_unlock(&bo_va->vm->status_lock);
}
}
}
@@ -1103,15 +1160,17 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
RADEON_VM_PTE_COUNT * 8);
unsigned pd_size, pd_entries, pts_size;
- int r;
+ int i, r;
- vm->id = 0;
vm->ib_bo_va = NULL;
- vm->fence = NULL;
- vm->last_flush = NULL;
- vm->last_id_use = NULL;
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ vm->ids[i].id = 0;
+ vm->ids[i].flushed_updates = NULL;
+ vm->ids[i].last_id_use = NULL;
+ }
mutex_init(&vm->mutex);
vm->va = RB_ROOT;
+ spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->invalidated);
INIT_LIST_HEAD(&vm->freed);
@@ -1165,11 +1224,13 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
if (!r) {
list_del_init(&bo_va->bo_list);
radeon_bo_unreserve(bo_va->bo);
+ radeon_fence_unref(&bo_va->last_pt_update);
kfree(bo_va);
}
}
list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
radeon_bo_unref(&bo_va->bo);
+ radeon_fence_unref(&bo_va->last_pt_update);
kfree(bo_va);
}
@@ -1179,9 +1240,10 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
radeon_bo_unref(&vm->page_directory);
- radeon_fence_unref(&vm->fence);
- radeon_fence_unref(&vm->last_flush);
- radeon_fence_unref(&vm->last_id_use);
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ radeon_fence_unref(&vm->ids[i].flushed_updates);
+ radeon_fence_unref(&vm->ids[i].last_id_use);
+ }
mutex_destroy(&vm->mutex);
}
diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c
index 7f34bad2e724..acff6e09cc40 100644
--- a/drivers/gpu/drm/radeon/rv770_dma.c
+++ b/drivers/gpu/drm/radeon/rv770_dma.c
@@ -44,31 +44,27 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
- struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
+ struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_dw, cur_size_in_dw;
int i, num_loops;
int r = 0;
- r = radeon_semaphore_create(rdev, &sem);
- if (r) {
- DRM_ERROR("radeon: moving bo (%d).\n", r);
- return ERR_PTR(r);
- }
+ radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
- radeon_semaphore_sync_resv(rdev, sem, resv, false);
- radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+ radeon_sync_resv(rdev, &sync, resv, false);
+ radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_dw = size_in_dw;
@@ -87,12 +83,12 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
- radeon_semaphore_free(rdev, &sem, fence);
+ radeon_sync_free(rdev, &sync, fence);
return fence;
}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 7d5083dc4acb..60df444bd075 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -3365,6 +3365,7 @@ void si_fence_ring_emit(struct radeon_device *rdev,
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
+ unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 header;
if (ib->is_const_ib) {
@@ -3400,14 +3401,13 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
#endif
(ib->gpu_addr & 0xFFFFFFFC));
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
- radeon_ring_write(ring, ib->length_dw |
- (ib->vm ? (ib->vm->id << 24) : 0));
+ radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
if (!ib->is_const_ib) {
/* flush read cache over gart for this vmid */
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
- radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
+ radeon_ring_write(ring, vm_id);
radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
PACKET3_TC_ACTION_ENA |
@@ -5023,27 +5023,23 @@ static void si_vm_decode_fault(struct radeon_device *rdev,
block, mc_id);
}
-void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
{
- struct radeon_ring *ring = &rdev->ring[ridx];
-
- if (vm == NULL)
- return;
-
/* write new base address */
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
WRITE_DATA_DST_SEL(0)));
- if (vm->id < 8) {
+ if (vm_id < 8) {
radeon_ring_write(ring,
- (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+ (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
} else {
radeon_ring_write(ring,
- (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+ (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
}
radeon_ring_write(ring, 0);
- radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+ radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -5059,7 +5055,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
WRITE_DATA_DST_SEL(0)));
radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
radeon_ring_write(ring, 0);
- radeon_ring_write(ring, 1 << vm->id);
+ radeon_ring_write(ring, 1 << vm_id);
/* sync PFP to ME, otherwise we might get invalid PFP reads */
radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
index b58f12b762d7..f5cc777e1c5f 100644
--- a/drivers/gpu/drm/radeon/si_dma.c
+++ b/drivers/gpu/drm/radeon/si_dma.c
@@ -185,20 +185,17 @@ void si_dma_vm_set_pages(struct radeon_device *rdev,
}
}
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
-{
- struct radeon_ring *ring = &rdev->ring[ridx];
-
- if (vm == NULL)
- return;
+void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
+{
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
- if (vm->id < 8) {
- radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+ if (vm_id < 8) {
+ radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
} else {
- radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
+ radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2));
}
- radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+ radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
@@ -208,7 +205,7 @@ void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
/* bits 0-7 are the VM contexts0-7 */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
- radeon_ring_write(ring, 1 << vm->id);
+ radeon_ring_write(ring, 1 << vm_id);
}
/**
@@ -229,31 +226,27 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct reservation_object *resv)
{
- struct radeon_semaphore *sem = NULL;
struct radeon_fence *fence;
+ struct radeon_sync sync;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes;
int i, num_loops;
int r = 0;
- r = radeon_semaphore_create(rdev, &sem);
- if (r) {
- DRM_ERROR("radeon: moving bo (%d).\n", r);
- return ERR_PTR(r);
- }
+ radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
- radeon_semaphore_sync_resv(rdev, sem, resv, false);
- radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+ radeon_sync_resv(rdev, &sync, resv, false);
+ radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) {
cur_size_in_bytes = size_in_bytes;
@@ -272,12 +265,12 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
r = radeon_fence_emit(rdev, &fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
- radeon_semaphore_free(rdev, &sem, NULL);
+ radeon_sync_free(rdev, &sync, NULL);
return ERR_PTR(r);
}
radeon_ring_unlock_commit(rdev, ring, false);
- radeon_semaphore_free(rdev, &sem, fence);
+ radeon_sync_free(rdev, &sync, fence);
return fence;
}
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 676e6c2ba90a..32e354b8b0ab 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -3398,6 +3398,15 @@ static int si_process_firmware_header(struct radeon_device *rdev)
ret = si_read_smc_sram_dword(rdev,
SISLANDS_SMC_FIRMWARE_HEADER_LOCATION +
+ SISLANDS_SMC_FIRMWARE_HEADER_fanTable,
+ &tmp, si_pi->sram_end);
+ if (ret)
+ return ret;
+
+ si_pi->fan_table_start = tmp;
+
+ ret = si_read_smc_sram_dword(rdev,
+ SISLANDS_SMC_FIRMWARE_HEADER_LOCATION +
SISLANDS_SMC_FIRMWARE_HEADER_mcArbDramAutoRefreshTable,
&tmp, si_pi->sram_end);
if (ret)
@@ -5817,8 +5826,33 @@ void si_dpm_setup_asic(struct radeon_device *rdev)
si_enable_acpi_power_management(rdev);
}
-static int si_set_thermal_temperature_range(struct radeon_device *rdev,
- int min_temp, int max_temp)
+static int si_thermal_enable_alert(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 thermal_int = RREG32(CG_THERMAL_INT);
+
+ if (enable) {
+ PPSMC_Result result;
+
+ thermal_int &= ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
+ WREG32(CG_THERMAL_INT, thermal_int);
+ rdev->irq.dpm_thermal = false;
+ result = si_send_msg_to_smc(rdev, PPSMC_MSG_EnableThermalInterrupt);
+ if (result != PPSMC_Result_OK) {
+ DRM_DEBUG_KMS("Could not enable thermal interrupts.\n");
+ return -EINVAL;
+ }
+ } else {
+ thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
+ WREG32(CG_THERMAL_INT, thermal_int);
+ rdev->irq.dpm_thermal = true;
+ }
+
+ return 0;
+}
+
+static int si_thermal_set_temperature_range(struct radeon_device *rdev,
+ int min_temp, int max_temp)
{
int low_temp = 0 * 1000;
int high_temp = 255 * 1000;
@@ -5842,6 +5876,309 @@ static int si_set_thermal_temperature_range(struct radeon_device *rdev,
return 0;
}
+static void si_fan_ctrl_set_static_mode(struct radeon_device *rdev, u32 mode)
+{
+ struct si_power_info *si_pi = si_get_pi(rdev);
+ u32 tmp;
+
+ if (si_pi->fan_ctrl_is_in_default_mode) {
+ tmp = (RREG32(CG_FDO_CTRL2) & FDO_PWM_MODE_MASK) >> FDO_PWM_MODE_SHIFT;
+ si_pi->fan_ctrl_default_mode = tmp;
+ tmp = (RREG32(CG_FDO_CTRL2) & TMIN_MASK) >> TMIN_SHIFT;
+ si_pi->t_min = tmp;
+ si_pi->fan_ctrl_is_in_default_mode = false;
+ }
+
+ tmp = RREG32(CG_FDO_CTRL2) & ~TMIN_MASK;
+ tmp |= TMIN(0);
+ WREG32(CG_FDO_CTRL2, tmp);
+
+ tmp = RREG32(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK;
+ tmp |= FDO_PWM_MODE(mode);
+ WREG32(CG_FDO_CTRL2, tmp);
+}
+
+static int si_thermal_setup_fan_table(struct radeon_device *rdev)
+{
+ struct si_power_info *si_pi = si_get_pi(rdev);
+ PP_SIslands_FanTable fan_table = { FDO_MODE_HARDWARE };
+ u32 duty100;
+ u32 t_diff1, t_diff2, pwm_diff1, pwm_diff2;
+ u16 fdo_min, slope1, slope2;
+ u32 reference_clock, tmp;
+ int ret;
+ u64 tmp64;
+
+ if (!si_pi->fan_table_start) {
+ rdev->pm.dpm.fan.ucode_fan_control = false;
+ return 0;
+ }
+
+ duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+
+ if (duty100 == 0) {
+ rdev->pm.dpm.fan.ucode_fan_control = false;
+ return 0;
+ }
+
+ tmp64 = (u64)rdev->pm.dpm.fan.pwm_min * duty100;
+ do_div(tmp64, 10000);
+ fdo_min = (u16)tmp64;
+
+ t_diff1 = rdev->pm.dpm.fan.t_med - rdev->pm.dpm.fan.t_min;
+ t_diff2 = rdev->pm.dpm.fan.t_high - rdev->pm.dpm.fan.t_med;
+
+ pwm_diff1 = rdev->pm.dpm.fan.pwm_med - rdev->pm.dpm.fan.pwm_min;
+ pwm_diff2 = rdev->pm.dpm.fan.pwm_high - rdev->pm.dpm.fan.pwm_med;
+
+ slope1 = (u16)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100);
+ slope2 = (u16)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100);
+
+ fan_table.slope1 = cpu_to_be16(slope1);
+ fan_table.slope2 = cpu_to_be16(slope2);
+
+ fan_table.fdo_min = cpu_to_be16(fdo_min);
+
+ fan_table.hys_down = cpu_to_be16(rdev->pm.dpm.fan.t_hyst);
+
+ fan_table.hys_up = cpu_to_be16(1);
+
+ fan_table.hys_slope = cpu_to_be16(1);
+
+ fan_table.temp_resp_lim = cpu_to_be16(5);
+
+ reference_clock = radeon_get_xclk(rdev);
+
+ fan_table.refresh_period = cpu_to_be32((rdev->pm.dpm.fan.cycle_delay *
+ reference_clock) / 1600);
+
+ fan_table.fdo_max = cpu_to_be16((u16)duty100);
+
+ tmp = (RREG32(CG_MULT_THERMAL_CTRL) & TEMP_SEL_MASK) >> TEMP_SEL_SHIFT;
+ fan_table.temp_src = (uint8_t)tmp;
+
+ ret = si_copy_bytes_to_smc(rdev,
+ si_pi->fan_table_start,
+ (u8 *)(&fan_table),
+ sizeof(fan_table),
+ si_pi->sram_end);
+
+ if (ret) {
+ DRM_ERROR("Failed to load fan table to the SMC.");
+ rdev->pm.dpm.fan.ucode_fan_control = false;
+ }
+
+ return 0;
+}
+
+static int si_fan_ctrl_start_smc_fan_control(struct radeon_device *rdev)
+{
+ PPSMC_Result ret;
+
+ ret = si_send_msg_to_smc(rdev, PPSMC_StartFanControl);
+ if (ret == PPSMC_Result_OK)
+ return 0;
+ else
+ return -EINVAL;
+}
+
+static int si_fan_ctrl_stop_smc_fan_control(struct radeon_device *rdev)
+{
+ PPSMC_Result ret;
+
+ ret = si_send_msg_to_smc(rdev, PPSMC_StopFanControl);
+ if (ret == PPSMC_Result_OK)
+ return 0;
+ else
+ return -EINVAL;
+}
+
+#if 0
+static int si_fan_ctrl_get_fan_speed_percent(struct radeon_device *rdev,
+ u32 *speed)
+{
+ u32 duty, duty100;
+ u64 tmp64;
+
+ if (rdev->pm.no_fan)
+ return -ENOENT;
+
+ duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+ duty = (RREG32(CG_THERMAL_STATUS) & FDO_PWM_DUTY_MASK) >> FDO_PWM_DUTY_SHIFT;
+
+ if (duty100 == 0)
+ return -EINVAL;
+
+ tmp64 = (u64)duty * 100;
+ do_div(tmp64, duty100);
+ *speed = (u32)tmp64;
+
+ if (*speed > 100)
+ *speed = 100;
+
+ return 0;
+}
+
+static int si_fan_ctrl_set_fan_speed_percent(struct radeon_device *rdev,
+ u32 speed)
+{
+ u32 tmp;
+ u32 duty, duty100;
+ u64 tmp64;
+
+ if (rdev->pm.no_fan)
+ return -ENOENT;
+
+ if (speed > 100)
+ return -EINVAL;
+
+ if (rdev->pm.dpm.fan.ucode_fan_control)
+ si_fan_ctrl_stop_smc_fan_control(rdev);
+
+ duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+
+ if (duty100 == 0)
+ return -EINVAL;
+
+ tmp64 = (u64)speed * duty100;
+ do_div(tmp64, 100);
+ duty = (u32)tmp64;
+
+ tmp = RREG32(CG_FDO_CTRL0) & ~FDO_STATIC_DUTY_MASK;
+ tmp |= FDO_STATIC_DUTY(duty);
+ WREG32(CG_FDO_CTRL0, tmp);
+
+ si_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC);
+
+ return 0;
+}
+
+static int si_fan_ctrl_get_fan_speed_rpm(struct radeon_device *rdev,
+ u32 *speed)
+{
+ u32 tach_period;
+ u32 xclk = radeon_get_xclk(rdev);
+
+ if (rdev->pm.no_fan)
+ return -ENOENT;
+
+ if (rdev->pm.fan_pulses_per_revolution == 0)
+ return -ENOENT;
+
+ tach_period = (RREG32(CG_TACH_STATUS) & TACH_PERIOD_MASK) >> TACH_PERIOD_SHIFT;
+ if (tach_period == 0)
+ return -ENOENT;
+
+ *speed = 60 * xclk * 10000 / tach_period;
+
+ return 0;
+}
+
+static int si_fan_ctrl_set_fan_speed_rpm(struct radeon_device *rdev,
+ u32 speed)
+{
+ u32 tach_period, tmp;
+ u32 xclk = radeon_get_xclk(rdev);
+
+ if (rdev->pm.no_fan)
+ return -ENOENT;
+
+ if (rdev->pm.fan_pulses_per_revolution == 0)
+ return -ENOENT;
+
+ if ((speed < rdev->pm.fan_min_rpm) ||
+ (speed > rdev->pm.fan_max_rpm))
+ return -EINVAL;
+
+ if (rdev->pm.dpm.fan.ucode_fan_control)
+ si_fan_ctrl_stop_smc_fan_control(rdev);
+
+ tach_period = 60 * xclk * 10000 / (8 * speed);
+ tmp = RREG32(CG_TACH_CTRL) & ~TARGET_PERIOD_MASK;
+ tmp |= TARGET_PERIOD(tach_period);
+ WREG32(CG_TACH_CTRL, tmp);
+
+ si_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC_RPM);
+
+ return 0;
+}
+#endif
+
+static void si_fan_ctrl_set_default_mode(struct radeon_device *rdev)
+{
+ struct si_power_info *si_pi = si_get_pi(rdev);
+ u32 tmp;
+
+ if (!si_pi->fan_ctrl_is_in_default_mode) {
+ tmp = RREG32(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK;
+ tmp |= FDO_PWM_MODE(si_pi->fan_ctrl_default_mode);
+ WREG32(CG_FDO_CTRL2, tmp);
+
+ tmp = RREG32(CG_FDO_CTRL2) & ~TMIN_MASK;
+ tmp |= TMIN(si_pi->t_min);
+ WREG32(CG_FDO_CTRL2, tmp);
+ si_pi->fan_ctrl_is_in_default_mode = true;
+ }
+}
+
+static void si_thermal_start_smc_fan_control(struct radeon_device *rdev)
+{
+ if (rdev->pm.dpm.fan.ucode_fan_control) {
+ si_fan_ctrl_start_smc_fan_control(rdev);
+ si_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC);
+ }
+}
+
+static void si_thermal_initialize(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ if (rdev->pm.fan_pulses_per_revolution) {
+ tmp = RREG32(CG_TACH_CTRL) & ~EDGE_PER_REV_MASK;
+ tmp |= EDGE_PER_REV(rdev->pm.fan_pulses_per_revolution -1);
+ WREG32(CG_TACH_CTRL, tmp);
+ }
+
+ tmp = RREG32(CG_FDO_CTRL2) & ~TACH_PWM_RESP_RATE_MASK;
+ tmp |= TACH_PWM_RESP_RATE(0x28);
+ WREG32(CG_FDO_CTRL2, tmp);
+}
+
+static int si_thermal_start_thermal_controller(struct radeon_device *rdev)
+{
+ int ret;
+
+ si_thermal_initialize(rdev);
+ ret = si_thermal_set_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
+ if (ret)
+ return ret;
+ ret = si_thermal_enable_alert(rdev, true);
+ if (ret)
+ return ret;
+ if (rdev->pm.dpm.fan.ucode_fan_control) {
+ ret = si_halt_smc(rdev);
+ if (ret)
+ return ret;
+ ret = si_thermal_setup_fan_table(rdev);
+ if (ret)
+ return ret;
+ ret = si_resume_smc(rdev);
+ if (ret)
+ return ret;
+ si_thermal_start_smc_fan_control(rdev);
+ }
+
+ return 0;
+}
+
+static void si_thermal_stop_thermal_controller(struct radeon_device *rdev)
+{
+ if (!rdev->pm.no_fan) {
+ si_fan_ctrl_set_default_mode(rdev);
+ si_fan_ctrl_stop_smc_fan_control(rdev);
+ }
+}
+
int si_dpm_enable(struct radeon_device *rdev)
{
struct rv7xx_power_info *pi = rv770_get_pi(rdev);
@@ -5954,31 +6291,39 @@ int si_dpm_enable(struct radeon_device *rdev)
si_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, true);
+ si_thermal_start_thermal_controller(rdev);
+
ni_update_current_ps(rdev, boot_ps);
return 0;
}
-int si_dpm_late_enable(struct radeon_device *rdev)
+static int si_set_temperature_range(struct radeon_device *rdev)
{
int ret;
- if (rdev->irq.installed &&
- r600_is_internal_thermal_sensor(rdev->pm.int_thermal_type)) {
- PPSMC_Result result;
+ ret = si_thermal_enable_alert(rdev, false);
+ if (ret)
+ return ret;
+ ret = si_thermal_set_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
+ if (ret)
+ return ret;
+ ret = si_thermal_enable_alert(rdev, true);
+ if (ret)
+ return ret;
- ret = si_set_thermal_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
- if (ret)
- return ret;
- rdev->irq.dpm_thermal = true;
- radeon_irq_set(rdev);
- result = si_send_msg_to_smc(rdev, PPSMC_MSG_EnableThermalInterrupt);
+ return ret;
+}
- if (result != PPSMC_Result_OK)
- DRM_DEBUG_KMS("Could not enable thermal interrupts.\n");
- }
+int si_dpm_late_enable(struct radeon_device *rdev)
+{
+ int ret;
- return 0;
+ ret = si_set_temperature_range(rdev);
+ if (ret)
+ return ret;
+
+ return ret;
}
void si_dpm_disable(struct radeon_device *rdev)
@@ -5988,6 +6333,7 @@ void si_dpm_disable(struct radeon_device *rdev)
if (!si_is_smc_running(rdev))
return;
+ si_thermal_stop_thermal_controller(rdev);
si_disable_ulv(rdev);
si_clear_vc(rdev);
if (pi->thermal_protection)
@@ -6526,6 +6872,9 @@ int si_dpm_init(struct radeon_device *rdev)
rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc =
rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+ si_pi->fan_ctrl_is_in_default_mode = true;
+ rdev->pm.dpm.fan.ucode_fan_control = false;
+
return 0;
}
diff --git a/drivers/gpu/drm/radeon/si_dpm.h b/drivers/gpu/drm/radeon/si_dpm.h
index 8b5c06a0832d..d16bb1b5f10f 100644
--- a/drivers/gpu/drm/radeon/si_dpm.h
+++ b/drivers/gpu/drm/radeon/si_dpm.h
@@ -182,6 +182,7 @@ struct si_power_info {
u32 dte_table_start;
u32 spll_table_start;
u32 papm_cfg_table_start;
+ u32 fan_table_start;
/* CAC stuff */
const struct si_cac_config_reg *cac_weights;
const struct si_cac_config_reg *lcac_config;
@@ -197,6 +198,10 @@ struct si_power_info {
/* SVI2 */
u8 svd_gpio_id;
u8 svc_gpio_id;
+ /* fan control */
+ bool fan_ctrl_is_in_default_mode;
+ u32 t_min;
+ u32 fan_ctrl_default_mode;
};
#define SISLANDS_INITIAL_STATE_ARB_INDEX 0
diff --git a/drivers/gpu/drm/radeon/si_smc.c b/drivers/gpu/drm/radeon/si_smc.c
index 73dbc79c959d..e5bb92f16775 100644
--- a/drivers/gpu/drm/radeon/si_smc.c
+++ b/drivers/gpu/drm/radeon/si_smc.c
@@ -135,7 +135,7 @@ void si_reset_smc(struct radeon_device *rdev)
int si_program_jump_on_start(struct radeon_device *rdev)
{
- static u8 data[] = { 0x0E, 0x00, 0x40, 0x40 };
+ static const u8 data[] = { 0x0E, 0x00, 0x40, 0x40 };
return si_copy_bytes_to_smc(rdev, 0x0, data, 4, sizeof(data)+1);
}
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 6635da9ec986..4069be89e585 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -180,7 +180,10 @@
#define DIG_THERM_DPM(x) ((x) << 14)
#define DIG_THERM_DPM_MASK 0x003FC000
#define DIG_THERM_DPM_SHIFT 14
-
+#define CG_THERMAL_STATUS 0x704
+#define FDO_PWM_DUTY(x) ((x) << 9)
+#define FDO_PWM_DUTY_MASK (0xff << 9)
+#define FDO_PWM_DUTY_SHIFT 9
#define CG_THERMAL_INT 0x708
#define DIG_THERM_INTH(x) ((x) << 8)
#define DIG_THERM_INTH_MASK 0x0000FF00
@@ -191,6 +194,10 @@
#define THERM_INT_MASK_HIGH (1 << 24)
#define THERM_INT_MASK_LOW (1 << 25)
+#define CG_MULT_THERMAL_CTRL 0x710
+#define TEMP_SEL(x) ((x) << 20)
+#define TEMP_SEL_MASK (0xff << 20)
+#define TEMP_SEL_SHIFT 20
#define CG_MULT_THERMAL_STATUS 0x714
#define ASIC_MAX_TEMP(x) ((x) << 0)
#define ASIC_MAX_TEMP_MASK 0x000001ff
@@ -199,6 +206,37 @@
#define CTF_TEMP_MASK 0x0003fe00
#define CTF_TEMP_SHIFT 9
+#define CG_FDO_CTRL0 0x754
+#define FDO_STATIC_DUTY(x) ((x) << 0)
+#define FDO_STATIC_DUTY_MASK 0x000000FF
+#define FDO_STATIC_DUTY_SHIFT 0
+#define CG_FDO_CTRL1 0x758
+#define FMAX_DUTY100(x) ((x) << 0)
+#define FMAX_DUTY100_MASK 0x000000FF
+#define FMAX_DUTY100_SHIFT 0
+#define CG_FDO_CTRL2 0x75C
+#define TMIN(x) ((x) << 0)
+#define TMIN_MASK 0x000000FF
+#define TMIN_SHIFT 0
+#define FDO_PWM_MODE(x) ((x) << 11)
+#define FDO_PWM_MODE_MASK (7 << 11)
+#define FDO_PWM_MODE_SHIFT 11
+#define TACH_PWM_RESP_RATE(x) ((x) << 25)
+#define TACH_PWM_RESP_RATE_MASK (0x7f << 25)
+#define TACH_PWM_RESP_RATE_SHIFT 25
+
+#define CG_TACH_CTRL 0x770
+# define EDGE_PER_REV(x) ((x) << 0)
+# define EDGE_PER_REV_MASK (0x7 << 0)
+# define EDGE_PER_REV_SHIFT 0
+# define TARGET_PERIOD(x) ((x) << 3)
+# define TARGET_PERIOD_MASK 0xfffffff8
+# define TARGET_PERIOD_SHIFT 3
+#define CG_TACH_STATUS 0x774
+# define TACH_PERIOD(x) ((x) << 0)
+# define TACH_PERIOD_MASK 0xffffffff
+# define TACH_PERIOD_SHIFT 0
+
#define GENERAL_PWRMGT 0x780
# define GLOBAL_PWRMGT_EN (1 << 0)
# define STATIC_PM_EN (1 << 1)
diff --git a/drivers/gpu/drm/radeon/sislands_smc.h b/drivers/gpu/drm/radeon/sislands_smc.h
index 623a0b1e2d9d..3c779838d9ab 100644
--- a/drivers/gpu/drm/radeon/sislands_smc.h
+++ b/drivers/gpu/drm/radeon/sislands_smc.h
@@ -245,6 +245,31 @@ typedef struct SISLANDS_SMC_STATETABLE SISLANDS_SMC_STATETABLE;
#define SI_SMC_SOFT_REGISTER_svi_rework_gpio_id_svd 0x11c
#define SI_SMC_SOFT_REGISTER_svi_rework_gpio_id_svc 0x120
+struct PP_SIslands_FanTable
+{
+ uint8_t fdo_mode;
+ uint8_t padding;
+ int16_t temp_min;
+ int16_t temp_med;
+ int16_t temp_max;
+ int16_t slope1;
+ int16_t slope2;
+ int16_t fdo_min;
+ int16_t hys_up;
+ int16_t hys_down;
+ int16_t hys_slope;
+ int16_t temp_resp_lim;
+ int16_t temp_curr;
+ int16_t slope_curr;
+ int16_t pwm_curr;
+ uint32_t refresh_period;
+ int16_t fdo_max;
+ uint8_t temp_src;
+ int8_t padding2;
+};
+
+typedef struct PP_SIslands_FanTable PP_SIslands_FanTable;
+
#define SMC_SISLANDS_LKGE_LUT_NUM_OF_TEMP_ENTRIES 16
#define SMC_SISLANDS_LKGE_LUT_NUM_OF_VOLT_ENTRIES 32
diff --git a/drivers/gpu/drm/radeon/smu7_discrete.h b/drivers/gpu/drm/radeon/smu7_discrete.h
index 82f70c90a9ee..0b0b404ff091 100644
--- a/drivers/gpu/drm/radeon/smu7_discrete.h
+++ b/drivers/gpu/drm/radeon/smu7_discrete.h
@@ -431,6 +431,31 @@ struct SMU7_Discrete_MCRegisters
typedef struct SMU7_Discrete_MCRegisters SMU7_Discrete_MCRegisters;
+struct SMU7_Discrete_FanTable
+{
+ uint16_t FdoMode;
+ int16_t TempMin;
+ int16_t TempMed;
+ int16_t TempMax;
+ int16_t Slope1;
+ int16_t Slope2;
+ int16_t FdoMin;
+ int16_t HystUp;
+ int16_t HystDown;
+ int16_t HystSlope;
+ int16_t TempRespLim;
+ int16_t TempCurr;
+ int16_t SlopeCurr;
+ int16_t PwmCurr;
+ uint32_t RefreshPeriod;
+ int16_t FdoMax;
+ uint8_t TempSrc;
+ int8_t Padding;
+};
+
+typedef struct SMU7_Discrete_FanTable SMU7_Discrete_FanTable;
+
+
struct SMU7_Discrete_PmFuses {
// dw0-dw1
uint8_t BapmVddCVidHiSidd[8];
@@ -462,7 +487,10 @@ struct SMU7_Discrete_PmFuses {
uint8_t BapmVddCVidHiSidd2[8];
// dw11-dw12
- uint32_t Reserved6[2];
+ int16_t FuzzyFan_ErrorSetDelta;
+ int16_t FuzzyFan_ErrorRateSetDelta;
+ int16_t FuzzyFan_PwmSetDelta;
+ uint16_t CalcMeasPowerBlend;
// dw13-dw16
uint8_t GnbLPML[16];