aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/si.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/radeon/si.c')
-rw-r--r--drivers/gpu/drm/radeon/si.c1337
1 files changed, 1296 insertions, 41 deletions
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index a1b0da6b5808..234906709067 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -32,40 +32,43 @@
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
+#include "clearstate_si.h"
+#include "radeon_ucode.h"
-#define SI_PFP_UCODE_SIZE 2144
-#define SI_PM4_UCODE_SIZE 2144
-#define SI_CE_UCODE_SIZE 2144
-#define SI_RLC_UCODE_SIZE 2048
-#define SI_MC_UCODE_SIZE 7769
-#define OLAND_MC_UCODE_SIZE 7863
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
+MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
+MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
+MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
+MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
+MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
+static void si_pcie_gen3_enable(struct radeon_device *rdev);
+static void si_program_aspm(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
@@ -75,6 +78,228 @@ extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
+static const u32 verde_rlc_save_restore_register_list[] =
+{
+ (0x8000 << 16) | (0x98f4 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x98f4 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0xe80 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0xe80 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x89bc >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x89bc >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x8c1c >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x8c1c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x98f0 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xe7c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9148 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9148 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9150 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x897c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8d8c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac54 >> 2),
+ 0X00000000,
+ 0x3,
+ (0x9c00 << 16) | (0x98f8 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9910 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9914 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9918 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x991c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9920 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9924 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9928 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x992c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9930 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9934 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9938 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x993c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9940 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9944 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9948 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x994c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9950 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9954 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9958 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x995c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9960 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9964 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9968 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x996c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9970 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9974 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9978 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x997c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9980 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9984 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9988 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x998c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c00 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c14 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c04 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c08 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9b7c >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9b7c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0xe84 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0xe84 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x89c0 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x89c0 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x914c >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x914c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x8c20 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x8c20 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9354 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9354 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9060 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9364 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9100 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x913c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x90e0 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x90e4 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x90e8 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x90e0 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x90e4 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x90e8 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8bcc >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8b24 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88c4 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8e50 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c0c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8e58 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8e5c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9508 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x950c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9494 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac0c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac10 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac14 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xae00 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac08 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88d4 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88c8 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88cc >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x89b0 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8b10 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8a14 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9830 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9834 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9838 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9a10 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ (0x8001 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8001 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ (0x8041 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8041 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ 0x00000000
+};
+
static const u32 tahiti_golden_rlc_registers[] =
{
0xc424, 0xffffffff, 0x00601005,
@@ -1320,6 +1545,7 @@ static int si_init_microcode(struct radeon_device *rdev)
const char *chip_name;
const char *rlc_chip_name;
size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
+ size_t smc_req_size;
char fw_name[30];
int err;
@@ -1341,6 +1567,7 @@ static int si_init_microcode(struct radeon_device *rdev)
ce_req_size = SI_CE_UCODE_SIZE * 4;
rlc_req_size = SI_RLC_UCODE_SIZE * 4;
mc_req_size = SI_MC_UCODE_SIZE * 4;
+ smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
break;
case CHIP_PITCAIRN:
chip_name = "PITCAIRN";
@@ -1350,6 +1577,7 @@ static int si_init_microcode(struct radeon_device *rdev)
ce_req_size = SI_CE_UCODE_SIZE * 4;
rlc_req_size = SI_RLC_UCODE_SIZE * 4;
mc_req_size = SI_MC_UCODE_SIZE * 4;
+ smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
break;
case CHIP_VERDE:
chip_name = "VERDE";
@@ -1359,6 +1587,7 @@ static int si_init_microcode(struct radeon_device *rdev)
ce_req_size = SI_CE_UCODE_SIZE * 4;
rlc_req_size = SI_RLC_UCODE_SIZE * 4;
mc_req_size = SI_MC_UCODE_SIZE * 4;
+ smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
break;
case CHIP_OLAND:
chip_name = "OLAND";
@@ -1368,6 +1597,7 @@ static int si_init_microcode(struct radeon_device *rdev)
ce_req_size = SI_CE_UCODE_SIZE * 4;
rlc_req_size = SI_RLC_UCODE_SIZE * 4;
mc_req_size = OLAND_MC_UCODE_SIZE * 4;
+ smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
break;
case CHIP_HAINAN:
chip_name = "HAINAN";
@@ -1377,6 +1607,7 @@ static int si_init_microcode(struct radeon_device *rdev)
ce_req_size = SI_CE_UCODE_SIZE * 4;
rlc_req_size = SI_RLC_UCODE_SIZE * 4;
mc_req_size = OLAND_MC_UCODE_SIZE * 4;
+ smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
break;
default: BUG();
}
@@ -1439,6 +1670,17 @@ static int si_init_microcode(struct radeon_device *rdev)
err = -EINVAL;
}
+ snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
+ err = request_firmware(&rdev->smc_fw, fw_name, &pdev->dev);
+ if (err)
+ goto out;
+ if (rdev->smc_fw->size != smc_req_size) {
+ printk(KERN_ERR
+ "si_smc: Bogus length %zu in firmware \"%s\"\n",
+ rdev->smc_fw->size, fw_name);
+ err = -EINVAL;
+ }
+
out:
platform_device_unregister(pdev);
@@ -1457,6 +1699,8 @@ out:
rdev->rlc_fw = NULL;
release_firmware(rdev->mc_fw);
rdev->mc_fw = NULL;
+ release_firmware(rdev->smc_fw);
+ rdev->smc_fw = NULL;
}
return err;
}
@@ -1792,7 +2036,8 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
u32 lb_size, u32 num_heads)
{
struct drm_display_mode *mode = &radeon_crtc->base.mode;
- struct dce6_wm_params wm;
+ struct dce6_wm_params wm_low, wm_high;
+ u32 dram_channels;
u32 pixel_period;
u32 line_time = 0;
u32 latency_watermark_a = 0, latency_watermark_b = 0;
@@ -1808,38 +2053,83 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
priority_a_cnt = 0;
priority_b_cnt = 0;
- wm.yclk = rdev->pm.current_mclk * 10;
- wm.sclk = rdev->pm.current_sclk * 10;
- wm.disp_clk = mode->clock;
- wm.src_width = mode->crtc_hdisplay;
- wm.active_time = mode->crtc_hdisplay * pixel_period;
- wm.blank_time = line_time - wm.active_time;
- wm.interlaced = false;
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- wm.interlaced = true;
- wm.vsc = radeon_crtc->vsc;
- wm.vtaps = 1;
- if (radeon_crtc->rmx_type != RMX_OFF)
- wm.vtaps = 2;
- wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
- wm.lb_size = lb_size;
if (rdev->family == CHIP_ARUBA)
- wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
+ dram_channels = evergreen_get_number_of_dram_channels(rdev);
else
- wm.dram_channels = si_get_number_of_dram_channels(rdev);
- wm.num_heads = num_heads;
+ dram_channels = si_get_number_of_dram_channels(rdev);
+
+ /* watermark for high clocks */
+ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
+ wm_high.yclk =
+ radeon_dpm_get_mclk(rdev, false) * 10;
+ wm_high.sclk =
+ radeon_dpm_get_sclk(rdev, false) * 10;
+ } else {
+ wm_high.yclk = rdev->pm.current_mclk * 10;
+ wm_high.sclk = rdev->pm.current_sclk * 10;
+ }
+
+ wm_high.disp_clk = mode->clock;
+ wm_high.src_width = mode->crtc_hdisplay;
+ wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+ wm_high.blank_time = line_time - wm_high.active_time;
+ wm_high.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_high.interlaced = true;
+ wm_high.vsc = radeon_crtc->vsc;
+ wm_high.vtaps = 1;
+ if (radeon_crtc->rmx_type != RMX_OFF)
+ wm_high.vtaps = 2;
+ wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_high.lb_size = lb_size;
+ wm_high.dram_channels = dram_channels;
+ wm_high.num_heads = num_heads;
+
+ /* watermark for low clocks */
+ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
+ wm_low.yclk =
+ radeon_dpm_get_mclk(rdev, true) * 10;
+ wm_low.sclk =
+ radeon_dpm_get_sclk(rdev, true) * 10;
+ } else {
+ wm_low.yclk = rdev->pm.current_mclk * 10;
+ wm_low.sclk = rdev->pm.current_sclk * 10;
+ }
+
+ wm_low.disp_clk = mode->clock;
+ wm_low.src_width = mode->crtc_hdisplay;
+ wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+ wm_low.blank_time = line_time - wm_low.active_time;
+ wm_low.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_low.interlaced = true;
+ wm_low.vsc = radeon_crtc->vsc;
+ wm_low.vtaps = 1;
+ if (radeon_crtc->rmx_type != RMX_OFF)
+ wm_low.vtaps = 2;
+ wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_low.lb_size = lb_size;
+ wm_low.dram_channels = dram_channels;
+ wm_low.num_heads = num_heads;
/* set for high clocks */
- latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
+ latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
/* set for low clocks */
- /* wm.yclk = low clk; wm.sclk = low clk */
- latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
+ latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
- if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
- !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
- !dce6_check_latency_hiding(&wm) ||
+ if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+ !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+ !dce6_check_latency_hiding(&wm_high) ||
+ (rdev->disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ priority_a_cnt |= PRIORITY_ALWAYS_ON;
+ priority_b_cnt |= PRIORITY_ALWAYS_ON;
+ }
+ if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+ !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+ !dce6_check_latency_hiding(&wm_low) ||
(rdev->disp_priority == 2)) {
DRM_DEBUG_KMS("force priority to high\n");
priority_a_cnt |= PRIORITY_ALWAYS_ON;
@@ -1895,6 +2185,10 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
+ /* save values for DPM */
+ radeon_crtc->line_time = line_time;
+ radeon_crtc->wm_high = latency_watermark_a;
+ radeon_crtc->wm_low = latency_watermark_b;
}
void dce6_bandwidth_update(struct radeon_device *rdev)
@@ -3535,8 +3829,8 @@ static void si_mc_program(struct radeon_device *rdev)
}
}
-static void si_vram_gtt_location(struct radeon_device *rdev,
- struct radeon_mc *mc)
+void si_vram_gtt_location(struct radeon_device *rdev,
+ struct radeon_mc *mc)
{
if (mc->mc_vram_size > 0xFFC0000000ULL) {
/* leave room for at least 1024M GTT */
@@ -4282,6 +4576,450 @@ void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
}
/*
+ * Power and clock gating
+ */
+static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
+{
+ int i;
+
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
+ break;
+ udelay(1);
+ }
+
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 tmp = RREG32(CP_INT_CNTL_RING0);
+ u32 mask;
+ int i;
+
+ if (enable)
+ tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+ else
+ tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+ WREG32(CP_INT_CNTL_RING0, tmp);
+
+ if (!enable) {
+ /* read a gfx register */
+ tmp = RREG32(DB_DEPTH_INFO);
+
+ mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
+ break;
+ udelay(1);
+ }
+ }
+}
+
+static void si_set_uvd_dcm(struct radeon_device *rdev,
+ bool sw_mode)
+{
+ u32 tmp, tmp2;
+
+ tmp = RREG32(UVD_CGC_CTRL);
+ tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
+ tmp |= DCM | CG_DT(1) | CLK_OD(4);
+
+ if (sw_mode) {
+ tmp &= ~0x7ffff800;
+ tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
+ } else {
+ tmp |= 0x7ffff800;
+ tmp2 = 0;
+ }
+
+ WREG32(UVD_CGC_CTRL, tmp);
+ WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
+}
+
+static void si_init_uvd_internal_cg(struct radeon_device *rdev)
+{
+ bool hw_mode = true;
+
+ if (hw_mode) {
+ si_set_uvd_dcm(rdev, false);
+ } else {
+ u32 tmp = RREG32(UVD_CGC_CTRL);
+ tmp &= ~DCM;
+ WREG32(UVD_CGC_CTRL, tmp);
+ }
+}
+
+static u32 si_halt_rlc(struct radeon_device *rdev)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(RLC_CNTL);
+
+ if (data & RLC_ENABLE) {
+ data &= ~RLC_ENABLE;
+ WREG32(RLC_CNTL, data);
+
+ si_wait_for_rlc_serdes(rdev);
+ }
+
+ return orig;
+}
+
+static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
+{
+ u32 tmp;
+
+ tmp = RREG32(RLC_CNTL);
+ if (tmp != rlc)
+ WREG32(RLC_CNTL, rlc);
+}
+
+static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(DMA_PG);
+ if (enable)
+ data |= PG_CNTL_ENABLE;
+ else
+ data &= ~PG_CNTL_ENABLE;
+ if (orig != data)
+ WREG32(DMA_PG, data);
+}
+
+static void si_init_dma_pg(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ WREG32(DMA_PGFSM_WRITE, 0x00002000);
+ WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
+
+ for (tmp = 0; tmp < 5; tmp++)
+ WREG32(DMA_PGFSM_WRITE, 0);
+}
+
+static void si_enable_gfx_cgpg(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (enable) {
+ tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
+ WREG32(RLC_TTOP_D, tmp);
+
+ tmp = RREG32(RLC_PG_CNTL);
+ tmp |= GFX_PG_ENABLE;
+ WREG32(RLC_PG_CNTL, tmp);
+
+ tmp = RREG32(RLC_AUTO_PG_CTRL);
+ tmp |= AUTO_PG_EN;
+ WREG32(RLC_AUTO_PG_CTRL, tmp);
+ } else {
+ tmp = RREG32(RLC_AUTO_PG_CTRL);
+ tmp &= ~AUTO_PG_EN;
+ WREG32(RLC_AUTO_PG_CTRL, tmp);
+
+ tmp = RREG32(DB_RENDER_CONTROL);
+ }
+}
+
+static void si_init_gfx_cgpg(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
+
+ tmp = RREG32(RLC_PG_CNTL);
+ tmp |= GFX_PG_SRC;
+ WREG32(RLC_PG_CNTL, tmp);
+
+ WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
+
+ tmp = RREG32(RLC_AUTO_PG_CTRL);
+
+ tmp &= ~GRBM_REG_SGIT_MASK;
+ tmp |= GRBM_REG_SGIT(0x700);
+ tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
+ WREG32(RLC_AUTO_PG_CTRL, tmp);
+}
+
+static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
+{
+ u32 mask = 0, tmp, tmp1;
+ int i;
+
+ si_select_se_sh(rdev, se, sh);
+ tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
+ tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
+ si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+
+ tmp &= 0xffff0000;
+
+ tmp |= tmp1;
+ tmp >>= 16;
+
+ for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
+ mask <<= 1;
+ mask |= 1;
+ }
+
+ return (~tmp) & mask;
+}
+
+static void si_init_ao_cu_mask(struct radeon_device *rdev)
+{
+ u32 i, j, k, active_cu_number = 0;
+ u32 mask, counter, cu_bitmap;
+ u32 tmp = 0;
+
+ for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
+ for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
+ mask = 1;
+ cu_bitmap = 0;
+ counter = 0;
+ for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
+ if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
+ if (counter < 2)
+ cu_bitmap |= mask;
+ counter++;
+ }
+ mask <<= 1;
+ }
+
+ active_cu_number += counter;
+ tmp |= (cu_bitmap << (i * 16 + j * 8));
+ }
+ }
+
+ WREG32(RLC_PG_AO_CU_MASK, tmp);
+
+ tmp = RREG32(RLC_MAX_PG_CU);
+ tmp &= ~MAX_PU_CU_MASK;
+ tmp |= MAX_PU_CU(active_cu_number);
+ WREG32(RLC_MAX_PG_CU, tmp);
+}
+
+static void si_enable_cgcg(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 data, orig, tmp;
+
+ orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
+
+ si_enable_gui_idle_interrupt(rdev, enable);
+
+ if (enable) {
+ WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
+
+ tmp = si_halt_rlc(rdev);
+
+ WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
+ WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
+ WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
+
+ si_wait_for_rlc_serdes(rdev);
+
+ si_update_rlc(rdev, tmp);
+
+ WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
+
+ data |= CGCG_EN | CGLS_EN;
+ } else {
+ RREG32(CB_CGTT_SCLK_CTRL);
+ RREG32(CB_CGTT_SCLK_CTRL);
+ RREG32(CB_CGTT_SCLK_CTRL);
+ RREG32(CB_CGTT_SCLK_CTRL);
+
+ data &= ~(CGCG_EN | CGLS_EN);
+ }
+
+ if (orig != data)
+ WREG32(RLC_CGCG_CGLS_CTRL, data);
+}
+
+static void si_enable_mgcg(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 data, orig, tmp = 0;
+
+ if (enable) {
+ orig = data = RREG32(CGTS_SM_CTRL_REG);
+ data = 0x96940200;
+ if (orig != data)
+ WREG32(CGTS_SM_CTRL_REG, data);
+
+ orig = data = RREG32(CP_MEM_SLP_CNTL);
+ data |= CP_MEM_LS_EN;
+ if (orig != data)
+ WREG32(CP_MEM_SLP_CNTL, data);
+
+ orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+ data &= 0xffffffc0;
+ if (orig != data)
+ WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
+
+ tmp = si_halt_rlc(rdev);
+
+ WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
+ WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
+ WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
+
+ si_update_rlc(rdev, tmp);
+ } else {
+ orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+ data |= 0x00000003;
+ if (orig != data)
+ WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
+
+ data = RREG32(CP_MEM_SLP_CNTL);
+ if (data & CP_MEM_LS_EN) {
+ data &= ~CP_MEM_LS_EN;
+ WREG32(CP_MEM_SLP_CNTL, data);
+ }
+ orig = data = RREG32(CGTS_SM_CTRL_REG);
+ data |= LS_OVERRIDE | OVERRIDE;
+ if (orig != data)
+ WREG32(CGTS_SM_CTRL_REG, data);
+
+ tmp = si_halt_rlc(rdev);
+
+ WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
+ WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
+ WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
+
+ si_update_rlc(rdev, tmp);
+ }
+}
+
+static void si_enable_uvd_mgcg(struct radeon_device *rdev,
+ bool enable)
+{
+ u32 orig, data, tmp;
+
+ if (enable) {
+ tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
+ tmp |= 0x3fff;
+ WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
+
+ orig = data = RREG32(UVD_CGC_CTRL);
+ data |= DCM;
+ if (orig != data)
+ WREG32(UVD_CGC_CTRL, data);
+
+ WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
+ WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
+ } else {
+ tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
+ tmp &= ~0x3fff;
+ WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
+
+ orig = data = RREG32(UVD_CGC_CTRL);
+ data &= ~DCM;
+ if (orig != data)
+ WREG32(UVD_CGC_CTRL, data);
+
+ WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
+ WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
+ }
+}
+
+static const u32 mc_cg_registers[] =
+{
+ MC_HUB_MISC_HUB_CG,
+ MC_HUB_MISC_SIP_CG,
+ MC_HUB_MISC_VM_CG,
+ MC_XPB_CLK_GAT,
+ ATC_MISC_CG,
+ MC_CITF_MISC_WR_CG,
+ MC_CITF_MISC_RD_CG,
+ MC_CITF_MISC_VM_CG,
+ VM_L2_CG,
+};
+
+static void si_enable_mc_ls(struct radeon_device *rdev,
+ bool enable)
+{
+ int i;
+ u32 orig, data;
+
+ for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+ orig = data = RREG32(mc_cg_registers[i]);
+ if (enable)
+ data |= MC_LS_ENABLE;
+ else
+ data &= ~MC_LS_ENABLE;
+ if (data != orig)
+ WREG32(mc_cg_registers[i], data);
+ }
+}
+
+
+static void si_init_cg(struct radeon_device *rdev)
+{
+ bool has_uvd = true;
+
+ si_enable_mgcg(rdev, true);
+ si_enable_cgcg(rdev, true);
+ /* disable MC LS on Tahiti */
+ if (rdev->family == CHIP_TAHITI)
+ si_enable_mc_ls(rdev, false);
+ if (has_uvd) {
+ si_enable_uvd_mgcg(rdev, true);
+ si_init_uvd_internal_cg(rdev);
+ }
+}
+
+static void si_fini_cg(struct radeon_device *rdev)
+{
+ bool has_uvd = true;
+
+ if (has_uvd)
+ si_enable_uvd_mgcg(rdev, false);
+ si_enable_cgcg(rdev, false);
+ si_enable_mgcg(rdev, false);
+}
+
+static void si_init_pg(struct radeon_device *rdev)
+{
+ bool has_pg = false;
+
+ /* only cape verde supports PG */
+ if (rdev->family == CHIP_VERDE)
+ has_pg = true;
+
+ if (has_pg) {
+ si_init_ao_cu_mask(rdev);
+ si_init_dma_pg(rdev);
+ si_enable_dma_pg(rdev, true);
+ si_init_gfx_cgpg(rdev);
+ si_enable_gfx_cgpg(rdev, true);
+ } else {
+ WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
+ WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
+ }
+}
+
+static void si_fini_pg(struct radeon_device *rdev)
+{
+ bool has_pg = false;
+
+ /* only cape verde supports PG */
+ if (rdev->family == CHIP_VERDE)
+ has_pg = true;
+
+ if (has_pg) {
+ si_enable_dma_pg(rdev, false);
+ si_enable_gfx_cgpg(rdev, false);
+ }
+}
+
+/*
* RLC
*/
void si_rlc_fini(struct radeon_device *rdev)
@@ -4313,8 +5051,15 @@ void si_rlc_fini(struct radeon_device *rdev)
}
}
+#define RLC_CLEAR_STATE_END_MARKER 0x00000001
+
int si_rlc_init(struct radeon_device *rdev)
{
+ volatile u32 *dst_ptr;
+ u32 dws, data, i, j, k, reg_num;
+ u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
+ u64 reg_list_mc_addr;
+ const struct cs_section_def *cs_data = si_cs_data;
int r;
/* save restore block */
@@ -4335,18 +5080,44 @@ int si_rlc_init(struct radeon_device *rdev)
}
r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
&rdev->rlc.save_restore_gpu_addr);
- radeon_bo_unreserve(rdev->rlc.save_restore_obj);
if (r) {
+ radeon_bo_unreserve(rdev->rlc.save_restore_obj);
dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
si_rlc_fini(rdev);
return r;
}
+ if (rdev->family == CHIP_VERDE) {
+ r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
+ if (r) {
+ dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
+ si_rlc_fini(rdev);
+ return r;
+ }
+ /* write the sr buffer */
+ dst_ptr = rdev->rlc.sr_ptr;
+ for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
+ dst_ptr[i] = verde_rlc_save_restore_register_list[i];
+ }
+ radeon_bo_kunmap(rdev->rlc.save_restore_obj);
+ }
+ radeon_bo_unreserve(rdev->rlc.save_restore_obj);
+
/* clear state block */
+ reg_list_num = 0;
+ dws = 0;
+ for (i = 0; cs_data[i].section != NULL; i++) {
+ for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
+ reg_list_num++;
+ dws += cs_data[i].section[j].reg_count;
+ }
+ }
+ reg_list_blk_index = (3 * reg_list_num + 2);
+ dws += reg_list_blk_index;
+
if (rdev->rlc.clear_state_obj == NULL) {
- r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
- RADEON_GEM_DOMAIN_VRAM, NULL,
- &rdev->rlc.clear_state_obj);
+ r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
+ RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
si_rlc_fini(rdev);
@@ -4360,24 +5131,113 @@ int si_rlc_init(struct radeon_device *rdev)
}
r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
&rdev->rlc.clear_state_gpu_addr);
- radeon_bo_unreserve(rdev->rlc.clear_state_obj);
if (r) {
+
+ radeon_bo_unreserve(rdev->rlc.clear_state_obj);
dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
si_rlc_fini(rdev);
return r;
}
+ r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
+ if (r) {
+ dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
+ si_rlc_fini(rdev);
+ return r;
+ }
+ /* set up the cs buffer */
+ dst_ptr = rdev->rlc.cs_ptr;
+ reg_list_hdr_blk_index = 0;
+ reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
+ data = upper_32_bits(reg_list_mc_addr);
+ dst_ptr[reg_list_hdr_blk_index] = data;
+ reg_list_hdr_blk_index++;
+ for (i = 0; cs_data[i].section != NULL; i++) {
+ for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
+ reg_num = cs_data[i].section[j].reg_count;
+ data = reg_list_mc_addr & 0xffffffff;
+ dst_ptr[reg_list_hdr_blk_index] = data;
+ reg_list_hdr_blk_index++;
+
+ data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
+ dst_ptr[reg_list_hdr_blk_index] = data;
+ reg_list_hdr_blk_index++;
+
+ data = 0x08000000 | (reg_num * 4);
+ dst_ptr[reg_list_hdr_blk_index] = data;
+ reg_list_hdr_blk_index++;
+
+ for (k = 0; k < reg_num; k++) {
+ data = cs_data[i].section[j].extent[k];
+ dst_ptr[reg_list_blk_index + k] = data;
+ }
+ reg_list_mc_addr += reg_num * 4;
+ reg_list_blk_index += reg_num;
+ }
+ }
+ dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
+
+ radeon_bo_kunmap(rdev->rlc.clear_state_obj);
+ radeon_bo_unreserve(rdev->rlc.clear_state_obj);
return 0;
}
+static void si_rlc_reset(struct radeon_device *rdev)
+{
+ u32 tmp = RREG32(GRBM_SOFT_RESET);
+
+ tmp |= SOFT_RESET_RLC;
+ WREG32(GRBM_SOFT_RESET, tmp);
+ udelay(50);
+ tmp &= ~SOFT_RESET_RLC;
+ WREG32(GRBM_SOFT_RESET, tmp);
+ udelay(50);
+}
+
static void si_rlc_stop(struct radeon_device *rdev)
{
WREG32(RLC_CNTL, 0);
+
+ si_enable_gui_idle_interrupt(rdev, false);
+
+ si_wait_for_rlc_serdes(rdev);
}
static void si_rlc_start(struct radeon_device *rdev)
{
WREG32(RLC_CNTL, RLC_ENABLE);
+
+ si_enable_gui_idle_interrupt(rdev, true);
+
+ udelay(50);
+}
+
+static bool si_lbpw_supported(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ /* Enable LBPW only for DDR3 */
+ tmp = RREG32(MC_SEQ_MISC0);
+ if ((tmp & 0xF0000000) == 0xB0000000)
+ return true;
+ return false;
+}
+
+static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
+{
+ u32 tmp;
+
+ tmp = RREG32(RLC_LB_CNTL);
+ if (enable)
+ tmp |= LOAD_BALANCE_ENABLE;
+ else
+ tmp &= ~LOAD_BALANCE_ENABLE;
+ WREG32(RLC_LB_CNTL, tmp);
+
+ if (!enable) {
+ si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+ WREG32(SPI_LB_CU_MASK, 0x00ff);
+ }
}
static int si_rlc_resume(struct radeon_device *rdev)
@@ -4390,14 +5250,18 @@ static int si_rlc_resume(struct radeon_device *rdev)
si_rlc_stop(rdev);
+ si_rlc_reset(rdev);
+
+ si_init_pg(rdev);
+
+ si_init_cg(rdev);
+
WREG32(RLC_RL_BASE, 0);
WREG32(RLC_RL_SIZE, 0);
WREG32(RLC_LB_CNTL, 0);
WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
WREG32(RLC_LB_CNTR_INIT, 0);
-
- WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
- WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
+ WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
WREG32(RLC_MC_CNTL, 0);
WREG32(RLC_UCODE_CNTL, 0);
@@ -4409,6 +5273,8 @@ static int si_rlc_resume(struct radeon_device *rdev)
}
WREG32(RLC_UCODE_ADDR, 0);
+ si_enable_lbpw(rdev, si_lbpw_supported(rdev));
+
si_rlc_start(rdev);
return 0;
@@ -4578,6 +5444,7 @@ int si_irq_set(struct radeon_device *rdev)
u32 grbm_int_cntl = 0;
u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
u32 dma_cntl, dma_cntl1;
+ u32 thermal_int = 0;
if (!rdev->irq.installed) {
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -4603,6 +5470,9 @@ int si_irq_set(struct radeon_device *rdev)
dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
+ thermal_int = RREG32(CG_THERMAL_INT) &
+ ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
+
/* enable CP interrupts on all rings */
if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
DRM_DEBUG("si_irq_set: sw int gfx\n");
@@ -4689,6 +5559,11 @@ int si_irq_set(struct radeon_device *rdev)
WREG32(GRBM_INT_CNTL, grbm_int_cntl);
+ if (rdev->irq.dpm_thermal) {
+ DRM_DEBUG("dpm thermal\n");
+ thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
+ }
+
if (rdev->num_crtc >= 2) {
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
@@ -4724,6 +5599,8 @@ int si_irq_set(struct radeon_device *rdev)
WREG32(DC_HPD6_INT_CONTROL, hpd6);
}
+ WREG32(CG_THERMAL_INT, thermal_int);
+
return 0;
}
@@ -4888,6 +5765,7 @@ int si_irq_process(struct radeon_device *rdev)
u32 src_id, src_data, ring_id;
u32 ring_index;
bool queue_hotplug = false;
+ bool queue_thermal = false;
if (!rdev->ih.enabled || rdev->shutdown)
return IRQ_NONE;
@@ -5158,6 +6036,16 @@ restart_ih:
DRM_DEBUG("IH: DMA trap\n");
radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
break;
+ case 230: /* thermal low to high */
+ DRM_DEBUG("IH: thermal low to high\n");
+ rdev->pm.dpm.thermal.high_to_low = false;
+ queue_thermal = true;
+ break;
+ case 231: /* thermal high to low */
+ DRM_DEBUG("IH: thermal high to low\n");
+ rdev->pm.dpm.thermal.high_to_low = true;
+ queue_thermal = true;
+ break;
case 233: /* GUI IDLE */
DRM_DEBUG("IH: GUI idle\n");
break;
@@ -5176,6 +6064,8 @@ restart_ih:
}
if (queue_hotplug)
schedule_work(&rdev->hotplug_work);
+ if (queue_thermal && rdev->pm.dpm_enabled)
+ schedule_work(&rdev->pm.dpm.thermal.work);
rdev->ih.rptr = rptr;
WREG32(IH_RB_RPTR, rdev->ih.rptr);
atomic_set(&rdev->ih.lock, 0);
@@ -5270,6 +6160,11 @@ static int si_startup(struct radeon_device *rdev)
struct radeon_ring *ring;
int r;
+ /* enable pcie gen2/3 link */
+ si_pcie_gen3_enable(rdev);
+ /* enable aspm */
+ si_program_aspm(rdev);
+
if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
!rdev->rlc_fw || !rdev->mc_fw) {
r = si_init_microcode(rdev);
@@ -5609,6 +6504,8 @@ void si_fini(struct radeon_device *rdev)
cayman_dma_fini(rdev);
si_irq_fini(rdev);
si_rlc_fini(rdev);
+ si_fini_cg(rdev);
+ si_fini_pg(rdev);
radeon_wb_fini(rdev);
radeon_vm_manager_fini(rdev);
radeon_ib_pool_fini(rdev);
@@ -5735,3 +6632,361 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
return 0;
}
+
+static void si_pcie_gen3_enable(struct radeon_device *rdev)
+{
+ struct pci_dev *root = rdev->pdev->bus->self;
+ int bridge_pos, gpu_pos;
+ u32 speed_cntl, mask, current_data_rate;
+ int ret, i;
+ u16 tmp16;
+
+ if (radeon_pcie_gen2 == 0)
+ return;
+
+ if (rdev->flags & RADEON_IS_IGP)
+ return;
+
+ if (!(rdev->flags & RADEON_IS_PCIE))
+ return;
+
+ ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
+ if (ret != 0)
+ return;
+
+ if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
+ return;
+
+ speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+ current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
+ LC_CURRENT_DATA_RATE_SHIFT;
+ if (mask & DRM_PCIE_SPEED_80) {
+ if (current_data_rate == 2) {
+ DRM_INFO("PCIE gen 3 link speeds already enabled\n");
+ return;
+ }
+ DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
+ } else if (mask & DRM_PCIE_SPEED_50) {
+ if (current_data_rate == 1) {
+ DRM_INFO("PCIE gen 2 link speeds already enabled\n");
+ return;
+ }
+ DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
+ }
+
+ bridge_pos = pci_pcie_cap(root);
+ if (!bridge_pos)
+ return;
+
+ gpu_pos = pci_pcie_cap(rdev->pdev);
+ if (!gpu_pos)
+ return;
+
+ if (mask & DRM_PCIE_SPEED_80) {
+ /* re-try equalization if gen3 is not already enabled */
+ if (current_data_rate != 2) {
+ u16 bridge_cfg, gpu_cfg;
+ u16 bridge_cfg2, gpu_cfg2;
+ u32 max_lw, current_lw, tmp;
+
+ pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+
+ tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
+ pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+
+ tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
+ pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+
+ tmp = RREG32_PCIE(PCIE_LC_STATUS1);
+ max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
+ current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
+
+ if (current_lw < max_lw) {
+ tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+ if (tmp & LC_RENEGOTIATION_SUPPORT) {
+ tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
+ tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
+ tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
+ WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
+ }
+ }
+
+ for (i = 0; i < 10; i++) {
+ /* check status */
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ if (tmp16 & PCI_EXP_DEVSTA_TRPND)
+ break;
+
+ pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+
+ pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+
+ tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+ tmp |= LC_SET_QUIESCE;
+ WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+ tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+ tmp |= LC_REDO_EQ;
+ WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+ mdelay(100);
+
+ /* linkctl */
+ pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+ tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
+ pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+ tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
+ pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+
+ /* linkctl2 */
+ pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~((1 << 4) | (7 << 9));
+ tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
+ pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
+
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~((1 << 4) | (7 << 9));
+ tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
+ pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+
+ tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+ tmp &= ~LC_SET_QUIESCE;
+ WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ }
+ }
+ }
+
+ /* set the link speed */
+ speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
+ speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
+ WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+ pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~0xf;
+ if (mask & DRM_PCIE_SPEED_80)
+ tmp16 |= 3; /* gen3 */
+ else if (mask & DRM_PCIE_SPEED_50)
+ tmp16 |= 2; /* gen2 */
+ else
+ tmp16 |= 1; /* gen1 */
+ pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+
+ speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+ speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
+ WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+ if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void si_program_aspm(struct radeon_device *rdev)
+{
+ u32 data, orig;
+ bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
+ bool disable_clkreq = false;
+
+ if (!(rdev->flags & RADEON_IS_PCIE))
+ return;
+
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+ data &= ~LC_XMIT_N_FTS_MASK;
+ data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
+
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
+ data |= LC_GO_TO_RECOVERY;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
+
+ orig = data = RREG32_PCIE(PCIE_P_CNTL);
+ data |= P_IGNORE_EDB_ERR;
+ if (orig != data)
+ WREG32_PCIE(PCIE_P_CNTL, data);
+
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+ data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
+ data |= LC_PMI_TO_L1_DIS;
+ if (!disable_l0s)
+ data |= LC_L0S_INACTIVITY(7);
+
+ if (!disable_l1) {
+ data |= LC_L1_INACTIVITY(7);
+ data &= ~LC_PMI_TO_L1_DIS;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+
+ if (!disable_plloff_in_l1) {
+ bool clk_req_support;
+
+ orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
+ data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ if (orig != data)
+ WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
+
+ orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
+ data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ if (orig != data)
+ WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
+
+ orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
+ data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ if (orig != data)
+ WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
+
+ orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
+ data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ if (orig != data)
+ WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
+
+ if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
+ orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
+ data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ if (orig != data)
+ WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
+
+ orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
+ data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ if (orig != data)
+ WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
+
+ orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
+ data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ if (orig != data)
+ WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
+
+ orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
+ data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ if (orig != data)
+ WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
+
+ orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
+ data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ if (orig != data)
+ WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
+
+ orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
+ data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ if (orig != data)
+ WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
+
+ orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
+ data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ if (orig != data)
+ WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
+
+ orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
+ data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ if (orig != data)
+ WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
+ }
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+ data &= ~LC_DYN_LANES_PWR_STATE_MASK;
+ data |= LC_DYN_LANES_PWR_STATE(3);
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
+
+ orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
+ data &= ~LS2_EXIT_TIME_MASK;
+ if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
+ data |= LS2_EXIT_TIME(5);
+ if (orig != data)
+ WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
+
+ orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
+ data &= ~LS2_EXIT_TIME_MASK;
+ if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
+ data |= LS2_EXIT_TIME(5);
+ if (orig != data)
+ WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
+
+ if (!disable_clkreq) {
+ struct pci_dev *root = rdev->pdev->bus->self;
+ u32 lnkcap;
+
+ clk_req_support = false;
+ pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
+ if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
+ clk_req_support = true;
+ } else {
+ clk_req_support = false;
+ }
+
+ if (clk_req_support) {
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
+ data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
+
+ orig = data = RREG32(THM_CLK_CNTL);
+ data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
+ data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
+ if (orig != data)
+ WREG32(THM_CLK_CNTL, data);
+
+ orig = data = RREG32(MISC_CLK_CNTL);
+ data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
+ data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
+ if (orig != data)
+ WREG32(MISC_CLK_CNTL, data);
+
+ orig = data = RREG32(CG_CLKPIN_CNTL);
+ data &= ~BCLK_AS_XCLK;
+ if (orig != data)
+ WREG32(CG_CLKPIN_CNTL, data);
+
+ orig = data = RREG32(CG_CLKPIN_CNTL_2);
+ data &= ~FORCE_BIF_REFCLK_EN;
+ if (orig != data)
+ WREG32(CG_CLKPIN_CNTL_2, data);
+
+ orig = data = RREG32(MPLL_BYPASSCLK_SEL);
+ data &= ~MPLL_CLKOUT_SEL_MASK;
+ data |= MPLL_CLKOUT_SEL(4);
+ if (orig != data)
+ WREG32(MPLL_BYPASSCLK_SEL, data);
+
+ orig = data = RREG32(SPLL_CNTL_MODE);
+ data &= ~SPLL_REFCLK_SEL_MASK;
+ if (orig != data)
+ WREG32(SPLL_CNTL_MODE, data);
+ }
+ }
+ } else {
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ }
+
+ orig = data = RREG32_PCIE(PCIE_CNTL2);
+ data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
+ if (orig != data)
+ WREG32_PCIE(PCIE_CNTL2, data);
+
+ if (!disable_l0s) {
+ data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+ if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
+ data = RREG32_PCIE(PCIE_LC_STATUS1);
+ if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+ data &= ~LC_L0S_INACTIVITY_MASK;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ }
+ }
+ }
+}