diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 70 |
1 files changed, 53 insertions, 17 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 05117eda105b..84c241b9a2a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -31,11 +31,14 @@ #include <linux/debugfs.h> #include <linux/uaccess.h> +#include "amdgpu_reset.h" + #define EEPROM_I2C_MADDR_VEGA20 0x0 #define EEPROM_I2C_MADDR_ARCTURUS 0x40000 #define EEPROM_I2C_MADDR_ARCTURUS_D342 0x0 #define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0 #define EEPROM_I2C_MADDR_ALDEBARAN 0x0 +#define EEPROM_I2C_MADDR_SMU_13_0_0 (0x54UL << 16) /* * The 2 macros bellow represent the actual size in bytes that @@ -154,6 +157,15 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, return false; } + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + control->i2c_address = EEPROM_I2C_MADDR_SMU_13_0_0; + break; + + default: + break; + } + return true; } @@ -193,12 +205,12 @@ static int __write_table_header(struct amdgpu_ras_eeprom_control *control) __encode_table_header_to_buf(&control->tbl_hdr, buf); /* i2c may be unstable in gpu reset */ - down_read(&adev->reset_sem); - res = amdgpu_eeprom_write(&adev->pm.smu_i2c, + down_read(&adev->reset_domain->sem); + res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + control->ras_header_offset, buf, RAS_TABLE_HEADER_SIZE); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); if (res < 0) { DRM_ERROR("Failed to write EEPROM table header:%d", res); @@ -263,7 +275,9 @@ static int amdgpu_ras_eeprom_correct_header_tag( */ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) { + struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); u8 csum; int res; @@ -282,6 +296,12 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) control->ras_num_recs = 0; control->ras_fri = 0; + amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs); + + control->bad_channel_bitmap = 0; + amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap); + con->update_channel_flag = false; + amdgpu_ras_debugfs_set_ret_size(control); mutex_unlock(&control->ras_tbl_mutex); @@ -387,13 +407,13 @@ static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control, int res; /* i2c may be unstable in gpu reset */ - down_read(&adev->reset_sem); + down_read(&adev->reset_domain->sem); buf_size = num * RAS_TABLE_RECORD_SIZE; - res = amdgpu_eeprom_write(&adev->pm.smu_i2c, + res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + RAS_INDEX_TO_OFFSET(control, fri), buf, buf_size); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); if (res < 0) { DRM_ERROR("Writing %d EEPROM table records error:%d", num, res); @@ -415,6 +435,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *record, const u32 num) { + struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control)); u32 a, b, i; u8 *buf, *pp; int res; @@ -426,9 +447,16 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, /* Encode all of them in one go. */ pp = buf; - for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) { __encode_table_record_to_buf(control, &record[i], pp); + /* update bad channel bitmap */ + if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { + control->bad_channel_bitmap |= 1 << record[i].mem_channel; + con->update_channel_flag = true; + } + } + /* a, first record index to write into. * b, last record index to write into. * a = first index to read (fri) + number of records in the table, @@ -547,12 +575,12 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) goto Out; } - down_read(&adev->reset_sem); - res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + down_read(&adev->reset_domain->sem); + res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + control->ras_record_offset, buf, buf_size); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); if (res < 0) { DRM_ERROR("EEPROM failed reading records:%d\n", res); @@ -642,13 +670,13 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, int res; /* i2c may be unstable in gpu reset */ - down_read(&adev->reset_sem); + down_read(&adev->reset_domain->sem); buf_size = num * RAS_TABLE_RECORD_SIZE; - res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + RAS_INDEX_TO_OFFSET(control, fri), buf, buf_size); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); if (res < 0) { DRM_ERROR("Reading %d EEPROM table records error:%d", num, res); @@ -681,6 +709,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, const u32 num) { struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); int i, res; u8 *buf, *pp; u32 g0, g1; @@ -748,8 +777,15 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, /* Read up everything? Then transform. */ pp = buf; - for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) { __decode_table_record_from_buf(control, &record[i], pp); + + /* update bad channel bitmap */ + if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { + control->bad_channel_bitmap |= 1 << record[i].mem_channel; + con->update_channel_flag = true; + } + } Out: kfree(buf); mutex_unlock(&control->ras_tbl_mutex); @@ -1009,7 +1045,7 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control return -ENOMEM; } - res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + control->ras_header_offset, buf, buf_size); @@ -1045,7 +1081,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, return 0; /* Verify i2c adapter is initialized */ - if (!adev->pm.smu_i2c.algo) + if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo) return -ENOENT; if (!__get_eeprom_i2c_addr(adev, control)) @@ -1057,7 +1093,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, mutex_init(&control->ras_tbl_mutex); /* Read the table header from EEPROM address */ - res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + control->ras_header_offset, buf, RAS_TABLE_HEADER_SIZE); if (res < RAS_TABLE_HEADER_SIZE) { |