aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c352
1 files changed, 272 insertions, 80 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cfedfb1e8596..8bfdfd062ff6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2015-2017 Advanced Micro Devices, Inc.
+ * Copyright 2015-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -794,6 +795,102 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
},
};
+static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 256,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+};
+
+static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
+ {
+ /* TCP L1 Cache per CU */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 1,
+ },
+ {
+ /* Scalar L1 Instruction Cache per SQC */
+ .cache_size = 32,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* Scalar L1 Data Cache per SQC */
+ .cache_size = 16,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* GL1 Data Cache per SA */
+ .cache_size = 128,
+ .cache_level = 1,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+ {
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ .cache_size = 256,
+ .cache_level = 2,
+ .flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE),
+ .num_cu_shared = 2,
+ },
+};
+
static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
struct crat_subtype_computeunit *cu)
{
@@ -1039,7 +1136,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
props->rec_transfer_size =
iolink->recommended_transfer_size;
- dev->io_link_count++;
dev->node_props.io_links_count++;
list_add_tail(&props->list, &dev->io_link_props);
break;
@@ -1055,15 +1151,17 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
* table, add corresponded reversed direction link now.
*/
if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
- to_dev = kfd_topology_device_by_proximity_domain(id_to);
+ to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
if (!to_dev)
return -ENODEV;
/* same everything but the other direction */
props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
+ if (!props2)
+ return -ENOMEM;
+
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
- to_dev->io_link_count++;
to_dev->node_props.io_links_count++;
list_add_tail(&props2->list, &to_dev->io_link_props);
}
@@ -1311,6 +1409,80 @@ static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
return 1;
}
+#define KFD_MAX_CACHE_TYPES 6
+
+static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+ struct kfd_gpu_cache_info *pcache_info)
+{
+ struct amdgpu_device *adev = kdev->adev;
+ int i = 0;
+
+ /* TCP L1 Cache per CU */
+ if (adev->gfx.config.gc_tcp_l1_size) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+ i++;
+ }
+ /* Scalar L1 Instruction Cache per SQC */
+ if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
+ pcache_info[i].cache_size =
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ i++;
+ }
+ /* Scalar L1 Data Cache per SQC */
+ if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ i++;
+ }
+ /* GL1 Data Cache per SA */
+ if (adev->gfx.config.gc_gl1c_per_sa &&
+ adev->gfx.config.gc_gl1c_size_per_instance) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
+ adev->gfx.config.gc_gl1c_size_per_instance;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ i++;
+ }
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ if (adev->gfx.config.gc_gl2c_per_gpu) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
+ pcache_info[i].cache_level = 2;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ i++;
+ }
+ /* L3 Data Cache per GPU */
+ if (adev->gmc.mall_size) {
+ pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+ pcache_info[i].cache_level = 3;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ i++;
+ }
+ return i;
+}
+
/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
* tables
*
@@ -1332,6 +1504,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
int *num_of_entries)
{
struct kfd_gpu_cache_info *pcache_info;
+ struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
int num_of_cache_types = 0;
int i, j, k;
int ct = 0;
@@ -1340,7 +1513,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
int ret;
unsigned int num_cu_shared;
- switch (kdev->device_info->asic_family) {
+ switch (kdev->adev->asic_type) {
case CHIP_KAVERI:
pcache_info = kaveri_cache_info;
num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
@@ -1377,67 +1550,88 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info = vegam_cache_info;
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
break;
- case CHIP_VEGA10:
- pcache_info = vega10_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
- break;
- case CHIP_VEGA12:
- pcache_info = vega12_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
- break;
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- pcache_info = vega20_cache_info;
- num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
- break;
- case CHIP_ALDEBARAN:
- pcache_info = aldebaran_cache_info;
- num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
- break;
- case CHIP_RAVEN:
- pcache_info = raven_cache_info;
- num_of_cache_types = ARRAY_SIZE(raven_cache_info);
- break;
- case CHIP_RENOIR:
- pcache_info = renoir_cache_info;
- num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
- break;
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_CYAN_SKILLFISH:
- pcache_info = navi10_cache_info;
- num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
- break;
- case CHIP_NAVI14:
- pcache_info = navi14_cache_info;
- num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
- break;
- case CHIP_SIENNA_CICHLID:
- pcache_info = sienna_cichlid_cache_info;
- num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
- break;
- case CHIP_NAVY_FLOUNDER:
- pcache_info = navy_flounder_cache_info;
- num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
- break;
- case CHIP_DIMGREY_CAVEFISH:
- pcache_info = dimgrey_cavefish_cache_info;
- num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
- break;
- case CHIP_VANGOGH:
- pcache_info = vangogh_cache_info;
- num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
- break;
- case CHIP_BEIGE_GOBY:
- pcache_info = beige_goby_cache_info;
- num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
- break;
- case CHIP_YELLOW_CARP:
- pcache_info = yellow_carp_cache_info;
- num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
- break;
default:
- return -EINVAL;
+ switch (KFD_GC_VERSION(kdev)) {
+ case IP_VERSION(9, 0, 1):
+ pcache_info = vega10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ break;
+ case IP_VERSION(9, 2, 1):
+ pcache_info = vega12_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
+ break;
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ pcache_info = vega20_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
+ break;
+ case IP_VERSION(9, 4, 2):
+ pcache_info = aldebaran_cache_info;
+ num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ pcache_info = raven_cache_info;
+ num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+ break;
+ case IP_VERSION(9, 3, 0):
+ pcache_info = renoir_cache_info;
+ num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ pcache_info = navi10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+ break;
+ case IP_VERSION(10, 1, 1):
+ pcache_info = navi14_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
+ break;
+ case IP_VERSION(10, 3, 0):
+ pcache_info = sienna_cichlid_cache_info;
+ num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
+ break;
+ case IP_VERSION(10, 3, 2):
+ pcache_info = navy_flounder_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
+ break;
+ case IP_VERSION(10, 3, 4):
+ pcache_info = dimgrey_cavefish_cache_info;
+ num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
+ break;
+ case IP_VERSION(10, 3, 1):
+ pcache_info = vangogh_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
+ break;
+ case IP_VERSION(10, 3, 5):
+ pcache_info = beige_goby_cache_info;
+ num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
+ break;
+ case IP_VERSION(10, 3, 3):
+ pcache_info = yellow_carp_cache_info;
+ num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
+ break;
+ case IP_VERSION(10, 3, 6):
+ pcache_info = gc_10_3_6_cache_info;
+ num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
+ break;
+ case IP_VERSION(10, 3, 7):
+ pcache_info = gfx1037_cache_info;
+ num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ pcache_info = cache_info;
+ num_of_cache_types =
+ kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
+ break;
+ default:
+ return -EINVAL;
+ }
}
*size_filled = 0;
@@ -1560,7 +1754,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
/* Fetch the CRAT table from ACPI */
status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
if (status == AE_NOT_FOUND) {
- pr_warn("CRAT table not found\n");
+ pr_info("CRAT table not found\n");
return -ENODATA;
} else if (ACPI_FAILURE(status)) {
const char *err = acpi_format_exception(status);
@@ -1963,8 +2157,6 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
-
*avail_size -= sizeof(struct crat_subtype_iolink);
if (*avail_size < 0)
return -ENOMEM;
@@ -1981,7 +2173,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
/* Fill in IOLINK subtype.
* TODO: Fill-in other fields of iolink subtype
*/
- if (adev->gmc.xgmi.connected_to_cpu) {
+ if (kdev->adev->gmc.xgmi.connected_to_cpu) {
/*
* with host gpu xgmi link, host can access gpu memory whether
* or not pcie bar type is large, so always create bidirectional
@@ -1990,19 +2182,19 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->num_hops_xgmi = 1;
- if (adev->asic_type == CHIP_ALDEBARAN) {
+ if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) {
sub_type_hdr->minimum_bandwidth_mbs =
amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
- kdev->kgd, NULL, true);
+ kdev->adev, NULL, true);
sub_type_hdr->maximum_bandwidth_mbs =
sub_type_hdr->minimum_bandwidth_mbs;
}
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
sub_type_hdr->minimum_bandwidth_mbs =
- amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, true);
+ amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, false);
+ amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
}
sub_type_hdr->proximity_domain_from = proximity_domain;
@@ -2044,11 +2236,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
sub_type_hdr->proximity_domain_from = proximity_domain_from;
sub_type_hdr->proximity_domain_to = proximity_domain_to;
sub_type_hdr->num_hops_xgmi =
- amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
+ amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, peer_kdev->kgd, false);
+ amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false);
sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, NULL, true) : 0;
+ amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
return 0;
}
@@ -2114,7 +2306,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
cu->proximity_domain = proximity_domain;
- amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info);
+ amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
cu->num_simd_per_cu = cu_info.simd_per_cu;
cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
cu->max_waves_simd = cu_info.max_waves_per_simd;
@@ -2145,7 +2337,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* report the total FB size (public+private) as a single
* private heap.
*/
- amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info);
+ local_mem_info = kdev->local_mem_info;
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);
@@ -2194,7 +2386,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
/* Fill in Subtype: IO_LINKS
* Only direct links are added here which is Link from GPU to
- * to its NUMA node. Indirect links are added by userspace.
+ * its NUMA node. Indirect links are added by userspace.
*/
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
cache_mem_filled);
@@ -2218,7 +2410,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
*/
if (kdev->hive_id) {
for (nid = 0; nid < proximity_domain; ++nid) {
- peer_dev = kfd_topology_device_by_proximity_domain(nid);
+ peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
if (!peer_dev->gpu)
continue;
if (peer_dev->gpu->hive_id != kdev->hive_id)