diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 538 |
1 files changed, 449 insertions, 89 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index dd593ad0614a..3f0a4a415907 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1,5 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2014-2022 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -39,28 +40,39 @@ #include "kfd_svm.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ras.h" +#include "amdgpu.h" /* topology_device_list - Master list of all topology devices */ static struct list_head topology_device_list; static struct kfd_system_properties sys_props; static DECLARE_RWSEM(topology_lock); -static atomic_t topology_crat_proximity_domain; +static uint32_t topology_crat_proximity_domain; -struct kfd_topology_device *kfd_topology_device_by_proximity_domain( +struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock( uint32_t proximity_domain) { struct kfd_topology_device *top_dev; struct kfd_topology_device *device = NULL; - down_read(&topology_lock); - list_for_each_entry(top_dev, &topology_device_list, list) if (top_dev->proximity_domain == proximity_domain) { device = top_dev; break; } + return device; +} + +struct kfd_topology_device *kfd_topology_device_by_proximity_domain( + uint32_t proximity_domain) +{ + struct kfd_topology_device *device = NULL; + + down_read(&topology_lock); + + device = kfd_topology_device_by_proximity_domain_no_lock( + proximity_domain); up_read(&topology_lock); return device; @@ -113,7 +125,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) return device; } -struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) +struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev) { struct kfd_topology_device *top_dev; struct kfd_dev *device = NULL; @@ -121,7 +133,7 @@ struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu && top_dev->gpu->kgd == kgd) { + if (top_dev->gpu && top_dev->gpu->adev == adev) { device = top_dev->gpu; break; } @@ -137,6 +149,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) struct kfd_mem_properties *mem; struct kfd_cache_properties *cache; struct kfd_iolink_properties *iolink; + struct kfd_iolink_properties *p2plink; struct kfd_perf_properties *perf; list_del(&dev->list); @@ -162,6 +175,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) kfree(iolink); } + while (dev->p2p_link_props.next != &dev->p2p_link_props) { + p2plink = container_of(dev->p2p_link_props.next, + struct kfd_iolink_properties, list); + list_del(&p2plink->list); + kfree(p2plink); + } + while (dev->perf_props.next != &dev->perf_props) { perf = container_of(dev->perf_props.next, struct kfd_perf_properties, list); @@ -203,6 +223,7 @@ struct kfd_topology_device *kfd_create_topology_device( INIT_LIST_HEAD(&dev->mem_props); INIT_LIST_HEAD(&dev->cache_props); INIT_LIST_HEAD(&dev->io_link_props); + INIT_LIST_HEAD(&dev->p2p_link_props); INIT_LIST_HEAD(&dev->perf_props); list_add_tail(&dev->list, device_list); @@ -454,6 +475,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.caches_count); sysfs_show_32bit_prop(buffer, offs, "io_links_count", dev->node_props.io_links_count); + sysfs_show_32bit_prop(buffer, offs, "p2p_links_count", + dev->node_props.p2p_links_count); sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base", dev->node_props.cpu_core_id_base); sysfs_show_32bit_prop(buffer, offs, "simd_id_base", @@ -503,7 +526,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, if (dev->gpu) { log_max_watch_addr = - __ilog2_u32(dev->gpu->device_info->num_of_watch_points); + __ilog2_u32(dev->gpu->device_info.num_of_watch_points); if (log_max_watch_addr) { dev->node_props.capability |= @@ -515,7 +538,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); } - if (dev->gpu->device_info->asic_family == CHIP_TONGA) + if (dev->gpu->adev->asic_type == CHIP_TONGA) dev->node_props.capability |= HSA_CAP_AQL_QUEUE_DOUBLE_MAP; @@ -531,7 +554,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", dev->gpu->sdma_fw_version); sysfs_show_64bit_prop(buffer, offs, "unique_id", - amdgpu_amdkfd_get_unique_id(dev->gpu->kgd)); + dev->gpu->adev->unique_id); } @@ -557,6 +580,7 @@ static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr) static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) { + struct kfd_iolink_properties *p2plink; struct kfd_iolink_properties *iolink; struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; @@ -574,6 +598,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) dev->kobj_iolink = NULL; } + if (dev->kobj_p2plink) { + list_for_each_entry(p2plink, &dev->p2p_link_props, list) + if (p2plink->kobj) { + kfd_remove_sysfs_file(p2plink->kobj, + &p2plink->attr); + p2plink->kobj = NULL; + } + kobject_del(dev->kobj_p2plink); + kobject_put(dev->kobj_p2plink); + dev->kobj_p2plink = NULL; + } + if (dev->kobj_cache) { list_for_each_entry(cache, &dev->cache_props, list) if (cache->kobj) { @@ -620,6 +656,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, uint32_t id) { + struct kfd_iolink_properties *p2plink; struct kfd_iolink_properties *iolink; struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; @@ -657,6 +694,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, if (!dev->kobj_iolink) return -ENOMEM; + dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node); + if (!dev->kobj_p2plink) + return -ENOMEM; + dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); if (!dev->kobj_perf) return -ENOMEM; @@ -746,6 +787,27 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, i++; } + i = 0; + list_for_each_entry(p2plink, &dev->p2p_link_props, list) { + p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (!p2plink->kobj) + return -ENOMEM; + ret = kobject_init_and_add(p2plink->kobj, &iolink_type, + dev->kobj_p2plink, "%d", i); + if (ret < 0) { + kobject_put(p2plink->kobj); + return ret; + } + + p2plink->attr.name = "properties"; + p2plink->attr.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&iolink->attr); + ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); + if (ret < 0) + return ret; + i++; + } + /* All hardware blocks have the same number of attributes. */ num_attrs = ARRAY_SIZE(perf_attr_iommu); list_for_each_entry(perf, &dev->perf_props, list) { @@ -1059,7 +1121,7 @@ int kfd_topology_init(void) down_write(&topology_lock); kfd_topology_update_device_list(&temp_topology_device_list, &topology_device_list); - atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1); + topology_crat_proximity_domain = sys_props.num_devices-1; ret = kfd_topology_update_sysfs(); up_write(&topology_lock); @@ -1101,15 +1163,12 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) uint32_t buf[7]; uint64_t local_mem_size; int i; - struct kfd_local_mem_info local_mem_info; if (!gpu) return 0; - amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info); - - local_mem_size = local_mem_info.local_mem_size_private + - local_mem_info.local_mem_size_public; + local_mem_size = gpu->local_mem_info.local_mem_size_private + + gpu->local_mem_info.local_mem_size_public; buf[0] = gpu->pdev->devfn; buf[1] = gpu->pdev->subsystem_vendor | @@ -1137,6 +1196,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) struct kfd_mem_properties *mem; struct kfd_cache_properties *cache; struct kfd_iolink_properties *iolink; + struct kfd_iolink_properties *p2plink; down_write(&topology_lock); list_for_each_entry(dev, &topology_device_list, list) { @@ -1157,6 +1217,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) cache->gpu = dev->gpu; list_for_each_entry(iolink, &dev->io_link_props, list) iolink->gpu = dev->gpu; + list_for_each_entry(p2plink, &dev->p2p_link_props, list) + p2plink->gpu = dev->gpu; break; } } @@ -1189,7 +1251,7 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) * for APUs - If CRAT from ACPI reports more than one bank, then * all the banks will report the same mem_clk_max information */ - amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info); list_for_each_entry(mem, &dev->mem_props, list) mem->mem_clk_max = local_mem_info.mem_clk_max; @@ -1217,8 +1279,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, /* set gpu (dev) flags. */ } else { if (!dev->gpu->pci_atomic_requested || - dev->gpu->device_info->asic_family == - CHIP_HAWAII) + dev->gpu->adev->asic_type == CHIP_HAWAII) link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; } @@ -1239,7 +1300,7 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, */ if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS || (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && - to_dev->gpu->device_info->asic_family == CHIP_VEGA20)) { + KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) { outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT; } @@ -1264,6 +1325,12 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) if (!peer_dev) continue; + /* Include the CPU peer in GPU hive if connected over xGMI. */ + if (!peer_dev->gpu && !peer_dev->node_props.hive_id && + dev->node_props.hive_id && + dev->gpu->adev->gmc.xgmi.connected_to_cpu) + peer_dev->node_props.hive_id = dev->node_props.hive_id; + list_for_each_entry(inbound_link, &peer_dev->io_link_props, list) { if (inbound_link->node_to != link->node_from) @@ -1274,6 +1341,256 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); } } + + /* Create indirect links so apply flags setting to all */ + list_for_each_entry(link, &dev->p2p_link_props, list) { + link->flags = CRAT_IOLINK_FLAGS_ENABLED; + kfd_set_iolink_no_atomics(dev, NULL, link); + peer_dev = kfd_topology_device_by_proximity_domain( + link->node_to); + + if (!peer_dev) + continue; + + list_for_each_entry(inbound_link, &peer_dev->p2p_link_props, + list) { + if (inbound_link->node_to != link->node_from) + continue; + + inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; + kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); + kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); + } + } +} + +static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev, + struct kfd_iolink_properties *p2plink) +{ + int ret; + + p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (!p2plink->kobj) + return -ENOMEM; + + ret = kobject_init_and_add(p2plink->kobj, &iolink_type, + dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1); + if (ret < 0) { + kobject_put(p2plink->kobj); + return ret; + } + + p2plink->attr.name = "properties"; + p2plink->attr.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&p2plink->attr); + ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); + if (ret < 0) + return ret; + + return 0; +} + +static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node) +{ + struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link; + struct kfd_iolink_properties *props = NULL, *props2 = NULL; + struct kfd_topology_device *cpu_dev; + int ret = 0; + int i, num_cpu; + + num_cpu = 0; + list_for_each_entry(cpu_dev, &topology_device_list, list) { + if (cpu_dev->gpu) + break; + num_cpu++; + } + + gpu_link = list_first_entry(&kdev->io_link_props, + struct kfd_iolink_properties, list); + if (!gpu_link) + return -ENOMEM; + + for (i = 0; i < num_cpu; i++) { + /* CPU <--> GPU */ + if (gpu_link->node_to == i) + continue; + + /* find CPU <--> CPU links */ + cpu_link = NULL; + cpu_dev = kfd_topology_device_by_proximity_domain(i); + if (cpu_dev) { + list_for_each_entry(tmp_link, + &cpu_dev->io_link_props, list) { + if (tmp_link->node_to == gpu_link->node_to) { + cpu_link = tmp_link; + break; + } + } + } + + if (!cpu_link) + return -ENOMEM; + + /* CPU <--> CPU <--> GPU, GPU node*/ + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + + memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties)); + props->weight = gpu_link->weight + cpu_link->weight; + props->min_latency = gpu_link->min_latency + cpu_link->min_latency; + props->max_latency = gpu_link->max_latency + cpu_link->max_latency; + props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth); + props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth); + + props->node_from = gpu_node; + props->node_to = i; + kdev->node_props.p2p_links_count++; + list_add_tail(&props->list, &kdev->p2p_link_props); + ret = kfd_build_p2p_node_entry(kdev, props); + if (ret < 0) + return ret; + + /* for small Bar, no CPU --> GPU in-direct links */ + if (kfd_dev_is_large_bar(kdev->gpu)) { + /* CPU <--> CPU <--> GPU, CPU node*/ + props2 = kfd_alloc_struct(props2); + if (!props2) + return -ENOMEM; + + memcpy(props2, props, sizeof(struct kfd_iolink_properties)); + props2->node_from = i; + props2->node_to = gpu_node; + props2->kobj = NULL; + cpu_dev->node_props.p2p_links_count++; + list_add_tail(&props2->list, &cpu_dev->p2p_link_props); + ret = kfd_build_p2p_node_entry(cpu_dev, props2); + if (ret < 0) + return ret; + } + } + return ret; +} + +#if defined(CONFIG_HSA_AMD_P2P) +static int kfd_add_peer_prop(struct kfd_topology_device *kdev, + struct kfd_topology_device *peer, int from, int to) +{ + struct kfd_iolink_properties *props = NULL; + struct kfd_iolink_properties *iolink1, *iolink2, *iolink3; + struct kfd_topology_device *cpu_dev; + int ret = 0; + + if (!amdgpu_device_is_peer_accessible( + kdev->gpu->adev, + peer->gpu->adev)) + return ret; + + iolink1 = list_first_entry(&kdev->io_link_props, + struct kfd_iolink_properties, list); + if (!iolink1) + return -ENOMEM; + + iolink2 = list_first_entry(&peer->io_link_props, + struct kfd_iolink_properties, list); + if (!iolink2) + return -ENOMEM; + + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + + memcpy(props, iolink1, sizeof(struct kfd_iolink_properties)); + + props->weight = iolink1->weight + iolink2->weight; + props->min_latency = iolink1->min_latency + iolink2->min_latency; + props->max_latency = iolink1->max_latency + iolink2->max_latency; + props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth); + props->max_bandwidth = min(iolink2->max_bandwidth, iolink2->max_bandwidth); + + if (iolink1->node_to != iolink2->node_to) { + /* CPU->CPU link*/ + cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to); + if (cpu_dev) { + list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) + if (iolink3->node_to == iolink2->node_to) + break; + + props->weight += iolink3->weight; + props->min_latency += iolink3->min_latency; + props->max_latency += iolink3->max_latency; + props->min_bandwidth = min(props->min_bandwidth, + iolink3->min_bandwidth); + props->max_bandwidth = min(props->max_bandwidth, + iolink3->max_bandwidth); + } else { + WARN(1, "CPU node not found"); + } + } + + props->node_from = from; + props->node_to = to; + peer->node_props.p2p_links_count++; + list_add_tail(&props->list, &peer->p2p_link_props); + ret = kfd_build_p2p_node_entry(peer, props); + + return ret; +} +#endif + +static int kfd_dev_create_p2p_links(void) +{ + struct kfd_topology_device *dev; + struct kfd_topology_device *new_dev; +#if defined(CONFIG_HSA_AMD_P2P) + uint32_t i; +#endif + uint32_t k; + int ret = 0; + + k = 0; + list_for_each_entry(dev, &topology_device_list, list) + k++; + if (k < 2) + return 0; + + new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list); + if (WARN_ON(!new_dev->gpu)) + return 0; + + k--; + + /* create in-direct links */ + ret = kfd_create_indirect_link_prop(new_dev, k); + if (ret < 0) + goto out; + + /* create p2p links */ +#if defined(CONFIG_HSA_AMD_P2P) + i = 0; + list_for_each_entry(dev, &topology_device_list, list) { + if (dev == new_dev) + break; + if (!dev->gpu || !dev->gpu->adev || + (dev->gpu->hive_id && + dev->gpu->hive_id == new_dev->gpu->hive_id)) + goto next; + + /* check if node(s) is/are peer accessible in one direction or bi-direction */ + ret = kfd_add_peer_prop(new_dev, dev, i, k); + if (ret < 0) + goto out; + + ret = kfd_add_peer_prop(dev, new_dev, k, i); + if (ret < 0) + goto out; +next: + i++; + } +#endif + +out: + return ret; } int kfd_topology_add_device(struct kfd_dev *gpu) @@ -1286,34 +1603,14 @@ int kfd_topology_add_device(struct kfd_dev *gpu) void *crat_image = NULL; size_t image_size = 0; int proximity_domain; - struct amdgpu_device *adev; + int i; + const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; INIT_LIST_HEAD(&temp_topology_device_list); gpu_id = kfd_generate_gpu_id(gpu); - pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); - proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); - - adev = (struct amdgpu_device *)(gpu->kgd); - - /* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */ - if (gpu->hive_id && adev->gmc.xgmi.connected_to_cpu) { - struct kfd_topology_device *top_dev; - - down_read(&topology_lock); - - list_for_each_entry(top_dev, &topology_device_list, list) { - if (top_dev->gpu) - break; - - top_dev->node_props.hive_id = gpu->hive_id; - } - - up_read(&topology_lock); - } - /* Check to see if this gpu device exists in the topology_device_list. * If so, assign the gpu to that device, * else create a Virtual CRAT for this gpu device and then parse that @@ -1322,12 +1619,16 @@ int kfd_topology_add_device(struct kfd_dev *gpu) */ dev = kfd_assign_gpu(gpu); if (!dev) { + down_write(&topology_lock); + proximity_domain = ++topology_crat_proximity_domain; + res = kfd_create_crat_image_virtual(&crat_image, &image_size, COMPUTE_UNIT_GPU, gpu, proximity_domain); if (res) { pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", gpu_id); + topology_crat_proximity_domain--; return res; } res = kfd_parse_crat_table(crat_image, @@ -1336,10 +1637,10 @@ int kfd_topology_add_device(struct kfd_dev *gpu) if (res) { pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", gpu_id); + topology_crat_proximity_domain--; goto err; } - down_write(&topology_lock); kfd_topology_update_device_list(&temp_topology_device_list, &topology_device_list); @@ -1364,6 +1665,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->gpu_id = gpu_id; gpu->id = gpu_id; + kfd_dev_create_p2p_links(); + /* TODO: Move the following lines to function * kfd_add_non_crat_information */ @@ -1372,45 +1675,49 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * needed for the topology */ - amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info); + amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info); - strncpy(dev->node_props.name, gpu->device_info->asic_name, - KFD_TOPOLOGY_PUBLIC_NAME_SIZE); + for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) { + dev->node_props.name[i] = __tolower(asic_name[i]); + if (asic_name[i] == '\0') + break; + } + dev->node_props.name[i] = '\0'; dev->node_props.simd_arrays_per_engine = cu_info.num_shader_arrays_per_engine; - dev->node_props.gfx_target_version = gpu->device_info->gfx_target_version; + dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version; dev->node_props.vendor_id = gpu->pdev->vendor; dev->node_props.device_id = gpu->pdev->device; dev->node_props.capability |= - ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) << - HSA_CAP_ASIC_REVISION_SHIFT) & + ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & HSA_CAP_ASIC_REVISION_MASK); dev->node_props.location_id = pci_dev_id(gpu->pdev); dev->node_props.domain = pci_domain_nr(gpu->pdev->bus); dev->node_props.max_engine_clk_fcompute = - amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd); + amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); dev->node_props.max_engine_clk_ccompute = cpufreq_quick_get_max(0) / 1000; dev->node_props.drm_render_minor = gpu->shared_resources.drm_render_minor; dev->node_props.hive_id = gpu->hive_id; - dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines; + dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); dev->node_props.num_sdma_xgmi_engines = - gpu->device_info->num_xgmi_sdma_engines; + kfd_get_num_xgmi_sdma_engines(gpu); dev->node_props.num_sdma_queues_per_engine = - gpu->device_info->num_sdma_queues_per_engine; + gpu->device_info.num_sdma_queues_per_engine - + gpu->device_info.num_reserved_sdma_queues_per_engine; dev->node_props.num_gws = (dev->gpu->gws && dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? - amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0; + dev->gpu->adev->gds.gws_size : 0; dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm); kfd_fill_mem_clk_max_info(dev); kfd_fill_iolink_non_crat_info(dev); - switch (dev->gpu->device_info->asic_family) { + switch (dev->gpu->adev->asic_type) { case CHIP_KAVERI: case CHIP_HAWAII: case CHIP_TONGA: @@ -1429,36 +1736,20 @@ int kfd_topology_add_device(struct kfd_dev *gpu) HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - case CHIP_CYAN_SKILLFISH: - dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << - HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & - HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); - break; default: - WARN(1, "Unexpected ASIC family %u", - dev->gpu->device_info->asic_family); + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 0, 1)) + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + else + WARN(1, "Unexpected ASIC family %u", + dev->gpu->adev->asic_type); } /* - * Overwrite ATS capability according to needs_iommu_device to fix - * potential missing corresponding bit in CRAT of BIOS. - */ + * Overwrite ATS capability according to needs_iommu_device to fix + * potential missing corresponding bit in CRAT of BIOS. + */ if (dev->gpu->use_iommu_v2) dev->node_props.capability |= HSA_CAP_ATS_PRESENT; else @@ -1469,7 +1760,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * because it doesn't consider masked out CUs * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd */ - if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { + if (dev->gpu->adev->asic_type == CHIP_CARRIZO) { dev->node_props.simd_count = cu_info.simd_per_cu * cu_info.cu_active_number; dev->node_props.max_waves_per_simd = 10; @@ -1477,16 +1768,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu) /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */ dev->node_props.capability |= - ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? + ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? HSA_CAP_SRAM_EDCSUPPORTED : 0; - dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? + dev->node_props.capability |= + ((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? HSA_CAP_MEM_EDCSUPPORTED : 0; - if (adev->asic_type != CHIP_VEGA10) - dev->node_props.capability |= (adev->ras_enabled != 0) ? + if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1)) + dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? HSA_CAP_RASEVENTNOTIFY : 0; - if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) + if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev->kfd.dev)) dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; kfd_debug_print_topology(); @@ -1498,25 +1790,93 @@ err: return res; } +/** + * kfd_topology_update_io_links() - Update IO links after device removal. + * @proximity_domain: Proximity domain value of the dev being removed. + * + * The topology list currently is arranged in increasing order of + * proximity domain. + * + * Two things need to be done when a device is removed: + * 1. All the IO links to this device need to be removed. + * 2. All nodes after the current device node need to move + * up once this device node is removed from the topology + * list. As a result, the proximity domain values for + * all nodes after the node being deleted reduce by 1. + * This would also cause the proximity domain values for + * io links to be updated based on new proximity domain + * values. + * + * Context: The caller must hold write topology_lock. + */ +static void kfd_topology_update_io_links(int proximity_domain) +{ + struct kfd_topology_device *dev; + struct kfd_iolink_properties *iolink, *p2plink, *tmp; + + list_for_each_entry(dev, &topology_device_list, list) { + if (dev->proximity_domain > proximity_domain) + dev->proximity_domain--; + + list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) { + /* + * If there is an io link to the dev being deleted + * then remove that IO link also. + */ + if (iolink->node_to == proximity_domain) { + list_del(&iolink->list); + dev->node_props.io_links_count--; + } else { + if (iolink->node_from > proximity_domain) + iolink->node_from--; + if (iolink->node_to > proximity_domain) + iolink->node_to--; + } + } + + list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) { + /* + * If there is a p2p link to the dev being deleted + * then remove that p2p link also. + */ + if (p2plink->node_to == proximity_domain) { + list_del(&p2plink->list); + dev->node_props.p2p_links_count--; + } else { + if (p2plink->node_from > proximity_domain) + p2plink->node_from--; + if (p2plink->node_to > proximity_domain) + p2plink->node_to--; + } + } + } +} + int kfd_topology_remove_device(struct kfd_dev *gpu) { struct kfd_topology_device *dev, *tmp; uint32_t gpu_id; int res = -ENODEV; + int i = 0; down_write(&topology_lock); - list_for_each_entry_safe(dev, tmp, &topology_device_list, list) + list_for_each_entry_safe(dev, tmp, &topology_device_list, list) { if (dev->gpu == gpu) { gpu_id = dev->gpu_id; kfd_remove_sysfs_node_entry(dev); kfd_release_topology_device(dev); sys_props.num_devices--; + kfd_topology_update_io_links(i); + topology_crat_proximity_domain = sys_props.num_devices-1; + sys_props.generation_count++; res = 0; if (kfd_topology_update_sysfs() < 0) kfd_topology_release_sysfs(); break; } + i++; + } up_write(&topology_lock); @@ -1592,7 +1952,7 @@ void kfd_double_confirm_iommu_support(struct kfd_dev *gpu) gpu->use_iommu_v2 = false; - if (!gpu->device_info->needs_iommu_device) + if (!gpu->device_info.needs_iommu_device) return; down_read(&topology_lock); |