diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 186 |
1 files changed, 119 insertions, 67 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 127d41d0e4f0..65a1d4f9004b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1,5 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2014-2022 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -52,6 +53,7 @@ extern const struct kfd2kgd_calls arcturus_kfd2kgd; extern const struct kfd2kgd_calls aldebaran_kfd2kgd; extern const struct kfd2kgd_calls gfx_v10_kfd2kgd; extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd; +extern const struct kfd2kgd_calls gfx_v11_kfd2kgd; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size); @@ -59,39 +61,63 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd); static int kfd_resume(struct kfd_dev *kfd); -static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd) +static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) { uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0]; switch (sdma_version) { - case IP_VERSION(4, 0, 0):/* VEGA10 */ - case IP_VERSION(4, 0, 1):/* VEGA12 */ - case IP_VERSION(4, 1, 0):/* RAVEN */ - case IP_VERSION(4, 1, 1):/* RAVEN */ - case IP_VERSION(4, 1, 2):/* RENIOR */ - case IP_VERSION(5, 2, 1):/* VANGOGH */ - case IP_VERSION(5, 2, 3):/* YELLOW_CARP */ - kfd->device_info.num_sdma_queues_per_engine = 2; - break; - case IP_VERSION(4, 2, 0):/* VEGA20 */ - case IP_VERSION(4, 2, 2):/* ARCTUTUS */ - case IP_VERSION(4, 4, 0):/* ALDEBARAN */ - case IP_VERSION(5, 0, 0):/* NAVI10 */ - case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */ - case IP_VERSION(5, 0, 2):/* NAVI14 */ - case IP_VERSION(5, 0, 5):/* NAVI12 */ - case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */ - case IP_VERSION(5, 2, 2):/* NAVY_FLOUDER */ - case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */ - case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */ - kfd->device_info.num_sdma_queues_per_engine = 8; - break; - default: - dev_warn(kfd_device, - "Default sdma queue per engine(8) is set due to " - "mismatch of sdma ip block(SDMA_HWIP:0x%x).\n", - sdma_version); - kfd->device_info.num_sdma_queues_per_engine = 8; + case IP_VERSION(4, 0, 0):/* VEGA10 */ + case IP_VERSION(4, 0, 1):/* VEGA12 */ + case IP_VERSION(4, 1, 0):/* RAVEN */ + case IP_VERSION(4, 1, 1):/* RAVEN */ + case IP_VERSION(4, 1, 2):/* RENOIR */ + case IP_VERSION(5, 2, 1):/* VANGOGH */ + case IP_VERSION(5, 2, 3):/* YELLOW_CARP */ + case IP_VERSION(5, 2, 6):/* GC 10.3.6 */ + case IP_VERSION(5, 2, 7):/* GC 10.3.7 */ + kfd->device_info.num_sdma_queues_per_engine = 2; + break; + case IP_VERSION(4, 2, 0):/* VEGA20 */ + case IP_VERSION(4, 2, 2):/* ARCTURUS */ + case IP_VERSION(4, 4, 0):/* ALDEBARAN */ + case IP_VERSION(5, 0, 0):/* NAVI10 */ + case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */ + case IP_VERSION(5, 0, 2):/* NAVI14 */ + case IP_VERSION(5, 0, 5):/* NAVI12 */ + case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */ + case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */ + case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */ + case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */ + case IP_VERSION(6, 0, 0): + case IP_VERSION(6, 0, 1): + case IP_VERSION(6, 0, 2): + case IP_VERSION(6, 0, 3): + kfd->device_info.num_sdma_queues_per_engine = 8; + break; + default: + dev_warn(kfd_device, + "Default sdma queue per engine(8) is set due to mismatch of sdma ip block(SDMA_HWIP:0x%x).\n", + sdma_version); + kfd->device_info.num_sdma_queues_per_engine = 8; + } + + switch (sdma_version) { + case IP_VERSION(6, 0, 0): + case IP_VERSION(6, 0, 2): + case IP_VERSION(6, 0, 3): + /* Reserve 1 for paging and 1 for gfx */ + kfd->device_info.num_reserved_sdma_queues_per_engine = 2; + /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */ + kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL; + break; + case IP_VERSION(6, 0, 1): + /* Reserve 1 for paging and 1 for gfx */ + kfd->device_info.num_reserved_sdma_queues_per_engine = 2; + /* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... */ + kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL; + break; + default: + break; } } @@ -110,7 +136,10 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) case IP_VERSION(9, 4, 2): /* ALDEBARAN */ case IP_VERSION(10, 3, 1): /* VANGOGH */ case IP_VERSION(10, 3, 3): /* YELLOW_CARP */ + case IP_VERSION(10, 3, 6): /* GC 10.3.6 */ + case IP_VERSION(10, 3, 7): /* GC 10.3.7 */ case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */ + case IP_VERSION(10, 1, 4): case IP_VERSION(10, 1, 10): /* NAVI10 */ case IP_VERSION(10, 1, 2): /* NAVI12 */ case IP_VERSION(10, 1, 1): /* NAVI14 */ @@ -120,6 +149,12 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */ kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; break; + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): + kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; + break; default: dev_warn(kfd_device, "v9 event interrupt handler is set due to " "mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version); @@ -144,7 +179,7 @@ static void kfd_device_info_init(struct kfd_dev *kfd, kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t); kfd->device_info.supports_cwsr = true; - kfd_device_info_set_sdma_queue_num(kfd); + kfd_device_info_set_sdma_info(kfd); kfd_device_info_set_event_interrupt_class(kfd); @@ -155,7 +190,11 @@ static void kfd_device_info_init(struct kfd_dev *kfd, if (gc_version < IP_VERSION(11, 0, 0)) { /* Navi2x+, Navi1x+ */ - if (gc_version >= IP_VERSION(10, 3, 0)) + if (gc_version == IP_VERSION(10, 3, 6)) + kfd->device_info.no_atomic_fw_version = 14; + else if (gc_version == IP_VERSION(10, 3, 7)) + kfd->device_info.no_atomic_fw_version = 3; + else if (gc_version >= IP_VERSION(10, 3, 0)) kfd->device_info.no_atomic_fw_version = 92; else if (gc_version >= IP_VERSION(10, 1, 1)) kfd->device_info.no_atomic_fw_version = 145; @@ -308,6 +347,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) break; /* Cyan Skillfish */ case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): gfx_target_version = 100103; if (!vf) f2g = &gfx_v10_kfd2kgd; @@ -344,6 +384,29 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) if (!vf) f2g = &gfx_v10_3_kfd2kgd; break; + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): + gfx_target_version = 100306; + if (!vf) + f2g = &gfx_v10_3_kfd2kgd; + break; + case IP_VERSION(11, 0, 0): + gfx_target_version = 110000; + f2g = &gfx_v11_kfd2kgd; + break; + case IP_VERSION(11, 0, 1): + gfx_target_version = 110003; + f2g = &gfx_v11_kfd2kgd; + break; + case IP_VERSION(11, 0, 2): + gfx_target_version = 110002; + f2g = &gfx_v11_kfd2kgd; + break; + case IP_VERSION(11, 0, 3): + /* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */ + gfx_target_version = 110001; + f2g = &gfx_v11_kfd2kgd; + break; default: break; } @@ -405,10 +468,14 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_nv1x_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); - } else { + } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx10_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); + } else { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx11_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex); } kfd->cwsr_enabled = true; @@ -437,7 +504,8 @@ static int kfd_gws_init(struct kfd_dev *kfd) return ret; } -static void kfd_smi_init(struct kfd_dev *dev) { +static void kfd_smi_init(struct kfd_dev *dev) +{ INIT_LIST_HEAD(&dev->smi_clients); spin_lock_init(&dev->smi_lock); } @@ -480,15 +548,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } /* Verify module parameters regarding mapped process number*/ - if ((hws_max_conc_proc < 0) - || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { - dev_err(kfd_device, - "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", - hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, - kfd->vm_info.vmid_num_kfd); + if (hws_max_conc_proc >= 0) + kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd); + else kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; - } else - kfd->max_proc_per_quantum = hws_max_conc_proc; /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * @@ -533,7 +596,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_doorbell_error; } - kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; + if (amdgpu_use_xgmi_p2p) + kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; kfd->noretry = kfd->adev->gmc.noretry; @@ -570,13 +634,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, svm_migrate_init(kfd->adev); - if(kgd2kfd_resume_iommu(kfd)) + if (kgd2kfd_resume_iommu(kfd)) goto device_iommu_error; if (kfd_resume(kfd)) goto kfd_resume_error; - kfd->dbgmgr = NULL; + amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info); if (kfd_topology_add_device(kfd)) { dev_err(kfd_device, "Error adding device to topology\n"); @@ -784,7 +848,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_unlock_irqrestore(&kfd->interrupt_lock, flags); } -int kgd2kfd_quiesce_mm(struct mm_struct *mm) +int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger) { struct kfd_process *p; int r; @@ -798,7 +862,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm) return -ESRCH; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); - r = kfd_process_evict_queues(p); + r = kfd_process_evict_queues(p, trigger); kfd_unref_process(p); return r; @@ -876,8 +940,6 @@ out: static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size) { - unsigned int num_of_longs; - if (WARN_ON(buf_size < chunk_size)) return -EINVAL; if (WARN_ON(buf_size == 0)) @@ -888,11 +950,8 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, kfd->gtt_sa_chunk_size = chunk_size; kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; - num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) / - BITS_PER_LONG; - - kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL); - + kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks, + GFP_KERNEL); if (!kfd->gtt_sa_bitmap) return -ENOMEM; @@ -902,13 +961,12 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, mutex_init(&kfd->gtt_sa_lock); return 0; - } static void kfd_gtt_sa_fini(struct kfd_dev *kfd) { mutex_destroy(&kfd->gtt_sa_lock); - kfree(kfd->gtt_sa_bitmap); + bitmap_free(kfd->gtt_sa_bitmap); } static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr, @@ -976,7 +1034,7 @@ kfd_gtt_restart_search: /* If we need only one chunk, mark it as allocated and get out */ if (size <= kfd->gtt_sa_chunk_size) { pr_debug("Single bit\n"); - set_bit(found, kfd->gtt_sa_bitmap); + __set_bit(found, kfd->gtt_sa_bitmap); goto kfd_gtt_out; } @@ -1014,10 +1072,8 @@ kfd_gtt_restart_search: (*mem_obj)->range_start, (*mem_obj)->range_end); /* Mark the chunks as allocated */ - for (found = (*mem_obj)->range_start; - found <= (*mem_obj)->range_end; - found++) - set_bit(found, kfd->gtt_sa_bitmap); + bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start, + (*mem_obj)->range_end - (*mem_obj)->range_start + 1); kfd_gtt_out: mutex_unlock(&kfd->gtt_sa_lock); @@ -1032,8 +1088,6 @@ kfd_gtt_no_free_chunk: int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) { - unsigned int bit; - /* Act like kfree when trying to free a NULL object */ if (!mem_obj) return 0; @@ -1044,10 +1098,8 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) mutex_lock(&kfd->gtt_sa_lock); /* Mark the chunks as free */ - for (bit = mem_obj->range_start; - bit <= mem_obj->range_end; - bit++) - clear_bit(bit, kfd->gtt_sa_bitmap); + bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start, + mem_obj->range_end - mem_obj->range_start + 1); mutex_unlock(&kfd->gtt_sa_lock); |