diff options
author | 2020-06-08 04:47:57 +0000 | |
---|---|---|
committer | 2020-06-08 04:47:57 +0000 | |
commit | c349dbc7938c71a30e13c1be4acc1976165f4630 (patch) | |
tree | 8798187dfd7a927a15123e8dad31b782b074baa8 /sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c | |
parent | The errcheck() function treats an errno of ERANGE or EDOM as something (diff) | |
download | wireguard-openbsd-c349dbc7938c71a30e13c1be4acc1976165f4630.tar.xz wireguard-openbsd-c349dbc7938c71a30e13c1be4acc1976165f4630.zip |
update drm to linux 5.7
adds kernel support for
amdgpu: vega20, raven2, renoir, navi10, navi14
inteldrm: icelake, tigerlake
Thanks to the OpenBSD Foundation for sponsoring this work, kettenis@ for
helping, patrick@ for helping adapt rockchip drm and many developers for
testing.
Diffstat (limited to 'sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c')
-rw-r--r-- | sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c | 443 |
1 files changed, 430 insertions, 13 deletions
diff --git a/sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c b/sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c index 35e7da3a443..44ad023fab7 100644 --- a/sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c +++ b/sys/dev/pci/drm/amd/amdgpu/amdgpu_gfx.c @@ -22,13 +22,80 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#include <drm/drmP.h> + #include "amdgpu.h" #include "amdgpu_gfx.h" +#include "amdgpu_rlc.h" +#include "amdgpu_ras.h" + +/* delay 0.1 second to enable gfx off feature */ +#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) /* - * GPU scratch registers helpers function. + * GPU GFX IP block helpers function. */ + +int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, + int pipe, int queue) +{ + int bit = 0; + + bit += mec * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + bit += pipe * adev->gfx.mec.num_queue_per_pipe; + bit += queue; + + return bit; +} + +void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit, + int *mec, int *pipe, int *queue) +{ + *queue = bit % adev->gfx.mec.num_queue_per_pipe; + *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) + % adev->gfx.mec.num_pipe_per_mec; + *mec = (bit / adev->gfx.mec.num_queue_per_pipe) + / adev->gfx.mec.num_pipe_per_mec; + +} + +bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, + int mec, int pipe, int queue) +{ + return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue), + adev->gfx.mec.queue_bitmap); +} + +int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, + int me, int pipe, int queue) +{ + int bit = 0; + + bit += me * adev->gfx.me.num_pipe_per_me + * adev->gfx.me.num_queue_per_pipe; + bit += pipe * adev->gfx.me.num_queue_per_pipe; + bit += queue; + + return bit; +} + +void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, + int *me, int *pipe, int *queue) +{ + *queue = bit % adev->gfx.me.num_queue_per_pipe; + *pipe = (bit / adev->gfx.me.num_queue_per_pipe) + % adev->gfx.me.num_pipe_per_me; + *me = (bit / adev->gfx.me.num_queue_per_pipe) + / adev->gfx.me.num_pipe_per_me; +} + +bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, + int me, int pipe, int queue) +{ + return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue), + adev->gfx.me.queue_bitmap); +} + /** * amdgpu_gfx_scratch_get - Allocate a scratch register * @@ -127,6 +194,14 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) return adev->gfx.mec.num_mec > 1; } +bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, + int queue) +{ + /* Policy: make queue 0 of each pipe as high priority compute queue */ + return (queue == 0); + +} + void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe, mec; @@ -165,6 +240,28 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; } +void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) +{ + int i, queue, me; + + for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) { + queue = i % adev->gfx.me.num_queue_per_pipe; + me = (i / adev->gfx.me.num_queue_per_pipe) + / adev->gfx.me.num_pipe_per_me; + + if (me >= adev->gfx.me.num_me) + break; + /* policy: amdgpu owns the first queue per pipe at this stage + * will extend to mulitple queues per pipe later */ + if (me == 0 && queue < 1) + set_bit(i, adev->gfx.me.queue_bitmap); + } + + /* update the number of active graphics rings */ + adev->gfx.num_gfx_rings = + bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); +} + static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -179,7 +276,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; - amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); + amdgpu_gfx_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); /* * 1. Using pipes 2/3 from MEC 2 seems cause problems. @@ -209,14 +306,14 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, mtx_init(&kiq->ring_lock, IPL_TTY); - r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs); + r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); if (r) return r; ring->adev = NULL; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_KIQ; + ring->doorbell_index = adev->doorbell_index.kiq; r = amdgpu_gfx_kiq_acquire(adev, ring); if (r) @@ -232,10 +329,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, return r; } -void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) { - amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs); + amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); amdgpu_ring_fini(ring); } @@ -272,9 +368,9 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, return 0; } -/* create MQD for each compute queue */ -int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, - unsigned mqd_size) +/* create MQD for each compute/gfx queue */ +int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size) { struct amdgpu_ring *ring = NULL; int r, i; @@ -301,6 +397,27 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { + /* create MQD for each KGQ */ + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->gfx.me.mqd_backup[i]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + } + } + } + /* create MQD for each KCQ */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -309,7 +426,7 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, &ring->mqd_gpu_addr, &ring->mqd_ptr); if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); return r; } @@ -323,11 +440,21 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, return 0; } -void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) +void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) { struct amdgpu_ring *ring = NULL; int i; + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + kfree(adev->gfx.me.mqd_backup[i]); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); + } + } + for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; kfree(adev->gfx.mec.mqd_backup[i]); @@ -342,3 +469,293 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) &ring->mqd_gpu_addr, &ring->mqd_ptr); } + +int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &kiq->ring; + int i; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * + adev->gfx.num_compute_rings)) + return -ENOMEM; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) + kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], + RESET_QUEUES, 0, 0); + + return amdgpu_ring_test_helper(kiq_ring); +} + +int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + uint64_t queue_mask = 0; + int r, i; + + if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) + return -EINVAL; + + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) + continue; + + /* This situation may be hit in the future if a new HW + * generation exposes more than 64 queues. If so, the + * definition of queue_mask needs updating */ + if (WARN_ON(i > (sizeof(queue_mask)*8))) { + DRM_ERROR("Invalid KCQ enabled: %d\n", i); + break; + } + + queue_mask |= (1ull << i); + } + + DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, + kiq_ring->queue); + + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * + adev->gfx.num_compute_rings + + kiq->pmf->set_resources_size); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + return r; + } + + kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]); + + r = amdgpu_ring_test_helper(kiq_ring); + if (r) + DRM_ERROR("KCQ enable failed\n"); + + return r; +} + +/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable + * + * @adev: amdgpu_device pointer + * @bool enable true: enable gfx off feature, false: disable gfx off feature + * + * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled. + * 2. other client can send request to disable gfx off feature, the request should be honored. + * 3. other client can cancel their request of disable gfx off feature + * 4. other client should not send request to enable gfx off feature before disable gfx off feature. + */ + +void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) +{ + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + if (!enable) + adev->gfx.gfx_off_req_count++; + else if (adev->gfx.gfx_off_req_count > 0) + adev->gfx.gfx_off_req_count--; + + if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE); + } else if (!enable && adev->gfx.gfx_off_state) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) + adev->gfx.gfx_off_state = false; + } + + mutex_unlock(&adev->gfx.gfx_off_mutex); +} + +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_fs_if fs_info = { + .sysfs_name = "gfx_err_count", + }; + struct ras_ih_if ih_info = { + .cb = amdgpu_gfx_process_ras_data_cb, + }; + + if (!adev->gfx.ras_if) { + adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->gfx.ras_if) + return -ENOMEM; + adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; + adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gfx.ras_if->sub_block_index = 0; + strlcpy(adev->gfx.ras_if->name, "gfx", sizeof(adev->gfx.ras_if->name)); + } + fs_info.head = ih_info.head = *adev->gfx.ras_if; + + r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, + &fs_info, &ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) { + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); + if (r) + goto late_fini; + } else { + /* free gfx ras_if if ras is not supported */ + r = 0; + goto free; + } + + return 0; +late_fini: + amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info); +free: + kfree(adev->gfx.ras_if); + adev->gfx.ras_if = NULL; + return r; +} + +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && + adev->gfx.ras_if) { + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + .cb = amdgpu_gfx_process_ras_data_cb, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} + +int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, + void *err_data, + struct amdgpu_iv_entry *entry) +{ + /* TODO ue will trigger an interrupt. + * + * When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->gfx.funcs->query_ras_error_count) + adev->gfx.funcs->query_ras_error_count(adev, err_data); + amdgpu_ras_reset_gpu(adev); + } + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + DRM_ERROR("CP ECC ERROR IRQ\n"); + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} + +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &kiq->ring; + + BUG_ON(!ring->funcs->emit_rreg); + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_rreg(ring, reg); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which cause + * gpu_recover() hang there. + * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_read; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_read; + + return adev->wb.wb[kiq->reg_val_offs]; + +failed_kiq_read: + pr_err("failed to read reg:%x\n", reg); + return ~0; +} + +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &kiq->ring; + + BUG_ON(!ring->funcs->emit_wreg); + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_wreg(ring, reg, v); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which cause + * gpu_recover() hang there. + * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_write; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + + drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_write; + + return; + +failed_kiq_write: + pr_err("failed to write reg:%x\n", reg); +} |