diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/Makefile | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 267 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 65 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 670 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 543 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 19 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/cik.c | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/cikd.h | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 16 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/vi.c | 35 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/vid.h | 5 |
18 files changed, 1667 insertions, 16 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 616dfd4a1398..908360584e4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -71,6 +71,12 @@ amdgpu-y += \ amdgpu_vce.o \ vce_v3_0.o +# add amdkfd interfaces +amdgpu-y += \ + amdgpu_amdkfd.o \ + amdgpu_amdkfd_gfx_v7.o \ + amdgpu_amdkfd_gfx_v8.o + amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 01657830b470..f3791e0d27d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2011,6 +2011,9 @@ struct amdgpu_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; + + /* amdkfd interface */ + struct kfd_dev *kfd; }; bool amdgpu_device_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c new file mode 100644 index 000000000000..bc763e0c8f4c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -0,0 +1,267 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu_amdkfd.h" +#include "amdgpu_family.h" +#include <drm/drmP.h> +#include "amdgpu.h" +#include <linux/module.h> + +const struct kfd2kgd_calls *kfd2kgd; +const struct kgd2kfd_calls *kgd2kfd; +bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); + +bool amdgpu_amdkfd_init(void) +{ +#if defined(CONFIG_HSA_AMD_MODULE) + bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); + + kgd2kfd_init_p = symbol_request(kgd2kfd_init); + + if (kgd2kfd_init_p == NULL) + return false; +#endif + return true; +} + +bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev) +{ +#if defined(CONFIG_HSA_AMD_MODULE) + bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); +#endif + + switch (rdev->asic_type) { + case CHIP_KAVERI: + kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); + break; + case CHIP_CARRIZO: + kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); + break; + default: + return false; + } + +#if defined(CONFIG_HSA_AMD_MODULE) + kgd2kfd_init_p = symbol_request(kgd2kfd_init); + + if (kgd2kfd_init_p == NULL) { + kfd2kgd = NULL; + return false; + } + + if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) { + symbol_put(kgd2kfd_init); + kfd2kgd = NULL; + kgd2kfd = NULL; + + return false; + } + + return true; +#elif defined(CONFIG_HSA_AMD) + if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) { + kfd2kgd = NULL; + kgd2kfd = NULL; + return false; + } + + return true; +#else + kfd2kgd = NULL; + return false; +#endif +} + +void amdgpu_amdkfd_fini(void) +{ + if (kgd2kfd) { + kgd2kfd->exit(); + symbol_put(kgd2kfd_init); + } +} + +void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev) +{ + if (kgd2kfd) + rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, + rdev->pdev, kfd2kgd); +} + +void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev) +{ + if (rdev->kfd) { + struct kgd2kfd_shared_resources gpu_resources = { + .compute_vmid_bitmap = 0xFF00, + + .first_compute_pipe = 1, + .compute_pipe_count = 4 - 1, + }; + + amdgpu_doorbell_get_kfd_info(rdev, + &gpu_resources.doorbell_physical_address, + &gpu_resources.doorbell_aperture_size, + &gpu_resources.doorbell_start_offset); + + kgd2kfd->device_init(rdev->kfd, &gpu_resources); + } +} + +void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev) +{ + if (rdev->kfd) { + kgd2kfd->device_exit(rdev->kfd); + rdev->kfd = NULL; + } +} + +void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev, + const void *ih_ring_entry) +{ + if (rdev->kfd) + kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); +} + +void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev) +{ + if (rdev->kfd) + kgd2kfd->suspend(rdev->kfd); +} + +int amdgpu_amdkfd_resume(struct amdgpu_device *rdev) +{ + int r = 0; + + if (rdev->kfd) + r = kgd2kfd->resume(rdev->kfd); + + return r; +} + +u32 pool_to_domain(enum kgd_memory_pool p) +{ + switch (p) { + case KGD_POOL_FRAMEBUFFER: return AMDGPU_GEM_DOMAIN_VRAM; + default: return AMDGPU_GEM_DOMAIN_GTT; + } +} + +int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr) +{ + struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + struct kgd_mem **mem = (struct kgd_mem **) mem_obj; + int r; + + BUG_ON(kgd == NULL); + BUG_ON(gpu_addr == NULL); + BUG_ON(cpu_ptr == NULL); + + *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if ((*mem) == NULL) + return -ENOMEM; + + r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, &(*mem)->bo); + if (r) { + dev_err(rdev->dev, + "failed to allocate BO for amdkfd (%d)\n", r); + return r; + } + + /* map the buffer */ + r = amdgpu_bo_reserve((*mem)->bo, true); + if (r) { + dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); + goto allocate_mem_reserve_bo_failed; + } + + r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, + &(*mem)->gpu_addr); + if (r) { + dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); + goto allocate_mem_pin_bo_failed; + } + *gpu_addr = (*mem)->gpu_addr; + + r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); + if (r) { + dev_err(rdev->dev, + "(%d) failed to map bo to kernel for amdkfd\n", r); + goto allocate_mem_kmap_bo_failed; + } + *cpu_ptr = (*mem)->cpu_ptr; + + amdgpu_bo_unreserve((*mem)->bo); + + return 0; + +allocate_mem_kmap_bo_failed: + amdgpu_bo_unpin((*mem)->bo); +allocate_mem_pin_bo_failed: + amdgpu_bo_unreserve((*mem)->bo); +allocate_mem_reserve_bo_failed: + amdgpu_bo_unref(&(*mem)->bo); + + return r; +} + +void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) +{ + struct kgd_mem *mem = (struct kgd_mem *) mem_obj; + + BUG_ON(mem == NULL); + + amdgpu_bo_reserve(mem->bo, true); + amdgpu_bo_kunmap(mem->bo); + amdgpu_bo_unpin(mem->bo); + amdgpu_bo_unreserve(mem->bo); + amdgpu_bo_unref(&(mem->bo)); + kfree(mem); +} + +uint64_t get_vmem_size(struct kgd_dev *kgd) +{ + struct amdgpu_device *rdev = + (struct amdgpu_device *)kgd; + + BUG_ON(kgd == NULL); + + return rdev->mc.real_vram_size; +} + +uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) +{ + struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + + if (rdev->asic_funcs->get_gpu_clock_counter) + return rdev->asic_funcs->get_gpu_clock_counter(rdev); + return 0; +} + +uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) +{ + struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + + /* The sclk is in quantas of 10kHz */ + return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h new file mode 100644 index 000000000000..a8be765542e6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -0,0 +1,65 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* amdgpu_amdkfd.h defines the private interface between amdgpu and amdkfd. */ + +#ifndef AMDGPU_AMDKFD_H_INCLUDED +#define AMDGPU_AMDKFD_H_INCLUDED + +#include <linux/types.h> +#include <kgd_kfd_interface.h> + +struct amdgpu_device; + +struct kgd_mem { + struct amdgpu_bo *bo; + uint64_t gpu_addr; + void *cpu_ptr; +}; + +bool amdgpu_amdkfd_init(void); +void amdgpu_amdkfd_fini(void); + +bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev); + +void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev); +int amdgpu_amdkfd_resume(struct amdgpu_device *rdev); +void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev, + const void *ih_ring_entry); +void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev); +void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev); +void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev); + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); + +/* Shared API */ +int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr); +void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); +uint64_t get_vmem_size(struct kgd_dev *kgd); +uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); + +uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); + +#endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c new file mode 100644 index 000000000000..2daad335b809 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -0,0 +1,670 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "cikd.h" +#include "cik_sdma.h" +#include "amdgpu_ucode.h" +#include "gca/gfx_7_2_d.h" +#include "gca/gfx_7_2_enum.h" +#include "gca/gfx_7_2_sh_mask.h" +#include "oss/oss_2_0_d.h" +#include "oss/oss_2_0_sh_mask.h" +#include "gmc/gmc_7_1_d.h" +#include "gmc/gmc_7_1_sh_mask.h" +#include "cik_structs.h" + +#define CIK_PIPE_PER_MEC (4) + +enum { + MAX_TRAPID = 8, /* 3 bits in the bitfield. */ + MAX_WATCH_ADDRESSES = 4 +}; + +enum { + ADDRESS_WATCH_REG_ADDR_HI = 0, + ADDRESS_WATCH_REG_ADDR_LO, + ADDRESS_WATCH_REG_CNTL, + ADDRESS_WATCH_REG_MAX +}; + +/* not defined in the CI/KV reg file */ +enum { + ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, + ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, + /* extend the mask to 26 bits to match the low address field */ + ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, + ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF +}; + +static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { + mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL, + mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL, + mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL, + mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL +}; + +union TCP_WATCH_CNTL_BITS { + struct { + uint32_t mask:24; + uint32_t vmid:4; + uint32_t atc:1; + uint32_t mode:2; + uint32_t valid:1; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); + +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); + +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int timeout); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); + +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); + +static const struct kfd2kgd_calls kfd2kgd = { + .init_gtt_mem_allocation = alloc_gtt_mem, + .free_gtt_mem = free_gtt_mem, + .get_vmem_size = get_vmem_size, + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_pipeline = kgd_init_pipeline, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, + .write_vmid_invalidate_request = write_vmid_invalidate_request, + .get_fw_version = get_fw_version +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions() +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); + + mutex_lock(&adev->srbm_mutex); + WREG32(mmSRBM_GFX_CNTL, value); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + WREG32(mmSRBM_GFX_CNTL, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; + uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(mmSH_MEM_CONFIG, sh_mem_config); + WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base); + WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit); + WREG32(mmSH_MEM_BASES, sh_mem_bases); + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished and the + * SW cleared it. So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping); + + while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid))) + cpu_relax(); + WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); + + /* Mapping vmid to pasid also for IH block */ + WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping); + + return 0; +} + +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; + uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, 0, 0); + WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8)); + WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8)); + WREG32(mmCP_HPD_EOP_VMID, 0); + WREG32(mmCP_HPD_EOP_CONTROL, hpd_size); + unlock_srbm(kgd); + + return 0; +} + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; + pipe = (pipe_id % CIK_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) +{ + uint32_t retval; + + retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + + m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; + + pr_debug("kfd: sdma base address: 0x%x\n", retval); + + return retval; +} + +static inline struct cik_mqd *get_mqd(void *mqd) +{ + return (struct cik_mqd *)mqd; +} + +static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +{ + return (struct cik_sdma_rlc_registers *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t wptr_shadow, is_wptr_shadow_valid; + struct cik_mqd *m; + + m = get_mqd(mqd); + + is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); + + acquire_queue(kgd, pipe_id, queue_id); + WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); + WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); + WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); + + WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); + WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); + WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); + + WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); + WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); + WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); + + WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); + + WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); + WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); + WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type); + + WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo); + WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi); + WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo); + WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi); + + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + m->cp_hqd_pq_rptr_report_addr_hi); + + WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); + + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo); + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi); + + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); + + WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); + + WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); + + WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); + WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); + + WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); + + if (is_wptr_shadow_valid) + WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow); + + WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); + release_queue(kgd); + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdma_rlc_virtual_addr); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, + m->sdma_rlc_rb_base); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdma_rlc_rb_base_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdma_rlc_rb_rptr_addr_lo); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdma_rlc_rb_rptr_addr_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, + m->sdma_rlc_doorbell); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdma_rlc_rb_cntl); + + return 0; +} + +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(mmCP_HQD_ACTIVE); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(mmCP_HQD_PQ_BASE) && + high == RREG32(mmCP_HQD_PQ_BASE_HI)) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t temp; + + acquire_queue(kgd, pipe_id, queue_id); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); + + WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); + + while (true) { + temp = RREG32(mmCP_HQD_ACTIVE); + if (temp & CP_HQD_ACTIVE__ACTIVE__SHIFT) + break; + if (timeout == 0) { + pr_err("kfd: cp queue preemption time out (%dms)\n", + temp); + release_queue(kgd); + return -ETIME; + } + msleep(20); + timeout -= 20; + } + + release_queue(kgd); + return 0; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int timeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + uint32_t temp; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT) + break; + if (timeout == 0) + return -ETIME; + msleep(20); + timeout -= 20; + } + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + + return 0; +} + +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + union TCP_WATCH_CNTL_BITS cntl; + unsigned int i; + + cntl.u32All = 0; + + cntl.bitfields.valid = 0; + cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; + cntl.bitfields.atc = 1; + + /* Turning off this address until we set all the registers */ + for (i = 0; i < MAX_WATCH_ADDRESSES; i++) + WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], cntl.u32All); + + return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + union TCP_WATCH_CNTL_BITS cntl; + + cntl.u32All = cntl_val; + + /* Turning off this watch point until we set all the registers */ + cntl.bitfields.valid = 0; + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], cntl.u32All); + + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_HI], addr_hi); + + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_LO], addr_lo); + + /* Enable the watch point */ + cntl.bitfields.valid = 1; + + WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], cntl.u32All); + + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(mmGRBM_GFX_INDEX, gfx_index_val); + WREG32(mmSQ_CMD, sq_cmd); + + /* Restore the GRBM_GFX_INDEX register */ + + data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | + GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | + GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK; + + WREG32(mmGRBM_GFX_INDEX, data); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; +} + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); +} + +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + const union amdgpu_firmware_header *hdr; + + BUG_ON(kgd == NULL); + + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *) + adev->sdma[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *) + adev->sdma[1].fw->data; + break; + + default: + return 0; + } + + if (hdr == NULL) + return 0; + + /* Only 12 bit in use*/ + return hdr->common.ucode_version; +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c new file mode 100644 index 000000000000..dfd1d503bccf --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -0,0 +1,543 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_ucode.h" +#include "gca/gfx_8_0_sh_mask.h" +#include "gca/gfx_8_0_d.h" +#include "gca/gfx_8_0_enum.h" +#include "oss/oss_3_0_sh_mask.h" +#include "oss/oss_3_0_d.h" +#include "gmc/gmc_8_1_sh_mask.h" +#include "gmc/gmc_8_1_d.h" +#include "vi_structs.h" +#include "vid.h" + +#define VI_PIPE_PER_MEC (4) + +struct cik_sdma_rlc_registers; + +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int timeout); +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); + +static const struct kfd2kgd_calls kfd2kgd = { + .init_gtt_mem_allocation = alloc_gtt_mem, + .free_gtt_mem = free_gtt_mem, + .get_vmem_size = get_vmem_size, + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_pipeline = kgd_init_pipeline, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + get_atc_vmid_pasid_mapping_valid, + .write_vmid_invalidate_request = write_vmid_invalidate_request, + .get_fw_version = get_fw_version +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions() +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); + + mutex_lock(&adev->srbm_mutex); + WREG32(mmSRBM_GFX_CNTL, value); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + WREG32(mmSRBM_GFX_CNTL, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1; + uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(mmSH_MEM_CONFIG, sh_mem_config); + WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base); + WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit); + WREG32(mmSH_MEM_BASES, sh_mem_bases); + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished + * and the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping); + + while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid))) + cpu_relax(); + WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); + + /* Mapping vmid to pasid also for IH block */ + WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping); + + return 0; +} + +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr) +{ + return 0; +} + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (++pipe_id / VI_PIPE_PER_MEC) + 1; + pipe = (pipe_id % VI_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) +{ + return 0; +} + +static inline struct vi_mqd *get_mqd(void *mqd) +{ + return (struct vi_mqd *)mqd; +} + +static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +{ + return (struct cik_sdma_rlc_registers *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr) +{ + struct vi_mqd *m; + uint32_t shadow_wptr, valid_wptr; + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + m = get_mqd(mqd); + + valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr)); + acquire_queue(kgd, pipe_id, queue_id); + + WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); + WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); + WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); + + WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); + WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); + WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); + WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); + WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); + WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); + WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + m->cp_hqd_pq_rptr_report_addr_hi); + + if (valid_wptr > 0) + WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr); + + WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); + + WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo); + WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi); + WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control); + WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); + WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); + WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events); + + WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo); + WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi); + WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control); + WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset); + WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size); + WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset); + WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size); + + WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); + + WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request); + WREG32(mmCP_HQD_ERROR, m->cp_hqd_error); + WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); + WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones); + + WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); + + release_queue(kgd); + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) +{ + return 0; +} + +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(mmCP_HQD_ACTIVE); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(mmCP_HQD_PQ_BASE) && + high == RREG32(mmCP_HQD_PQ_BASE_HI)) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t temp; + + acquire_queue(kgd, pipe_id, queue_id); + + WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); + + while (true) { + temp = RREG32(mmCP_HQD_ACTIVE); + if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) + break; + if (timeout == 0) { + pr_err("kfd: cp queue preemption time out (%dms)\n", + temp); + release_queue(kgd); + return -ETIME; + } + msleep(20); + timeout -= 20; + } + + release_queue(kgd); + return 0; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int timeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + uint32_t temp; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT) + break; + if (timeout == 0) + return -ETIME; + msleep(20); + timeout -= 20; + } + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + + return 0; +} + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); +} + +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(mmGRBM_GFX_INDEX, gfx_index_val); + WREG32(mmSQ_CMD, sq_cmd); + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SH_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + + WREG32(mmGRBM_GFX_INDEX, data); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return 0; +} + +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + const union amdgpu_firmware_header *hdr; + + BUG_ON(kgd == NULL); + + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *) + adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *) + adev->sdma[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *) + adev->sdma[1].fw->data; + break; + + default: + return 0; + } + + if (hdr == NULL) + return 0; + + /* Only 12 bit in use*/ + return hdr->common.ucode_version; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d63135bf29c0..1f040d85ac47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -669,6 +669,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, static int amdgpu_cs_dependencies(struct amdgpu_device *adev, struct amdgpu_cs_parser *p) { + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_ib *ib; int i, j, r; @@ -694,6 +695,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, for (j = 0; j < num_deps; ++j) { struct amdgpu_fence *fence; struct amdgpu_ring *ring; + struct amdgpu_ctx *ctx; r = amdgpu_cs_get_ring(adev, deps[j].ip_type, deps[j].ip_instance, @@ -701,14 +703,21 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, if (r) return r; + ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id); + if (ctx == NULL) + return -EINVAL; + r = amdgpu_fence_recreate(ring, p->filp, deps[j].handle, &fence); - if (r) + if (r) { + amdgpu_ctx_put(ctx); return r; + } amdgpu_sync_fence(&ib->sync, fence); amdgpu_fence_unref(&fence); + amdgpu_ctx_put(ctx); } } @@ -808,12 +817,16 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, wait->in.ring, &ring); - if (r) + if (r) { + amdgpu_ctx_put(ctx); return r; + } r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); - if (r) + if (r) { + amdgpu_ctx_put(ctx); return r; + } r = fence_wait_timeout(&fence->base, true, timeout); amdgpu_fence_unref(&fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ba46be361c9b..d79009b65867 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1207,10 +1207,15 @@ static int amdgpu_early_init(struct amdgpu_device *adev) } else { if (adev->ip_blocks[i].funcs->early_init) { r = adev->ip_blocks[i].funcs->early_init((void *)adev); - if (r) + if (r == -ENOENT) + adev->ip_block_enabled[i] = false; + else if (r) return r; + else + adev->ip_block_enabled[i] = true; + } else { + adev->ip_block_enabled[i] = true; } - adev->ip_block_enabled[i] = true; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 56da962231fc..115906f5fda0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -44,6 +44,8 @@ #include "amdgpu.h" #include "amdgpu_irq.h" +#include "amdgpu_amdkfd.h" + /* * KMS wrapper. * - 3.0.0 - initial driver @@ -527,12 +529,15 @@ static int __init amdgpu_init(void) driver->num_ioctls = amdgpu_max_kms_ioctl; amdgpu_register_atpx_handler(); + amdgpu_amdkfd_init(); + /* let modprobe override vga console setting */ return drm_pci_init(driver, pdriver); } static void __exit amdgpu_exit(void) { + amdgpu_amdkfd_fini(); drm_pci_exit(driver, pdriver); amdgpu_unregister_atpx_handler(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index db5422e65ec5..fb44dd2231b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -24,6 +24,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" +#include "amdgpu_amdkfd.h" /** * amdgpu_ih_ring_alloc - allocate memory for the IH ring @@ -199,6 +200,12 @@ restart_ih: rmb(); while (adev->irq.ih.rptr != wptr) { + u32 ring_index = adev->irq.ih.rptr >> 2; + + /* Before dispatching irq to IP blocks, send it to amdkfd */ + amdgpu_amdkfd_interrupt(adev, + (const void *) &adev->irq.ih.ring[ring_index]); + amdgpu_ih_decode_iv(adev, &entry); adev->irq.ih.rptr &= adev->irq.ih.ptr_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 5533434c7a8f..8c40a9671b9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -34,6 +34,7 @@ #include <linux/vga_switcheroo.h> #include <linux/slab.h> #include <linux/pm_runtime.h> +#include "amdgpu_amdkfd.h" #if defined(CONFIG_VGA_SWITCHEROO) bool amdgpu_has_atpx(void); @@ -61,6 +62,8 @@ int amdgpu_driver_unload_kms(struct drm_device *dev) pm_runtime_get_sync(dev->dev); + amdgpu_amdkfd_device_fini(adev); + amdgpu_acpi_fini(adev); amdgpu_device_fini(adev); @@ -118,6 +121,10 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } + amdgpu_amdkfd_load_interface(adev); + amdgpu_amdkfd_device_probe(adev); + amdgpu_amdkfd_device_init(adev); + if (amdgpu_device_is_px(dev)) { pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 341c56681841..b3b66a0d5ff7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -64,6 +64,8 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" +#include "amdgpu_amdkfd.h" + /* * Indirect registers accessor */ @@ -2448,14 +2450,21 @@ static int cik_common_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_amdkfd_suspend(adev); + return cik_common_hw_fini(adev); } static int cik_common_resume(void *handle) { + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return cik_common_hw_init(adev); + r = cik_common_hw_init(adev); + if (r) + return r; + + return amdgpu_amdkfd_resume(adev); } static bool cik_common_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index d19085a97064..a3e3dfaa01a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -552,6 +552,12 @@ #define VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_SEMAPHORE 0x00000006 +/* if PTR32, these are the bases for scratch and lds */ +#define PRIVATE_BASE(x) ((x) << 0) /* scratch */ +#define SHARED_BASE(x) ((x) << 16) /* LDS */ + +#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200 + /* valid for both DEFAULT_MTYPE and APE1_MTYPE */ enum { MTYPE_CACHED = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index f75a31df30bd..1a2d419cbf16 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -1679,25 +1679,31 @@ static int cz_dpm_unforce_dpm_levels(struct amdgpu_device *adev) if (ret) return ret; - DRM_INFO("DPM unforce state min=%d, max=%d.\n", - pi->sclk_dpm.soft_min_clk, - pi->sclk_dpm.soft_max_clk); + DRM_DEBUG("DPM unforce state min=%d, max=%d.\n", + pi->sclk_dpm.soft_min_clk, + pi->sclk_dpm.soft_max_clk); return 0; } static int cz_dpm_force_dpm_level(struct amdgpu_device *adev, - enum amdgpu_dpm_forced_level level) + enum amdgpu_dpm_forced_level level) { int ret = 0; switch (level) { case AMDGPU_DPM_FORCED_LEVEL_HIGH: + ret = cz_dpm_unforce_dpm_levels(adev); + if (ret) + return ret; ret = cz_dpm_force_highest(adev); if (ret) return ret; break; case AMDGPU_DPM_FORCED_LEVEL_LOW: + ret = cz_dpm_unforce_dpm_levels(adev); + if (ret) + return ret; ret = cz_dpm_force_lowest(adev); if (ret) return ret; @@ -1711,6 +1717,8 @@ static int cz_dpm_force_dpm_level(struct amdgpu_device *adev, break; } + adev->pm.dpm.forced_level = level; + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 08387dfd98a7..cc050a329c49 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2566,6 +2566,7 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode) struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + unsigned type; switch (mode) { case DRM_MODE_DPMS_ON: @@ -2574,6 +2575,9 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode) dce_v8_0_vga_enable(crtc, true); amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); dce_v8_0_vga_enable(crtc, false); + /* Make sure VBLANK interrupt is still enabled */ + type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); + amdgpu_irq_update(adev, &adev->crtc_irq, type); drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id); dce_v8_0_crtc_load_lut(crtc); break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 7b683fb2173c..1c7c992dea37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1813,10 +1813,7 @@ static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev, u32 data, mask; data = RREG32(mmCC_RB_BACKEND_DISABLE); - if (data & 1) - data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; - else - data = 0; + data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index fa5a4448531d..68552da40287 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -122,6 +122,32 @@ static void vi_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->smc_idx_lock, flags); } +/* smu_8_0_d.h */ +#define mmMP0PUB_IND_INDEX 0x180 +#define mmMP0PUB_IND_DATA 0x181 + +static u32 cz_smc_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&adev->smc_idx_lock, flags); + WREG32(mmMP0PUB_IND_INDEX, (reg)); + r = RREG32(mmMP0PUB_IND_DATA); + spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + return r; +} + +static void cz_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&adev->smc_idx_lock, flags); + WREG32(mmMP0PUB_IND_INDEX, (reg)); + WREG32(mmMP0PUB_IND_DATA, (v)); + spin_unlock_irqrestore(&adev->smc_idx_lock, flags); +} + static u32 vi_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags; @@ -1222,8 +1248,13 @@ static int vi_common_early_init(void *handle) bool smc_enabled = false; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->smc_rreg = &vi_smc_rreg; - adev->smc_wreg = &vi_smc_wreg; + if (adev->flags & AMDGPU_IS_APU) { + adev->smc_rreg = &cz_smc_rreg; + adev->smc_wreg = &cz_smc_wreg; + } else { + adev->smc_rreg = &vi_smc_rreg; + adev->smc_wreg = &vi_smc_wreg; + } adev->pcie_rreg = &vi_pcie_rreg; adev->pcie_wreg = &vi_pcie_wreg; adev->uvd_ctx_rreg = &vi_uvd_ctx_rreg; diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 31bb89452e12..d98aa9d82fa1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -66,6 +66,11 @@ #define AMDGPU_NUM_OF_VMIDS 8 +#define PIPEID(x) ((x) << 0) +#define MEID(x) ((x) << 2) +#define VMID(x) ((x) << 4) +#define QUEUEID(x) ((x) << 8) + #define RB_BITMAP_WIDTH_PER_SH 2 #define MC_SEQ_MISC0__MT__MASK 0xf0000000 |