aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c267
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c670
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c543
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cikd.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_dpm.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vid.h5
18 files changed, 1667 insertions, 16 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 616dfd4a1398..908360584e4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -71,6 +71,12 @@ amdgpu-y += \
amdgpu_vce.o \
vce_v3_0.o
+# add amdkfd interfaces
+amdgpu-y += \
+ amdgpu_amdkfd.o \
+ amdgpu_amdkfd_gfx_v7.o \
+ amdgpu_amdkfd_gfx_v8.o
+
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 01657830b470..f3791e0d27d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -2011,6 +2011,9 @@ struct amdgpu_device {
/* tracking pinned memory */
u64 vram_pin_size;
u64 gart_pin_size;
+
+ /* amdkfd interface */
+ struct kfd_dev *kfd;
};
bool amdgpu_device_is_px(struct drm_device *dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
new file mode 100644
index 000000000000..bc763e0c8f4c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_family.h"
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include <linux/module.h>
+
+const struct kfd2kgd_calls *kfd2kgd;
+const struct kgd2kfd_calls *kgd2kfd;
+bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+
+bool amdgpu_amdkfd_init(void)
+{
+#if defined(CONFIG_HSA_AMD_MODULE)
+ bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+
+ kgd2kfd_init_p = symbol_request(kgd2kfd_init);
+
+ if (kgd2kfd_init_p == NULL)
+ return false;
+#endif
+ return true;
+}
+
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
+{
+#if defined(CONFIG_HSA_AMD_MODULE)
+ bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+#endif
+
+ switch (rdev->asic_type) {
+ case CHIP_KAVERI:
+ kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
+ break;
+ case CHIP_CARRIZO:
+ kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
+ break;
+ default:
+ return false;
+ }
+
+#if defined(CONFIG_HSA_AMD_MODULE)
+ kgd2kfd_init_p = symbol_request(kgd2kfd_init);
+
+ if (kgd2kfd_init_p == NULL) {
+ kfd2kgd = NULL;
+ return false;
+ }
+
+ if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
+ symbol_put(kgd2kfd_init);
+ kfd2kgd = NULL;
+ kgd2kfd = NULL;
+
+ return false;
+ }
+
+ return true;
+#elif defined(CONFIG_HSA_AMD)
+ if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
+ kfd2kgd = NULL;
+ kgd2kfd = NULL;
+ return false;
+ }
+
+ return true;
+#else
+ kfd2kgd = NULL;
+ return false;
+#endif
+}
+
+void amdgpu_amdkfd_fini(void)
+{
+ if (kgd2kfd) {
+ kgd2kfd->exit();
+ symbol_put(kgd2kfd_init);
+ }
+}
+
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
+{
+ if (kgd2kfd)
+ rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
+ rdev->pdev, kfd2kgd);
+}
+
+void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
+{
+ if (rdev->kfd) {
+ struct kgd2kfd_shared_resources gpu_resources = {
+ .compute_vmid_bitmap = 0xFF00,
+
+ .first_compute_pipe = 1,
+ .compute_pipe_count = 4 - 1,
+ };
+
+ amdgpu_doorbell_get_kfd_info(rdev,
+ &gpu_resources.doorbell_physical_address,
+ &gpu_resources.doorbell_aperture_size,
+ &gpu_resources.doorbell_start_offset);
+
+ kgd2kfd->device_init(rdev->kfd, &gpu_resources);
+ }
+}
+
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
+{
+ if (rdev->kfd) {
+ kgd2kfd->device_exit(rdev->kfd);
+ rdev->kfd = NULL;
+ }
+}
+
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+ const void *ih_ring_entry)
+{
+ if (rdev->kfd)
+ kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+}
+
+void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
+{
+ if (rdev->kfd)
+ kgd2kfd->suspend(rdev->kfd);
+}
+
+int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
+{
+ int r = 0;
+
+ if (rdev->kfd)
+ r = kgd2kfd->resume(rdev->kfd);
+
+ return r;
+}
+
+u32 pool_to_domain(enum kgd_memory_pool p)
+{
+ switch (p) {
+ case KGD_POOL_FRAMEBUFFER: return AMDGPU_GEM_DOMAIN_VRAM;
+ default: return AMDGPU_GEM_DOMAIN_GTT;
+ }
+}
+
+int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+ void **mem_obj, uint64_t *gpu_addr,
+ void **cpu_ptr)
+{
+ struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+ struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
+ int r;
+
+ BUG_ON(kgd == NULL);
+ BUG_ON(gpu_addr == NULL);
+ BUG_ON(cpu_ptr == NULL);
+
+ *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if ((*mem) == NULL)
+ return -ENOMEM;
+
+ r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, &(*mem)->bo);
+ if (r) {
+ dev_err(rdev->dev,
+ "failed to allocate BO for amdkfd (%d)\n", r);
+ return r;
+ }
+
+ /* map the buffer */
+ r = amdgpu_bo_reserve((*mem)->bo, true);
+ if (r) {
+ dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+ goto allocate_mem_reserve_bo_failed;
+ }
+
+ r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
+ &(*mem)->gpu_addr);
+ if (r) {
+ dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+ goto allocate_mem_pin_bo_failed;
+ }
+ *gpu_addr = (*mem)->gpu_addr;
+
+ r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
+ if (r) {
+ dev_err(rdev->dev,
+ "(%d) failed to map bo to kernel for amdkfd\n", r);
+ goto allocate_mem_kmap_bo_failed;
+ }
+ *cpu_ptr = (*mem)->cpu_ptr;
+
+ amdgpu_bo_unreserve((*mem)->bo);
+
+ return 0;
+
+allocate_mem_kmap_bo_failed:
+ amdgpu_bo_unpin((*mem)->bo);
+allocate_mem_pin_bo_failed:
+ amdgpu_bo_unreserve((*mem)->bo);
+allocate_mem_reserve_bo_failed:
+ amdgpu_bo_unref(&(*mem)->bo);
+
+ return r;
+}
+
+void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
+{
+ struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
+
+ BUG_ON(mem == NULL);
+
+ amdgpu_bo_reserve(mem->bo, true);
+ amdgpu_bo_kunmap(mem->bo);
+ amdgpu_bo_unpin(mem->bo);
+ amdgpu_bo_unreserve(mem->bo);
+ amdgpu_bo_unref(&(mem->bo));
+ kfree(mem);
+}
+
+uint64_t get_vmem_size(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *rdev =
+ (struct amdgpu_device *)kgd;
+
+ BUG_ON(kgd == NULL);
+
+ return rdev->mc.real_vram_size;
+}
+
+uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+
+ if (rdev->asic_funcs->get_gpu_clock_counter)
+ return rdev->asic_funcs->get_gpu_clock_counter(rdev);
+ return 0;
+}
+
+uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+
+ /* The sclk is in quantas of 10kHz */
+ return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
new file mode 100644
index 000000000000..a8be765542e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* amdgpu_amdkfd.h defines the private interface between amdgpu and amdkfd. */
+
+#ifndef AMDGPU_AMDKFD_H_INCLUDED
+#define AMDGPU_AMDKFD_H_INCLUDED
+
+#include <linux/types.h>
+#include <kgd_kfd_interface.h>
+
+struct amdgpu_device;
+
+struct kgd_mem {
+ struct amdgpu_bo *bo;
+ uint64_t gpu_addr;
+ void *cpu_ptr;
+};
+
+bool amdgpu_amdkfd_init(void);
+void amdgpu_amdkfd_fini(void);
+
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
+
+void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev);
+int amdgpu_amdkfd_resume(struct amdgpu_device *rdev);
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+ const void *ih_ring_entry);
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev);
+void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev);
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev);
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
+
+/* Shared API */
+int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+ void **mem_obj, uint64_t *gpu_addr,
+ void **cpu_ptr);
+void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
+uint64_t get_vmem_size(struct kgd_dev *kgd);
+uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
+
+uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
+
+#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
new file mode 100644
index 000000000000..2daad335b809
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -0,0 +1,670 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "cikd.h"
+#include "cik_sdma.h"
+#include "amdgpu_ucode.h"
+#include "gca/gfx_7_2_d.h"
+#include "gca/gfx_7_2_enum.h"
+#include "gca/gfx_7_2_sh_mask.h"
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+#include "cik_structs.h"
+
+#define CIK_PIPE_PER_MEC (4)
+
+enum {
+ MAX_TRAPID = 8, /* 3 bits in the bitfield. */
+ MAX_WATCH_ADDRESSES = 4
+};
+
+enum {
+ ADDRESS_WATCH_REG_ADDR_HI = 0,
+ ADDRESS_WATCH_REG_ADDR_LO,
+ ADDRESS_WATCH_REG_CNTL,
+ ADDRESS_WATCH_REG_MAX
+};
+
+/* not defined in the CI/KV reg file */
+enum {
+ ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
+ ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
+ ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
+ /* extend the mask to 26 bits to match the low address field */
+ ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
+ ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
+};
+
+static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
+ mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
+ mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
+ mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
+ mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
+};
+
+union TCP_WATCH_CNTL_BITS {
+ struct {
+ uint32_t mask:24;
+ uint32_t vmid:4;
+ uint32_t atc:1;
+ uint32_t mode:2;
+ uint32_t valid:1;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid);
+
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t hpd_size, uint64_t hpd_gpu_addr);
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id);
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id);
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int timeout);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
+
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+
+static const struct kfd2kgd_calls kfd2kgd = {
+ .init_gtt_mem_allocation = alloc_gtt_mem,
+ .free_gtt_mem = free_gtt_mem,
+ .get_vmem_size = get_vmem_size,
+ .get_gpu_clock_counter = get_gpu_clock_counter,
+ .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_pipeline = kgd_init_pipeline,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
+ .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
+ .write_vmid_invalidate_request = write_vmid_invalidate_request,
+ .get_fw_version = get_fw_version
+};
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions()
+{
+ return (struct kfd2kgd_calls *)&kfd2kgd;
+}
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+ return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
+
+ mutex_lock(&adev->srbm_mutex);
+ WREG32(mmSRBM_GFX_CNTL, value);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ WREG32(mmSRBM_GFX_CNTL, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
+ uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+
+ lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+ unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ lock_srbm(kgd, 0, 0, 0, vmid);
+
+ WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
+ WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
+ WREG32(mmSH_MEM_BASES, sh_mem_bases);
+
+ unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished and the
+ * SW cleared it. So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
+
+ while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
+ cpu_relax();
+ WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
+
+ return 0;
+}
+
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t hpd_size, uint64_t hpd_gpu_addr)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
+ uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+
+ lock_srbm(kgd, mec, pipe, 0, 0);
+ WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
+ WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
+ WREG32(mmCP_HPD_EOP_VMID, 0);
+ WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
+ unlock_srbm(kgd);
+
+ return 0;
+}
+
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
+ pipe = (pipe_id % CIK_PIPE_PER_MEC);
+
+ lock_srbm(kgd, mec, pipe, 0, 0);
+
+ WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(kgd);
+
+ return 0;
+}
+
+static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+{
+ uint32_t retval;
+
+ retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+ m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
+
+ pr_debug("kfd: sdma base address: 0x%x\n", retval);
+
+ return retval;
+}
+
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+ return (struct cik_mqd *)mqd;
+}
+
+static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+ return (struct cik_sdma_rlc_registers *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t wptr_shadow, is_wptr_shadow_valid;
+ struct cik_mqd *m;
+
+ m = get_mqd(mqd);
+
+ is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
+ WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
+ WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
+
+ WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
+ WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
+ WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
+
+ WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
+ WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
+ WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
+
+ WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
+
+ WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
+ WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
+ WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
+
+ WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
+ WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
+ WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
+ WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
+
+ WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ m->cp_hqd_pq_rptr_report_addr_hi);
+
+ WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
+
+ WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
+
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
+
+ WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
+
+ WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
+
+ WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
+ WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
+
+ WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
+
+ if (is_wptr_shadow_valid)
+ WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
+
+ WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+ release_queue(kgd);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct cik_sdma_rlc_registers *m;
+ uint32_t sdma_base_addr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(m);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+ m->sdma_rlc_virtual_addr);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE,
+ m->sdma_rlc_rb_base);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdma_rlc_rb_base_hi);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdma_rlc_rb_rptr_addr_lo);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdma_rlc_rb_rptr_addr_hi);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
+ m->sdma_rlc_doorbell);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ m->sdma_rlc_rb_cntl);
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ act = RREG32(mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(mmCP_HQD_PQ_BASE) &&
+ high == RREG32(mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(kgd);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct cik_sdma_rlc_registers *m;
+ uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(m);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t temp;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+
+ while (true) {
+ temp = RREG32(mmCP_HQD_ACTIVE);
+ if (temp & CP_HQD_ACTIVE__ACTIVE__SHIFT)
+ break;
+ if (timeout == 0) {
+ pr_err("kfd: cp queue preemption time out (%dms)\n",
+ temp);
+ release_queue(kgd);
+ return -ETIME;
+ }
+ msleep(20);
+ timeout -= 20;
+ }
+
+ release_queue(kgd);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int timeout)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct cik_sdma_rlc_registers *m;
+ uint32_t sdma_base_addr;
+ uint32_t temp;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(m);
+
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
+ break;
+ if (timeout == 0)
+ return -ETIME;
+ msleep(20);
+ timeout -= 20;
+ }
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
+
+ return 0;
+}
+
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ union TCP_WATCH_CNTL_BITS cntl;
+ unsigned int i;
+
+ cntl.u32All = 0;
+
+ cntl.bitfields.valid = 0;
+ cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
+ cntl.bitfields.atc = 1;
+
+ /* Turning off this address until we set all the registers */
+ for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
+ WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX +
+ ADDRESS_WATCH_REG_CNTL], cntl.u32All);
+
+ return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ union TCP_WATCH_CNTL_BITS cntl;
+
+ cntl.u32All = cntl_val;
+
+ /* Turning off this watch point until we set all the registers */
+ cntl.bitfields.valid = 0;
+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+ ADDRESS_WATCH_REG_CNTL], cntl.u32All);
+
+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+ ADDRESS_WATCH_REG_ADDR_HI], addr_hi);
+
+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+ ADDRESS_WATCH_REG_ADDR_LO], addr_lo);
+
+ /* Enable the watch point */
+ cntl.bitfields.valid = 1;
+
+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+ ADDRESS_WATCH_REG_CNTL], cntl.u32All);
+
+ return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t data;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32(mmSQ_CMD, sq_cmd);
+
+ /* Restore the GRBM_GFX_INDEX register */
+
+ data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
+ GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
+ GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
+
+ WREG32(mmGRBM_GFX_INDEX, data);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset)
+{
+ return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+}
+
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ const union amdgpu_firmware_header *hdr;
+
+ BUG_ON(kgd == NULL);
+
+ switch (type) {
+ case KGD_ENGINE_PFP:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.pfp_fw->data;
+ break;
+
+ case KGD_ENGINE_ME:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.me_fw->data;
+ break;
+
+ case KGD_ENGINE_CE:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.ce_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC1:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.mec_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC2:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.mec2_fw->data;
+ break;
+
+ case KGD_ENGINE_RLC:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.rlc_fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA1:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->sdma[0].fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA2:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->sdma[1].fw->data;
+ break;
+
+ default:
+ return 0;
+ }
+
+ if (hdr == NULL)
+ return 0;
+
+ /* Only 12 bit in use*/
+ return hdr->common.ucode_version;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
new file mode 100644
index 000000000000..dfd1d503bccf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ucode.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_enum.h"
+#include "oss/oss_3_0_sh_mask.h"
+#include "oss/oss_3_0_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+#include "gmc/gmc_8_1_d.h"
+#include "vi_structs.h"
+#include "vid.h"
+
+#define VI_PIPE_PER_MEC (4)
+
+struct cik_sdma_rlc_registers;
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases);
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid);
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t hpd_size, uint64_t hpd_gpu_addr);
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id);
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id);
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int timeout);
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+
+static const struct kfd2kgd_calls kfd2kgd = {
+ .init_gtt_mem_allocation = alloc_gtt_mem,
+ .free_gtt_mem = free_gtt_mem,
+ .get_vmem_size = get_vmem_size,
+ .get_gpu_clock_counter = get_gpu_clock_counter,
+ .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_pipeline = kgd_init_pipeline,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_pasid =
+ get_atc_vmid_pasid_mapping_pasid,
+ .get_atc_vmid_pasid_mapping_valid =
+ get_atc_vmid_pasid_mapping_valid,
+ .write_vmid_invalidate_request = write_vmid_invalidate_request,
+ .get_fw_version = get_fw_version
+};
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions()
+{
+ return (struct kfd2kgd_calls *)&kfd2kgd;
+}
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+ return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
+
+ mutex_lock(&adev->srbm_mutex);
+ WREG32(mmSRBM_GFX_CNTL, value);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ WREG32(mmSRBM_GFX_CNTL, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
+ uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC);
+
+ lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+ unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ lock_srbm(kgd, 0, 0, 0, vmid);
+
+ WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
+ WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
+ WREG32(mmSH_MEM_BASES, sh_mem_bases);
+
+ unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
+
+ while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
+ cpu_relax();
+ WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
+
+ return 0;
+}
+
+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t hpd_size, uint64_t hpd_gpu_addr)
+{
+ return 0;
+}
+
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
+ pipe = (pipe_id % VI_PIPE_PER_MEC);
+
+ lock_srbm(kgd, mec, pipe, 0, 0);
+
+ WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);
+
+ unlock_srbm(kgd);
+
+ return 0;
+}
+
+static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+{
+ return 0;
+}
+
+static inline struct vi_mqd *get_mqd(void *mqd)
+{
+ return (struct vi_mqd *)mqd;
+}
+
+static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+ return (struct cik_sdma_rlc_registers *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr)
+{
+ struct vi_mqd *m;
+ uint32_t shadow_wptr, valid_wptr;
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ m = get_mqd(mqd);
+
+ valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
+ WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
+ WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
+
+ WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
+ WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
+ WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
+ WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
+ WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
+ WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
+ WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
+ WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ m->cp_hqd_pq_rptr_report_addr_hi);
+
+ if (valid_wptr > 0)
+ WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr);
+
+ WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
+
+ WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
+ WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
+ WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
+ WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+ WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+ WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
+
+ WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
+ WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
+ WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
+ WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
+ WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
+ WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
+ WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
+
+ WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
+
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
+ WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
+ WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+ WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
+
+ WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+
+ release_queue(kgd);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+{
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ act = RREG32(mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(mmCP_HQD_PQ_BASE) &&
+ high == RREG32(mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(kgd);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct cik_sdma_rlc_registers *m;
+ uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(m);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t temp;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+
+ while (true) {
+ temp = RREG32(mmCP_HQD_ACTIVE);
+ if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+ break;
+ if (timeout == 0) {
+ pr_err("kfd: cp queue preemption time out (%dms)\n",
+ temp);
+ release_queue(kgd);
+ return -ETIME;
+ }
+ msleep(20);
+ timeout -= 20;
+ }
+
+ release_queue(kgd);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int timeout)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct cik_sdma_rlc_registers *m;
+ uint32_t sdma_base_addr;
+ uint32_t temp;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(m);
+
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
+ break;
+ if (timeout == 0)
+ return -ETIME;
+ msleep(20);
+ timeout -= 20;
+ }
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+}
+
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+ return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo)
+{
+ return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32(mmSQ_CMD, sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(mmGRBM_GFX_INDEX, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset)
+{
+ return 0;
+}
+
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ const union amdgpu_firmware_header *hdr;
+
+ BUG_ON(kgd == NULL);
+
+ switch (type) {
+ case KGD_ENGINE_PFP:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.pfp_fw->data;
+ break;
+
+ case KGD_ENGINE_ME:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.me_fw->data;
+ break;
+
+ case KGD_ENGINE_CE:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.ce_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC1:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.mec_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC2:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.mec2_fw->data;
+ break;
+
+ case KGD_ENGINE_RLC:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->gfx.rlc_fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA1:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->sdma[0].fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA2:
+ hdr = (const union amdgpu_firmware_header *)
+ adev->sdma[1].fw->data;
+ break;
+
+ default:
+ return 0;
+ }
+
+ if (hdr == NULL)
+ return 0;
+
+ /* Only 12 bit in use*/
+ return hdr->common.ucode_version;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d63135bf29c0..1f040d85ac47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -669,6 +669,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
struct amdgpu_cs_parser *p)
{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_ib *ib;
int i, j, r;
@@ -694,6 +695,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
for (j = 0; j < num_deps; ++j) {
struct amdgpu_fence *fence;
struct amdgpu_ring *ring;
+ struct amdgpu_ctx *ctx;
r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
deps[j].ip_instance,
@@ -701,14 +703,21 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
if (r)
return r;
+ ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
+ if (ctx == NULL)
+ return -EINVAL;
+
r = amdgpu_fence_recreate(ring, p->filp,
deps[j].handle,
&fence);
- if (r)
+ if (r) {
+ amdgpu_ctx_put(ctx);
return r;
+ }
amdgpu_sync_fence(&ib->sync, fence);
amdgpu_fence_unref(&fence);
+ amdgpu_ctx_put(ctx);
}
}
@@ -808,12 +817,16 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
wait->in.ring, &ring);
- if (r)
+ if (r) {
+ amdgpu_ctx_put(ctx);
return r;
+ }
r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
- if (r)
+ if (r) {
+ amdgpu_ctx_put(ctx);
return r;
+ }
r = fence_wait_timeout(&fence->base, true, timeout);
amdgpu_fence_unref(&fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ba46be361c9b..d79009b65867 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1207,10 +1207,15 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
} else {
if (adev->ip_blocks[i].funcs->early_init) {
r = adev->ip_blocks[i].funcs->early_init((void *)adev);
- if (r)
+ if (r == -ENOENT)
+ adev->ip_block_enabled[i] = false;
+ else if (r)
return r;
+ else
+ adev->ip_block_enabled[i] = true;
+ } else {
+ adev->ip_block_enabled[i] = true;
}
- adev->ip_block_enabled[i] = true;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 56da962231fc..115906f5fda0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -44,6 +44,8 @@
#include "amdgpu.h"
#include "amdgpu_irq.h"
+#include "amdgpu_amdkfd.h"
+
/*
* KMS wrapper.
* - 3.0.0 - initial driver
@@ -527,12 +529,15 @@ static int __init amdgpu_init(void)
driver->num_ioctls = amdgpu_max_kms_ioctl;
amdgpu_register_atpx_handler();
+ amdgpu_amdkfd_init();
+
/* let modprobe override vga console setting */
return drm_pci_init(driver, pdriver);
}
static void __exit amdgpu_exit(void)
{
+ amdgpu_amdkfd_fini();
drm_pci_exit(driver, pdriver);
amdgpu_unregister_atpx_handler();
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index db5422e65ec5..fb44dd2231b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -24,6 +24,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"
+#include "amdgpu_amdkfd.h"
/**
* amdgpu_ih_ring_alloc - allocate memory for the IH ring
@@ -199,6 +200,12 @@ restart_ih:
rmb();
while (adev->irq.ih.rptr != wptr) {
+ u32 ring_index = adev->irq.ih.rptr >> 2;
+
+ /* Before dispatching irq to IP blocks, send it to amdkfd */
+ amdgpu_amdkfd_interrupt(adev,
+ (const void *) &adev->irq.ih.ring[ring_index]);
+
amdgpu_ih_decode_iv(adev, &entry);
adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 5533434c7a8f..8c40a9671b9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -34,6 +34,7 @@
#include <linux/vga_switcheroo.h>
#include <linux/slab.h>
#include <linux/pm_runtime.h>
+#include "amdgpu_amdkfd.h"
#if defined(CONFIG_VGA_SWITCHEROO)
bool amdgpu_has_atpx(void);
@@ -61,6 +62,8 @@ int amdgpu_driver_unload_kms(struct drm_device *dev)
pm_runtime_get_sync(dev->dev);
+ amdgpu_amdkfd_device_fini(adev);
+
amdgpu_acpi_fini(adev);
amdgpu_device_fini(adev);
@@ -118,6 +121,10 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
"Error during ACPI methods call\n");
}
+ amdgpu_amdkfd_load_interface(adev);
+ amdgpu_amdkfd_device_probe(adev);
+ amdgpu_amdkfd_device_init(adev);
+
if (amdgpu_device_is_px(dev)) {
pm_runtime_use_autosuspend(dev->dev);
pm_runtime_set_autosuspend_delay(dev->dev, 5000);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 341c56681841..b3b66a0d5ff7 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -64,6 +64,8 @@
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
+
/*
* Indirect registers accessor
*/
@@ -2448,14 +2450,21 @@ static int cik_common_suspend(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_amdkfd_suspend(adev);
+
return cik_common_hw_fini(adev);
}
static int cik_common_resume(void *handle)
{
+ int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- return cik_common_hw_init(adev);
+ r = cik_common_hw_init(adev);
+ if (r)
+ return r;
+
+ return amdgpu_amdkfd_resume(adev);
}
static bool cik_common_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index d19085a97064..a3e3dfaa01a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -552,6 +552,12 @@
#define VCE_CMD_IB_AUTO 0x00000005
#define VCE_CMD_SEMAPHORE 0x00000006
+/* if PTR32, these are the bases for scratch and lds */
+#define PRIVATE_BASE(x) ((x) << 0) /* scratch */
+#define SHARED_BASE(x) ((x) << 16) /* LDS */
+
+#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200
+
/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
enum {
MTYPE_CACHED = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index f75a31df30bd..1a2d419cbf16 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -1679,25 +1679,31 @@ static int cz_dpm_unforce_dpm_levels(struct amdgpu_device *adev)
if (ret)
return ret;
- DRM_INFO("DPM unforce state min=%d, max=%d.\n",
- pi->sclk_dpm.soft_min_clk,
- pi->sclk_dpm.soft_max_clk);
+ DRM_DEBUG("DPM unforce state min=%d, max=%d.\n",
+ pi->sclk_dpm.soft_min_clk,
+ pi->sclk_dpm.soft_max_clk);
return 0;
}
static int cz_dpm_force_dpm_level(struct amdgpu_device *adev,
- enum amdgpu_dpm_forced_level level)
+ enum amdgpu_dpm_forced_level level)
{
int ret = 0;
switch (level) {
case AMDGPU_DPM_FORCED_LEVEL_HIGH:
+ ret = cz_dpm_unforce_dpm_levels(adev);
+ if (ret)
+ return ret;
ret = cz_dpm_force_highest(adev);
if (ret)
return ret;
break;
case AMDGPU_DPM_FORCED_LEVEL_LOW:
+ ret = cz_dpm_unforce_dpm_levels(adev);
+ if (ret)
+ return ret;
ret = cz_dpm_force_lowest(adev);
if (ret)
return ret;
@@ -1711,6 +1717,8 @@ static int cz_dpm_force_dpm_level(struct amdgpu_device *adev,
break;
}
+ adev->pm.dpm.forced_level = level;
+
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 08387dfd98a7..cc050a329c49 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2566,6 +2566,7 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ unsigned type;
switch (mode) {
case DRM_MODE_DPMS_ON:
@@ -2574,6 +2575,9 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
dce_v8_0_vga_enable(crtc, true);
amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
dce_v8_0_vga_enable(crtc, false);
+ /* Make sure VBLANK interrupt is still enabled */
+ type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
+ amdgpu_irq_update(adev, &adev->crtc_irq, type);
drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
dce_v8_0_crtc_load_lut(crtc);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 7b683fb2173c..1c7c992dea37 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1813,10 +1813,7 @@ static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
u32 data, mask;
data = RREG32(mmCC_RB_BACKEND_DISABLE);
- if (data & 1)
- data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
- else
- data = 0;
+ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index fa5a4448531d..68552da40287 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -122,6 +122,32 @@ static void vi_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
}
+/* smu_8_0_d.h */
+#define mmMP0PUB_IND_INDEX 0x180
+#define mmMP0PUB_IND_DATA 0x181
+
+static u32 cz_smc_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32(mmMP0PUB_IND_INDEX, (reg));
+ r = RREG32(mmMP0PUB_IND_DATA);
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ return r;
+}
+
+static void cz_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32(mmMP0PUB_IND_INDEX, (reg));
+ WREG32(mmMP0PUB_IND_DATA, (v));
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+}
+
static u32 vi_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags;
@@ -1222,8 +1248,13 @@ static int vi_common_early_init(void *handle)
bool smc_enabled = false;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- adev->smc_rreg = &vi_smc_rreg;
- adev->smc_wreg = &vi_smc_wreg;
+ if (adev->flags & AMDGPU_IS_APU) {
+ adev->smc_rreg = &cz_smc_rreg;
+ adev->smc_wreg = &cz_smc_wreg;
+ } else {
+ adev->smc_rreg = &vi_smc_rreg;
+ adev->smc_wreg = &vi_smc_wreg;
+ }
adev->pcie_rreg = &vi_pcie_rreg;
adev->pcie_wreg = &vi_pcie_wreg;
adev->uvd_ctx_rreg = &vi_uvd_ctx_rreg;
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index 31bb89452e12..d98aa9d82fa1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -66,6 +66,11 @@
#define AMDGPU_NUM_OF_VMIDS 8
+#define PIPEID(x) ((x) << 0)
+#define MEID(x) ((x) << 2)
+#define VMID(x) ((x) << 4)
+#define QUEUEID(x) ((x) << 8)
+
#define RB_BITMAP_WIDTH_PER_SH 2
#define MC_SEQ_MISC0__MT__MASK 0xf0000000