aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_regs.h13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c39
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c263
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c500
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h54
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c135
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c64
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c176
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c111
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h40
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c44
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c56
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c33
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c321
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c450
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c33
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c32
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pasid.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h95
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c40
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c50
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c155
25 files changed, 1738 insertions, 985 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index be6246de5091..0f4960148126 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -7,8 +7,10 @@ ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/
amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
kfd_process.o kfd_queue.o kfd_mqd_manager.o \
- kfd_kernel_queue.o kfd_packet_manager.o \
+ kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
+ kfd_kernel_queue.o kfd_kernel_queue_cik.o \
+ kfd_kernel_queue_vi.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o \
- kfd_interrupt.o
+ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
obj-$(CONFIG_HSA_AMD) += amdkfd.o
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
index 607fc5ceadbe..01ff332fabd4 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -168,6 +168,8 @@
#define IB_ATC_EN (1U << 23)
#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20)
+#define AQL_ENABLE 1
+
#define CP_HQD_DEQUEUE_REQUEST 0xC974
#define DEQUEUE_REQUEST_DRAIN 1
#define DEQUEUE_REQUEST_RESET 2
@@ -188,6 +190,17 @@
#define MQD_VMID_MASK (0xf << 0)
#define MQD_CONTROL_PRIV_STATE_EN (1U << 8)
+#define SDMA_RB_VMID(x) (x << 24)
+#define SDMA_RB_ENABLE (1 << 0)
+#define SDMA_RB_SIZE(x) ((x) << 1) /* log2 */
+#define SDMA_RPTR_WRITEBACK_ENABLE (1 << 12)
+#define SDMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
+#define SDMA_OFFSET(x) (x << 0)
+#define SDMA_DB_ENABLE (1 << 28)
+#define SDMA_ATC (1 << 0)
+#define SDMA_VA_PTR32 (1 << 4)
+#define SDMA_VA_SHARED_BASE(x) (x << 8)
+
#define GRBM_GFX_INDEX 0x30800
#define INSTANCE_INDEX(x) ((x) << 0)
#define SH_INDEX(x) ((x) << 8)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index fcfdf23e1913..5c50aa8a8908 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -178,6 +178,22 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
return -EFAULT;
}
+ if (args->eop_buffer_address &&
+ !access_ok(VERIFY_WRITE,
+ (const void __user *) args->eop_buffer_address,
+ sizeof(uint32_t))) {
+ pr_debug("kfd: can't access eop buffer");
+ return -EFAULT;
+ }
+
+ if (args->ctx_save_restore_address &&
+ !access_ok(VERIFY_WRITE,
+ (const void __user *) args->ctx_save_restore_address,
+ sizeof(uint32_t))) {
+ pr_debug("kfd: can't access ctx save restore buffer");
+ return -EFAULT;
+ }
+
q_properties->is_interop = false;
q_properties->queue_percent = args->queue_percentage;
q_properties->priority = args->queue_priority;
@@ -185,9 +201,16 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->queue_size = args->ring_size;
q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
+ q_properties->eop_ring_buffer_address = args->eop_buffer_address;
+ q_properties->eop_ring_buffer_size = args->eop_buffer_size;
+ q_properties->ctx_save_restore_area_address =
+ args->ctx_save_restore_address;
+ q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
+ else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
+ q_properties->type = KFD_QUEUE_TYPE_SDMA;
else
return -ENOTSUPP;
@@ -214,6 +237,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
pr_debug("Queue Format (%d)\n", q_properties->format);
+ pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address);
+
+ pr_debug("Queue CTX save arex (0x%llX)\n",
+ q_properties->ctx_save_restore_area_address);
+
return 0;
}
@@ -235,9 +263,12 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
if (err)
return err;
+ pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id);
dev = kfd_device_by_id(args->gpu_id);
- if (dev == NULL)
+ if (dev == NULL) {
+ pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id);
return -EINVAL;
+ }
mutex_lock(&p->mutex);
@@ -251,8 +282,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
p->pasid,
dev->id);
- err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, 0,
- KFD_QUEUE_TYPE_COMPUTE, &queue_id);
+ err = pqm_create_queue(&p->pqm, dev, filep, &q_properties,
+ 0, q_properties.type, &queue_id);
if (err != 0)
goto err_create_queue;
@@ -385,7 +416,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
? cache_policy_coherent : cache_policy_noncoherent;
- if (!dev->dqm->set_cache_memory_policy(dev->dqm,
+ if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
&pdd->qpd,
default_policy,
alternate_policy,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 43884ebd4303..5bc32c26b989 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -26,15 +26,25 @@
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers.h"
#define MQD_SIZE_ALIGNED 768
static const struct kfd_device_info kaveri_device_info = {
+ .asic_family = CHIP_KAVERI,
.max_pasid_bits = 16,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.mqd_size_aligned = MQD_SIZE_ALIGNED
};
+static const struct kfd_device_info carrizo_device_info = {
+ .asic_family = CHIP_CARRIZO,
+ .max_pasid_bits = 16,
+ .ih_ring_entry_size = 4 * sizeof(uint32_t),
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED
+};
+
struct kfd_deviceid {
unsigned short did;
const struct kfd_device_info *device_info;
@@ -63,9 +73,13 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x1318, &kaveri_device_info }, /* Kaveri */
{ 0x131B, &kaveri_device_info }, /* Kaveri */
{ 0x131C, &kaveri_device_info }, /* Kaveri */
- { 0x131D, &kaveri_device_info }, /* Kaveri */
+ { 0x131D, &kaveri_device_info } /* Kaveri */
};
+static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
+ unsigned int chunk_size);
+static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
+
static const struct kfd_device_info *lookup_device_info(unsigned short did)
{
size_t i;
@@ -169,20 +183,42 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->shared_resources = *gpu_resources;
/* calculate max size of mqds needed for queues */
- size = max_num_of_processes *
- max_num_of_queues_per_process *
- kfd->device_info->mqd_size_aligned;
+ size = max_num_of_queues_per_device *
+ kfd->device_info->mqd_size_aligned;
+
+ /*
+ * calculate max size of runlist packet.
+ * There can be only 2 packets at once
+ */
+ size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
+ max_num_of_queues_per_device *
+ sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;
+
+ /* Add size of HIQ & DIQ */
+ size += KFD_KERNEL_QUEUE_SIZE * 2;
- /* add another 512KB for all other allocations on gart */
+ /* add another 512KB for all other allocations on gart (HPD, fences) */
size += 512 * 1024;
- if (kfd2kgd->init_sa_manager(kfd->kgd, size)) {
+ if (kfd2kgd->init_gtt_mem_allocation(kfd->kgd, size, &kfd->gtt_mem,
+ &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)) {
dev_err(kfd_device,
- "Error initializing sa manager for device (%x:%x)\n",
- kfd->pdev->vendor, kfd->pdev->device);
+ "Could not allocate %d bytes for device (%x:%x)\n",
+ size, kfd->pdev->vendor, kfd->pdev->device);
goto out;
}
+ dev_info(kfd_device,
+ "Allocated %d bytes on gart for device(%x:%x)\n",
+ size, kfd->pdev->vendor, kfd->pdev->device);
+
+ /* Initialize GTT sa with 512 byte chunk size */
+ if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
+ dev_err(kfd_device,
+ "Error initializing gtt sub-allocator\n");
+ goto kfd_gtt_sa_init_error;
+ }
+
kfd_doorbell_init(kfd);
if (kfd_topology_add_device(kfd) != 0) {
@@ -192,13 +228,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_topology_add_device_error;
}
- if (kfd_interrupt_init(kfd)) {
- dev_err(kfd_device,
- "Error initializing interrupts for device (%x:%x)\n",
- kfd->pdev->vendor, kfd->pdev->device);
- goto kfd_interrupt_error;
- }
-
if (!device_iommu_pasid_init(kfd)) {
dev_err(kfd_device,
"Error initializing iommuv2 for device (%x:%x)\n",
@@ -216,7 +245,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto device_queue_manager_error;
}
- if (kfd->dqm->start(kfd->dqm) != 0) {
+ if (kfd->dqm->ops.start(kfd->dqm) != 0) {
dev_err(kfd_device,
"Error starting queuen manager for device (%x:%x)\n",
kfd->pdev->vendor, kfd->pdev->device);
@@ -237,11 +266,11 @@ dqm_start_error:
device_queue_manager_error:
amd_iommu_free_device(kfd->pdev);
device_iommu_pasid_error:
- kfd_interrupt_exit(kfd);
-kfd_interrupt_error:
kfd_topology_remove_device(kfd);
kfd_topology_add_device_error:
- kfd2kgd->fini_sa_manager(kfd->kgd);
+ kfd_gtt_sa_fini(kfd);
+kfd_gtt_sa_init_error:
+ kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
dev_err(kfd_device,
"device (%x:%x) NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
@@ -254,8 +283,9 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
if (kfd->init_complete) {
device_queue_manager_uninit(kfd->dqm);
amd_iommu_free_device(kfd->pdev);
- kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
+ kfd_gtt_sa_fini(kfd);
+ kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
}
kfree(kfd);
@@ -266,7 +296,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
BUG_ON(kfd == NULL);
if (kfd->init_complete) {
- kfd->dqm->stop(kfd->dqm);
+ kfd->dqm->ops.stop(kfd->dqm);
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
}
@@ -287,7 +317,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback);
- kfd->dqm->start(kfd->dqm);
+ kfd->dqm->ops.start(kfd->dqm);
}
return 0;
@@ -296,13 +326,190 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
- if (kfd->init_complete) {
- spin_lock(&kfd->interrupt_lock);
+ /* Process interrupts / schedule work as necessary */
+}
+
+static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
+ unsigned int chunk_size)
+{
+ unsigned int num_of_bits;
+
+ BUG_ON(!kfd);
+ BUG_ON(!kfd->gtt_mem);
+ BUG_ON(buf_size < chunk_size);
+ BUG_ON(buf_size == 0);
+ BUG_ON(chunk_size == 0);
+
+ kfd->gtt_sa_chunk_size = chunk_size;
+ kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
+
+ num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE;
+ BUG_ON(num_of_bits == 0);
+
+ kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL);
+
+ if (!kfd->gtt_sa_bitmap)
+ return -ENOMEM;
+
+ pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
+ kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
+
+ mutex_init(&kfd->gtt_sa_lock);
- if (kfd->interrupts_active
- && enqueue_ih_ring_entry(kfd, ih_ring_entry))
- schedule_work(&kfd->interrupt_work);
+ return 0;
+
+}
+
+static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
+{
+ mutex_destroy(&kfd->gtt_sa_lock);
+ kfree(kfd->gtt_sa_bitmap);
+}
+
+static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
+ unsigned int bit_num,
+ unsigned int chunk_size)
+{
+ return start_addr + bit_num * chunk_size;
+}
- spin_unlock(&kfd->interrupt_lock);
+static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
+ unsigned int bit_num,
+ unsigned int chunk_size)
+{
+ return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
+}
+
+int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
+ struct kfd_mem_obj **mem_obj)
+{
+ unsigned int found, start_search, cur_size;
+
+ BUG_ON(!kfd);
+
+ if (size == 0)
+ return -EINVAL;
+
+ if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
+ return -ENOMEM;
+
+ *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+ if ((*mem_obj) == NULL)
+ return -ENOMEM;
+
+ pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size);
+
+ start_search = 0;
+
+ mutex_lock(&kfd->gtt_sa_lock);
+
+kfd_gtt_restart_search:
+ /* Find the first chunk that is free */
+ found = find_next_zero_bit(kfd->gtt_sa_bitmap,
+ kfd->gtt_sa_num_of_chunks,
+ start_search);
+
+ pr_debug("kfd: found = %d\n", found);
+
+ /* If there wasn't any free chunk, bail out */
+ if (found == kfd->gtt_sa_num_of_chunks)
+ goto kfd_gtt_no_free_chunk;
+
+ /* Update fields of mem_obj */
+ (*mem_obj)->range_start = found;
+ (*mem_obj)->range_end = found;
+ (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
+ kfd->gtt_start_gpu_addr,
+ found,
+ kfd->gtt_sa_chunk_size);
+ (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
+ kfd->gtt_start_cpu_ptr,
+ found,
+ kfd->gtt_sa_chunk_size);
+
+ pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n",
+ (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);
+
+ /* If we need only one chunk, mark it as allocated and get out */
+ if (size <= kfd->gtt_sa_chunk_size) {
+ pr_debug("kfd: single bit\n");
+ set_bit(found, kfd->gtt_sa_bitmap);
+ goto kfd_gtt_out;
}
+
+ /* Otherwise, try to see if we have enough contiguous chunks */
+ cur_size = size - kfd->gtt_sa_chunk_size;
+ do {
+ (*mem_obj)->range_end =
+ find_next_zero_bit(kfd->gtt_sa_bitmap,
+ kfd->gtt_sa_num_of_chunks, ++found);
+ /*
+ * If next free chunk is not contiguous than we need to
+ * restart our search from the last free chunk we found (which
+ * wasn't contiguous to the previous ones
+ */
+ if ((*mem_obj)->range_end != found) {
+ start_search = found;
+ goto kfd_gtt_restart_search;
+ }
+
+ /*
+ * If we reached end of buffer, bail out with error
+ */
+ if (found == kfd->gtt_sa_num_of_chunks)
+ goto kfd_gtt_no_free_chunk;
+
+ /* Check if we don't need another chunk */
+ if (cur_size <= kfd->gtt_sa_chunk_size)
+ cur_size = 0;
+ else
+ cur_size -= kfd->gtt_sa_chunk_size;
+
+ } while (cur_size > 0);
+
+ pr_debug("kfd: range_start = %d, range_end = %d\n",
+ (*mem_obj)->range_start, (*mem_obj)->range_end);
+
+ /* Mark the chunks as allocated */
+ for (found = (*mem_obj)->range_start;
+ found <= (*mem_obj)->range_end;
+ found++)
+ set_bit(found, kfd->gtt_sa_bitmap);
+
+kfd_gtt_out:
+ mutex_unlock(&kfd->gtt_sa_lock);
+ return 0;
+
+kfd_gtt_no_free_chunk:
+ pr_debug("kfd: allocation failed with mem_obj = %p\n", mem_obj);
+ mutex_unlock(&kfd->gtt_sa_lock);
+ kfree(mem_obj);
+ return -ENOMEM;
+}
+
+int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
+{
+ unsigned int bit;
+
+ BUG_ON(!kfd);
+
+ /* Act like kfree when trying to free a NULL object */
+ if (!mem_obj)
+ return 0;
+
+ pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n",
+ mem_obj, mem_obj->range_start, mem_obj->range_end);
+
+ mutex_lock(&kfd->gtt_sa_lock);
+
+ /* Mark the chunks as free */
+ for (bit = mem_obj->range_start;
+ bit <= mem_obj->range_end;
+ bit++)
+ clear_bit(bit, kfd->gtt_sa_bitmap);
+
+ mutex_unlock(&kfd->gtt_sa_lock);
+
+ kfree(mem_obj);
+ return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 9c8961d22360..b3589d0e39b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -26,34 +26,40 @@
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
+#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
-#include "../../radeon/cik_reg.h"
/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
-static bool is_mem_initialized;
-
-static int init_memory(struct device_queue_manager *dqm);
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
unsigned int pasid, unsigned int vmid);
static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
+
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd);
+
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+ unsigned int sdma_queue_id);
-static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
+static inline
+enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
- BUG_ON(!dqm || !dqm->dev);
- return dqm->dev->shared_resources.compute_pipe_count;
+ if (type == KFD_QUEUE_TYPE_SDMA)
+ return KFD_MQD_TYPE_SDMA;
+ return KFD_MQD_TYPE_CP;
}
static inline unsigned int get_first_pipe(struct device_queue_manager *dqm)
@@ -67,61 +73,7 @@ static inline unsigned int get_pipes_num_cpsch(void)
return PIPE_PER_ME_CP_SCHEDULING;
}
-static inline unsigned int
-get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
-{
- uint32_t nybble;
-
- nybble = (pdd->lds_base >> 60) & 0x0E;
-
- return nybble;
-
-}
-
-static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
-{
- unsigned int shared_base;
-
- shared_base = (pdd->lds_base >> 16) & 0xFF;
-
- return shared_base;
-}
-
-static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble);
-static void init_process_memory(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- BUG_ON(!dqm || !qpd);
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
- DEFAULT_MTYPE(MTYPE_NONCACHED) |
- APE1_MTYPE(MTYPE_NONCACHED);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
- if (qpd->pqm->process->is_32bit_user_mode) {
- temp = get_sh_mem_bases_32(pdd);
- qpd->sh_mem_bases = SHARED_BASE(temp);
- qpd->sh_mem_config |= PTR32;
- } else {
- temp = get_sh_mem_bases_nybble_64(pdd);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
- }
-
- pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
- qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
-}
-
-static void program_sh_mem_settings(struct device_queue_manager *dqm,
+void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid,
@@ -183,6 +135,13 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
mutex_lock(&dqm->lock);
+ if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+ pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+ dqm->total_queue_count);
+ mutex_unlock(&dqm->lock);
+ return -EPERM;
+ }
+
if (list_empty(&qpd->queues_list)) {
retval = allocate_vmid(dqm, qpd, q);
if (retval != 0) {
@@ -193,7 +152,10 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
*allocated_vmid = qpd->vmid;
q->properties.vmid = qpd->vmid;
- retval = create_compute_queue_nocpsch(dqm, q, qpd);
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+ retval = create_compute_queue_nocpsch(dqm, q, qpd);
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ retval = create_sdma_queue_nocpsch(dqm, q, qpd);
if (retval != 0) {
if (list_empty(&qpd->queues_list)) {
@@ -205,7 +167,19 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
}
list_add(&q->list, &qpd->queues_list);
- dqm->queue_count++;
+ if (q->properties.is_active)
+ dqm->queue_count++;
+
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ dqm->sdma_queue_count++;
+
+ /*
+ * Unconditionally increment this counter, regardless of the queue's
+ * type or whether the queue is active.
+ */
+ dqm->total_queue_count++;
+ pr_debug("Total of %d queues are accountable so far\n",
+ dqm->total_queue_count);
mutex_unlock(&dqm->lock);
return 0;
@@ -214,12 +188,12 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
bool set;
- int pipe, bit;
+ int pipe, bit, i;
set = false;
- for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm);
- pipe = (pipe + 1) % get_pipes_num(dqm)) {
+ for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
+ pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
if (dqm->allocated_queues[pipe] != 0) {
bit = find_first_bit(
(unsigned long *)&dqm->allocated_queues[pipe],
@@ -260,7 +234,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
BUG_ON(!dqm || !q || !qpd);
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
if (mqd == NULL)
return -ENOMEM;
@@ -280,7 +254,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
q->queue);
retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
- q->queue, q->properties.write_ptr);
+ q->queue, (uint32_t __user *) q->properties.write_ptr);
if (retval != 0) {
deallocate_hqd(dqm, q);
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
@@ -304,28 +278,53 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
pr_debug("kfd: In Func %s\n", __func__);
mutex_lock(&dqm->lock);
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
- if (mqd == NULL) {
- retval = -ENOMEM;
+
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
+ mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
+ if (mqd == NULL) {
+ retval = -ENOMEM;
+ goto out;
+ }
+ deallocate_hqd(dqm, q);
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
+ if (mqd == NULL) {
+ retval = -ENOMEM;
+ goto out;
+ }
+ dqm->sdma_queue_count--;
+ deallocate_sdma_queue(dqm, q->sdma_id);
+ } else {
+ pr_debug("q->properties.type is invalid (%d)\n",
+ q->properties.type);
+ retval = -EINVAL;
goto out;
}
retval = mqd->destroy_mqd(mqd, q->mqd,
- KFD_PREEMPT_TYPE_WAVEFRONT,
+ KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
q->pipe, q->queue);
if (retval != 0)
goto out;
- deallocate_hqd(dqm, q);
-
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
list_del(&q->list);
if (list_empty(&qpd->queues_list))
deallocate_vmid(dqm, qpd, q);
- dqm->queue_count--;
+ if (q->properties.is_active)
+ dqm->queue_count--;
+
+ /*
+ * Unconditionally decrement this counter, regardless of the queue's
+ * type
+ */
+ dqm->total_queue_count--;
+ pr_debug("Total of %d queues are accountable so far\n",
+ dqm->total_queue_count);
+
out:
mutex_unlock(&dqm->lock);
return retval;
@@ -340,7 +339,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
BUG_ON(!dqm || !q || !q->mqd);
mutex_lock(&dqm->lock);
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ mqd = dqm->ops.get_mqd_manager(dqm,
+ get_mqd_type_from_queue_type(q->properties.type));
if (mqd == NULL) {
mutex_unlock(&dqm->lock);
return -ENOMEM;
@@ -391,6 +391,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct device_process_node *n;
+ int retval;
BUG_ON(!dqm || !qpd);
@@ -405,12 +406,13 @@ static int register_process_nocpsch(struct device_queue_manager *dqm,
mutex_lock(&dqm->lock);
list_add(&n->list, &dqm->queues);
- init_process_memory(dqm, qpd);
+ retval = dqm->ops_asic_specific.register_process(dqm, qpd);
+
dqm->processes_count++;
mutex_unlock(&dqm->lock);
- return 0;
+ return retval;
}
static int unregister_process_nocpsch(struct device_queue_manager *dqm,
@@ -455,48 +457,7 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
vmid);
}
-static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
-{
- /* In 64-bit mode, we can only control the top 3 bits of the LDS,
- * scratch and GPUVM apertures.
- * The hardware fills in the remaining 59 bits according to the
- * following pattern:
- * LDS: X0000000'00000000 - X0000001'00000000 (4GB)
- * Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
- * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB)
- *
- * (where X/Y is the configurable nybble with the low-bit 0)
- *
- * LDS and scratch will have the same top nybble programmed in the
- * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
- * GPUVM can have a different top nybble programmed in the
- * top 3 bits of SH_MEM_BASES.SHARED_BASE.
- * We don't bother to support different top nybbles
- * for LDS/Scratch and GPUVM.
- */
-
- BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
- top_address_nybble == 0);
-
- return PRIVATE_BASE(top_address_nybble << 12) |
- SHARED_BASE(top_address_nybble << 12);
-}
-
-static int init_memory(struct device_queue_manager *dqm)
-{
- int i, retval;
-
- for (i = 8; i < 16; i++)
- set_pasid_vmid_mapping(dqm, 0, i);
-
- retval = kfd2kgd->init_memory(dqm->dev->kgd);
- if (retval == 0)
- is_mem_initialized = true;
- return retval;
-}
-
-
-static int init_pipelines(struct device_queue_manager *dqm,
+int init_pipelines(struct device_queue_manager *dqm,
unsigned int pipes_num, unsigned int first_pipe)
{
void *hpdptr;
@@ -515,11 +476,8 @@ static int init_pipelines(struct device_queue_manager *dqm,
* because it contains no data when there are no active queues.
*/
- err = kfd2kgd->allocate_mem(dqm->dev->kgd,
- CIK_HPD_EOP_BYTES * pipes_num,
- PAGE_SIZE,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &dqm->pipeline_mem);
+ err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num,
+ &dqm->pipeline_mem);
if (err) {
pr_err("kfd: error allocate vidmem num pipes: %d\n",
@@ -532,26 +490,28 @@ static int init_pipelines(struct device_queue_manager *dqm,
memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+ mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
if (mqd == NULL) {
- kfd2kgd->free_mem(dqm->dev->kgd,
- (struct kgd_mem *) dqm->pipeline_mem);
+ kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
return -ENOMEM;
}
for (i = 0; i < pipes_num; i++) {
inx = i + first_pipe;
+ /*
+ * HPD buffer on GTT is allocated by amdkfd, no need to waste
+ * space in GTT for pipelines we don't initialize
+ */
pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
/* = log2(bytes/4)-1 */
- kfd2kgd->init_pipeline(dqm->dev->kgd, i,
+ kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
}
return 0;
}
-
static int init_scheduler(struct device_queue_manager *dqm)
{
int retval;
@@ -560,12 +520,7 @@ static int init_scheduler(struct device_queue_manager *dqm)
pr_debug("kfd: In %s\n", __func__);
- retval = init_pipelines(dqm, get_pipes_num(dqm), KFD_DQM_FIRST_PIPE);
- if (retval != 0)
- return retval;
-
- retval = init_memory(dqm);
-
+ retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
return retval;
}
@@ -581,6 +536,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
mutex_init(&dqm->lock);
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
+ dqm->sdma_queue_count = 0;
dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
sizeof(unsigned int), GFP_KERNEL);
if (!dqm->allocated_queues) {
@@ -592,6 +548,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
+ dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
init_scheduler(dqm);
return 0;
@@ -609,8 +566,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm)
for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
kfree(dqm->mqds[i]);
mutex_destroy(&dqm->lock);
- kfd2kgd->free_mem(dqm->dev->kgd,
- (struct kgd_mem *) dqm->pipeline_mem);
+ kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}
static int start_nocpsch(struct device_queue_manager *dqm)
@@ -623,6 +579,77 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
return 0;
}
+static int allocate_sdma_queue(struct device_queue_manager *dqm,
+ unsigned int *sdma_queue_id)
+{
+ int bit;
+
+ if (dqm->sdma_bitmap == 0)
+ return -ENOMEM;
+
+ bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
+ CIK_SDMA_QUEUES);
+
+ clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
+ *sdma_queue_id = bit;
+
+ return 0;
+}
+
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+ unsigned int sdma_queue_id)
+{
+ if (sdma_queue_id >= CIK_SDMA_QUEUES)
+ return;
+ set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
+}
+
+static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ uint32_t value = SDMA_ATC;
+
+ if (q->process->is_32bit_user_mode)
+ value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
+ else
+ value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
+ qpd_to_pdd(qpd)));
+ q->properties.sdma_vm_addr = value;
+}
+
+static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ struct mqd_manager *mqd;
+ int retval;
+
+ mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
+ if (!mqd)
+ return -ENOMEM;
+
+ retval = allocate_sdma_queue(dqm, &q->sdma_id);
+ if (retval != 0)
+ return retval;
+
+ q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
+ q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;
+
+ pr_debug("kfd: sdma id is: %d\n", q->sdma_id);
+ pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id);
+ pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id);
+
+ retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
+ &q->gart_mqd_addr, &q->properties);
+ if (retval != 0) {
+ deallocate_sdma_queue(dqm, q->sdma_id);
+ return retval;
+ }
+
+ init_sdma_vm(dqm, q, qpd);
+ return 0;
+}
+
/*
* Device Queue Manager implementation for cp scheduler
*/
@@ -664,8 +691,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
mutex_init(&dqm->lock);
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->processes_count = 0;
+ dqm->sdma_queue_count = 0;
dqm->active_runlist = false;
- retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
+ retval = dqm->ops_asic_specific.initialize(dqm);
if (retval != 0)
goto fail_init_pipelines;
@@ -696,18 +724,14 @@ static int start_cpsch(struct device_queue_manager *dqm)
pr_debug("kfd: allocating fence memory\n");
/* allocate fence memory on the gart */
- retval = kfd2kgd->allocate_mem(dqm->dev->kgd,
- sizeof(*dqm->fence_addr),
- 32,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &dqm->fence_mem);
+ retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
+ &dqm->fence_mem);
if (retval != 0)
goto fail_allocate_vidmem;
dqm->fence_addr = dqm->fence_mem->cpu_ptr;
dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
-
list_for_each_entry(node, &dqm->queues, list)
if (node->qpd->pqm->process && dqm->dev)
kfd_bind_process_to_device(dqm->dev,
@@ -736,8 +760,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
pdd = qpd_to_pdd(node->qpd);
pdd->bound = false;
}
- kfd2kgd->free_mem(dqm->dev->kgd,
- (struct kgd_mem *) dqm->fence_mem);
+ kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
pm_uninit(&dqm->packets);
return 0;
@@ -752,6 +775,21 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
pr_debug("kfd: In func %s\n", __func__);
mutex_lock(&dqm->lock);
+ if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+ pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
+ dqm->total_queue_count);
+ mutex_unlock(&dqm->lock);
+ return -EPERM;
+ }
+
+ /*
+ * Unconditionally increment this counter, regardless of the queue's
+ * type or whether the queue is active.
+ */
+ dqm->total_queue_count++;
+ pr_debug("Total of %d queues are accountable so far\n",
+ dqm->total_queue_count);
+
list_add(&kq->list, &qpd->priv_queue_list);
dqm->queue_count++;
qpd->is_debug = true;
@@ -775,9 +813,24 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
dqm->queue_count--;
qpd->is_debug = false;
execute_queues_cpsch(dqm, false);
+ /*
+ * Unconditionally decrement this counter, regardless of the queue's
+ * type.
+ */
+ dqm->total_queue_count--;
+ pr_debug("Total of %d queues are accountable so far\n",
+ dqm->total_queue_count);
mutex_unlock(&dqm->lock);
}
+static void select_sdma_engine_id(struct queue *q)
+{
+ static int sdma_id;
+
+ q->sdma_id = sdma_id;
+ sdma_id = (sdma_id + 1) % 2;
+}
+
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd, int *allocate_vmid)
{
@@ -793,7 +846,19 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
mutex_lock(&dqm->lock);
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
+ if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+ pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+ dqm->total_queue_count);
+ retval = -EPERM;
+ goto out;
+ }
+
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ select_sdma_engine_id(q);
+
+ mqd = dqm->ops.get_mqd_manager(dqm,
+ get_mqd_type_from_queue_type(q->properties.type));
+
if (mqd == NULL) {
mutex_unlock(&dqm->lock);
return -ENOMEM;
@@ -810,6 +875,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
retval = execute_queues_cpsch(dqm, false);
}
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ dqm->sdma_queue_count++;
+ /*
+ * Unconditionally increment this counter, regardless of the queue's
+ * type or whether the queue is active.
+ */
+ dqm->total_queue_count++;
+
+ pr_debug("Total of %d queues are accountable so far\n",
+ dqm->total_queue_count);
+
out:
mutex_unlock(&dqm->lock);
return retval;
@@ -827,12 +903,20 @@ static int fence_wait_timeout(unsigned int *fence_addr,
pr_err("kfd: qcm fence wait loop timeout expired\n");
return -ETIME;
}
- cpu_relax();
+ schedule();
}
return 0;
}
+static int destroy_sdma_queues(struct device_queue_manager *dqm,
+ unsigned int sdma_engine)
+{
+ return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
+ KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+ sdma_engine);
+}
+
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
int retval;
@@ -845,6 +929,15 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
mutex_lock(&dqm->lock);
if (dqm->active_runlist == false)
goto out;
+
+ pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
+ dqm->sdma_queue_count);
+
+ if (dqm->sdma_queue_count > 0) {
+ destroy_sdma_queues(dqm, 0);
+ destroy_sdma_queues(dqm, 1);
+ }
+
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
if (retval != 0)
@@ -916,20 +1009,32 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
/* remove queue from list to prevent rescheduling after preemption */
mutex_lock(&dqm->lock);
-
- mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
+ mqd = dqm->ops.get_mqd_manager(dqm,
+ get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) {
retval = -ENOMEM;
goto failed;
}
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ dqm->sdma_queue_count--;
+
list_del(&q->list);
- dqm->queue_count--;
+ if (q->properties.is_active)
+ dqm->queue_count--;
execute_queues_cpsch(dqm, false);
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+ /*
+ * Unconditionally decrement this counter, regardless of the queue's
+ * type
+ */
+ dqm->total_queue_count--;
+ pr_debug("Total of %d queues are accountable so far\n",
+ dqm->total_queue_count);
+
mutex_unlock(&dqm->lock);
return 0;
@@ -954,8 +1059,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
- uint32_t default_mtype;
- uint32_t ape1_mtype;
+ bool retval;
pr_debug("kfd: In func %s\n", __func__);
@@ -992,18 +1096,13 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
qpd->sh_mem_ape1_limit = limit >> 16;
}
- default_mtype = (default_policy == cache_policy_coherent) ?
- MTYPE_NONCACHED :
- MTYPE_CACHED;
-
- ape1_mtype = (alternate_policy == cache_policy_coherent) ?
- MTYPE_NONCACHED :
- MTYPE_CACHED;
-
- qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
- | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
- | DEFAULT_MTYPE(default_mtype)
- | APE1_MTYPE(ape1_mtype);
+ retval = dqm->ops_asic_specific.set_cache_memory_policy(
+ dqm,
+ qpd,
+ default_policy,
+ alternate_policy,
+ alternate_aperture_base,
+ alternate_aperture_size);
if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
program_sh_mem_settings(dqm, qpd);
@@ -1013,7 +1112,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
qpd->sh_mem_ape1_limit);
mutex_unlock(&dqm->lock);
- return true;
+ return retval;
out:
mutex_unlock(&dqm->lock);
@@ -1026,6 +1125,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
BUG_ON(!dev);
+ pr_debug("kfd: loading device queue manager\n");
+
dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
if (!dqm)
return NULL;
@@ -1035,40 +1136,50 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case KFD_SCHED_POLICY_HWS:
case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
/* initialize dqm for cp scheduling */
- dqm->create_queue = create_queue_cpsch;
- dqm->initialize = initialize_cpsch;
- dqm->start = start_cpsch;
- dqm->stop = stop_cpsch;
- dqm->destroy_queue = destroy_queue_cpsch;
- dqm->update_queue = update_queue;
- dqm->get_mqd_manager = get_mqd_manager_nocpsch;
- dqm->register_process = register_process_nocpsch;
- dqm->unregister_process = unregister_process_nocpsch;
- dqm->uninitialize = uninitialize_nocpsch;
- dqm->create_kernel_queue = create_kernel_queue_cpsch;
- dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch;
- dqm->set_cache_memory_policy = set_cache_memory_policy;
+ dqm->ops.create_queue = create_queue_cpsch;
+ dqm->ops.initialize = initialize_cpsch;
+ dqm->ops.start = start_cpsch;
+ dqm->ops.stop = stop_cpsch;
+ dqm->ops.destroy_queue = destroy_queue_cpsch;
+ dqm->ops.update_queue = update_queue;
+ dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
+ dqm->ops.register_process = register_process_nocpsch;
+ dqm->ops.unregister_process = unregister_process_nocpsch;
+ dqm->ops.uninitialize = uninitialize_nocpsch;
+ dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
+ dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
+ dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
break;
case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */
- dqm->start = start_nocpsch;
- dqm->stop = stop_nocpsch;
- dqm->create_queue = create_queue_nocpsch;
- dqm->destroy_queue = destroy_queue_nocpsch;
- dqm->update_queue = update_queue;
- dqm->get_mqd_manager = get_mqd_manager_nocpsch;
- dqm->register_process = register_process_nocpsch;
- dqm->unregister_process = unregister_process_nocpsch;
- dqm->initialize = initialize_nocpsch;
- dqm->uninitialize = uninitialize_nocpsch;
- dqm->set_cache_memory_policy = set_cache_memory_policy;
+ dqm->ops.start = start_nocpsch;
+ dqm->ops.stop = stop_nocpsch;
+ dqm->ops.create_queue = create_queue_nocpsch;
+ dqm->ops.destroy_queue = destroy_queue_nocpsch;
+ dqm->ops.update_queue = update_queue;
+ dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
+ dqm->ops.register_process = register_process_nocpsch;
+ dqm->ops.unregister_process = unregister_process_nocpsch;
+ dqm->ops.initialize = initialize_nocpsch;
+ dqm->ops.uninitialize = uninitialize_nocpsch;
+ dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
break;
default:
BUG();
break;
}
- if (dqm->initialize(dqm) != 0) {
+ switch (dev->device_info->asic_family) {
+ case CHIP_CARRIZO:
+ device_queue_manager_init_vi(&dqm->ops_asic_specific);
+ break;
+
+ case CHIP_KAVERI:
+ device_queue_manager_init_cik(&dqm->ops_asic_specific);
+ break;
+ }
+
+ if (dqm->ops.initialize(dqm) != 0) {
kfree(dqm);
return NULL;
}
@@ -1080,7 +1191,6 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
BUG_ON(!dqm);
- dqm->uninitialize(dqm);
+ dqm->ops.uninitialize(dqm);
kfree(dqm);
}
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index c3f189e8ae35..d64f86cda34f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -36,6 +36,9 @@
#define KFD_VMID_START_OFFSET (8)
#define VMID_PER_DEVICE CIK_VMID_NUM
#define KFD_DQM_FIRST_PIPE (0)
+#define CIK_SDMA_QUEUES (4)
+#define CIK_SDMA_QUEUES_PER_ENGINE (2)
+#define CIK_SDMA_ENGINE_NUM (2)
struct device_process_node {
struct qcm_process_device *qpd;
@@ -43,7 +46,7 @@ struct device_process_node {
};
/**
- * struct device_queue_manager
+ * struct device_queue_manager_ops
*
* @create_queue: Queue creation routine.
*
@@ -78,15 +81,9 @@ struct device_process_node {
* @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
* memory apertures.
*
- * This struct is a base class for the kfd queues scheduler in the
- * device level. The device base class should expose the basic operations
- * for queue creation and queue destruction. This base class hides the
- * scheduling mode of the driver and the specific implementation of the
- * concrete device. This class is the only class in the queues scheduler
- * that configures the H/W.
*/
-struct device_queue_manager {
+struct device_queue_manager_ops {
int (*create_queue)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd,
@@ -121,7 +118,23 @@ struct device_queue_manager {
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
+};
+/**
+ * struct device_queue_manager
+ *
+ * This struct is a base class for the kfd queues scheduler in the
+ * device level. The device base class should expose the basic operations
+ * for queue creation and queue destruction. This base class hides the
+ * scheduling mode of the driver and the specific implementation of the
+ * concrete device. This class is the only class in the queues scheduler
+ * that configures the H/W.
+ *
+ */
+
+struct device_queue_manager {
+ struct device_queue_manager_ops ops;
+ struct device_queue_manager_ops ops_asic_specific;
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX];
struct packet_manager packets;
@@ -130,8 +143,11 @@ struct device_queue_manager {
struct list_head queues;
unsigned int processes_count;
unsigned int queue_count;
+ unsigned int sdma_queue_count;
+ unsigned int total_queue_count;
unsigned int next_pipe_to_allocate;
unsigned int *allocated_queues;
+ unsigned int sdma_bitmap;
unsigned int vmid_bitmap;
uint64_t pipelines_addr;
struct kfd_mem_obj *pipeline_mem;
@@ -141,6 +157,28 @@ struct device_queue_manager {
bool active_runlist;
};
+void device_queue_manager_init_cik(struct device_queue_manager_ops *ops);
+void device_queue_manager_init_vi(struct device_queue_manager_ops *ops);
+void program_sh_mem_settings(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+int init_pipelines(struct device_queue_manager *dqm,
+ unsigned int pipes_num, unsigned int first_pipe);
+
+extern inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
+{
+ return (pdd->lds_base >> 16) & 0xFF;
+}
+
+extern inline unsigned int
+get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
+{
+ return (pdd->lds_base >> 60) & 0x0E;
+}
+extern inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
+{
+ BUG_ON(!dqm || !dqm->dev);
+ return dqm->dev->shared_resources.compute_pipe_count;
+}
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
new file mode 100644
index 000000000000..6b072466e2a6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "cik_regs.h"
+
+static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size);
+static int register_process_cik(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+static int initialize_cpsch_cik(struct device_queue_manager *dqm);
+
+void device_queue_manager_init_cik(struct device_queue_manager_ops *ops)
+{
+ ops->set_cache_memory_policy = set_cache_memory_policy_cik;
+ ops->register_process = register_process_cik;
+ ops->initialize = initialize_cpsch_cik;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
+{
+ /* In 64-bit mode, we can only control the top 3 bits of the LDS,
+ * scratch and GPUVM apertures.
+ * The hardware fills in the remaining 59 bits according to the
+ * following pattern:
+ * LDS: X0000000'00000000 - X0000001'00000000 (4GB)
+ * Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
+ * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB)
+ *
+ * (where X/Y is the configurable nybble with the low-bit 0)
+ *
+ * LDS and scratch will have the same top nybble programmed in the
+ * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
+ * GPUVM can have a different top nybble programmed in the
+ * top 3 bits of SH_MEM_BASES.SHARED_BASE.
+ * We don't bother to support different top nybbles
+ * for LDS/Scratch and GPUVM.
+ */
+
+ BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
+ top_address_nybble == 0);
+
+ return PRIVATE_BASE(top_address_nybble << 12) |
+ SHARED_BASE(top_address_nybble << 12);
+}
+
+static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size)
+{
+ uint32_t default_mtype;
+ uint32_t ape1_mtype;
+
+ default_mtype = (default_policy == cache_policy_coherent) ?
+ MTYPE_NONCACHED :
+ MTYPE_CACHED;
+
+ ape1_mtype = (alternate_policy == cache_policy_coherent) ?
+ MTYPE_NONCACHED :
+ MTYPE_CACHED;
+
+ qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
+ | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
+ | DEFAULT_MTYPE(default_mtype)
+ | APE1_MTYPE(ape1_mtype);
+
+ return true;
+}
+
+static int register_process_cik(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ struct kfd_process_device *pdd;
+ unsigned int temp;
+
+ BUG_ON(!dqm || !qpd);
+
+ pdd = qpd_to_pdd(qpd);
+
+ /* check if sh_mem_config register already configured */
+ if (qpd->sh_mem_config == 0) {
+ qpd->sh_mem_config =
+ ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
+ DEFAULT_MTYPE(MTYPE_NONCACHED) |
+ APE1_MTYPE(MTYPE_NONCACHED);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ }
+
+ if (qpd->pqm->process->is_32bit_user_mode) {
+ temp = get_sh_mem_bases_32(pdd);
+ qpd->sh_mem_bases = SHARED_BASE(temp);
+ qpd->sh_mem_config |= PTR32;
+ } else {
+ temp = get_sh_mem_bases_nybble_64(pdd);
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+ }
+
+ pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
+ qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
+
+ return 0;
+}
+
+static int initialize_cpsch_cik(struct device_queue_manager *dqm)
+{
+ return init_pipelines(dqm, get_pipes_num(dqm), 0);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
new file mode 100644
index 000000000000..20553dcd257d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+
+static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size);
+static int register_process_vi(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+static int initialize_cpsch_vi(struct device_queue_manager *dqm);
+
+void device_queue_manager_init_vi(struct device_queue_manager_ops *ops)
+{
+ pr_warn("amdkfd: VI DQM is not currently supported\n");
+
+ ops->set_cache_memory_policy = set_cache_memory_policy_vi;
+ ops->register_process = register_process_vi;
+ ops->initialize = initialize_cpsch_vi;
+}
+
+static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size)
+{
+ return false;
+}
+
+static int register_process_vi(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ return -1;
+}
+
+static int initialize_cpsch_vi(struct device_queue_manager *dqm)
+{
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index b5791a5c7c06..1a9b355dd114 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -137,10 +137,6 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
if (dev == NULL)
return -EINVAL;
- /* Find if pdd exists for combination of process and gpu id */
- if (!kfd_get_process_device_data(dev, process, 0))
- return -EINVAL;
-
/* Calculate physical address of doorbell */
address = kfd_get_process_doorbells(dev, process);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index e64aa99e5e41..35b987574633 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -303,10 +303,11 @@ int kfd_init_apertures(struct kfd_process *process)
while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL &&
id < NUM_OF_SUPPORTED_GPUS) {
- pdd = kfd_get_process_device_data(dev, process, 1);
- if (!pdd)
+ pdd = kfd_create_process_device_data(dev, process);
+ if (pdd == NULL) {
+ pr_err("Failed to create process device data\n");
return -1;
-
+ }
/*
* For 64 bit process aperture will be statically reserved in
* the x86_64 non canonical process address space
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
deleted file mode 100644
index 5b999095a1f7..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * KFD Interrupts.
- *
- * AMD GPUs deliver interrupts by pushing an interrupt description onto the
- * interrupt ring and then sending an interrupt. KGD receives the interrupt
- * in ISR and sends us a pointer to each new entry on the interrupt ring.
- *
- * We generally can't process interrupt-signaled events from ISR, so we call
- * out to each interrupt client module (currently only the scheduler) to ask if
- * each interrupt is interesting. If they return true, then it requires further
- * processing so we copy it to an internal interrupt ring and call each
- * interrupt client again from a work-queue.
- *
- * There's no acknowledgment for the interrupts we use. The hardware simply
- * queues a new interrupt each time without waiting.
- *
- * The fixed-size internal queue means that it's possible for us to lose
- * interrupts because we have no back-pressure to the hardware.
- */
-
-#include <linux/slab.h>
-#include <linux/device.h>
-#include "kfd_priv.h"
-
-#define KFD_INTERRUPT_RING_SIZE 256
-
-static void interrupt_wq(struct work_struct *);
-
-int kfd_interrupt_init(struct kfd_dev *kfd)
-{
- void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE,
- kfd->device_info->ih_ring_entry_size,
- GFP_KERNEL);
- if (!interrupt_ring)
- return -ENOMEM;
-
- kfd->interrupt_ring = interrupt_ring;
- kfd->interrupt_ring_size =
- KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size;
- atomic_set(&kfd->interrupt_ring_wptr, 0);
- atomic_set(&kfd->interrupt_ring_rptr, 0);
-
- spin_lock_init(&kfd->interrupt_lock);
-
- INIT_WORK(&kfd->interrupt_work, interrupt_wq);
-
- kfd->interrupts_active = true;
-
- /*
- * After this function returns, the interrupt will be enabled. This
- * barrier ensures that the interrupt running on a different processor
- * sees all the above writes.
- */
- smp_wmb();
-
- return 0;
-}
-
-void kfd_interrupt_exit(struct kfd_dev *kfd)
-{
- /*
- * Stop the interrupt handler from writing to the ring and scheduling
- * workqueue items. The spinlock ensures that any interrupt running
- * after we have unlocked sees interrupts_active = false.
- */
- unsigned long flags;
-
- spin_lock_irqsave(&kfd->interrupt_lock, flags);
- kfd->interrupts_active = false;
- spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
-
- /*
- * Flush_scheduled_work ensures that there are no outstanding
- * work-queue items that will access interrupt_ring. New work items
- * can't be created because we stopped interrupt handling above.
- */
- flush_scheduled_work();
-
- kfree(kfd->interrupt_ring);
-}
-
-/*
- * This assumes that it can't be called concurrently with itself
- * but only with dequeue_ih_ring_entry.
- */
-bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
-{
- unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
- unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
-
- if ((rptr - wptr) % kfd->interrupt_ring_size ==
- kfd->device_info->ih_ring_entry_size) {
- /* This is very bad, the system is likely to hang. */
- dev_err_ratelimited(kfd_chardev(),
- "Interrupt ring overflow, dropping interrupt.\n");
- return false;
- }
-
- memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
- kfd->device_info->ih_ring_entry_size);
-
- wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
- kfd->interrupt_ring_size;
- smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
- atomic_set(&kfd->interrupt_ring_wptr, wptr);
-
- return true;
-}
-
-/*
- * This assumes that it can't be called concurrently with itself
- * but only with enqueue_ih_ring_entry.
- */
-static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
-{
- /*
- * Assume that wait queues have an implicit barrier, i.e. anything that
- * happened in the ISR before it queued work is visible.
- */
-
- unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
- unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
-
- if (rptr == wptr)
- return false;
-
- memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
- kfd->device_info->ih_ring_entry_size);
-
- rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
- kfd->interrupt_ring_size;
-
- /*
- * Ensure the rptr write update is not visible until
- * memcpy has finished reading.
- */
- smp_mb();
- atomic_set(&kfd->interrupt_ring_rptr, rptr);
-
- return true;
-}
-
-static void interrupt_wq(struct work_struct *work)
-{
- struct kfd_dev *dev = container_of(work, struct kfd_dev,
- interrupt_work);
-
- uint32_t ih_ring_entry[DIV_ROUND_UP(
- dev->device_info->ih_ring_entry_size,
- sizeof(uint32_t))];
-
- while (dequeue_ih_ring_entry(dev, ih_ring_entry))
- ;
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 935071410724..e415a2a9207e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -56,8 +56,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
switch (type) {
case KFD_QUEUE_TYPE_DIQ:
case KFD_QUEUE_TYPE_HIQ:
- kq->mqd = dev->dqm->get_mqd_manager(dev->dqm,
- KFD_MQD_TYPE_CIK_HIQ);
+ kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm,
+ KFD_MQD_TYPE_HIQ);
break;
default:
BUG();
@@ -72,23 +72,19 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
if (prop.doorbell_ptr == NULL)
goto err_get_kernel_doorbell;
- retval = kfd2kgd->allocate_mem(dev->kgd,
- queue_size,
- PAGE_SIZE,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &kq->pq);
-
+ retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
if (retval != 0)
goto err_pq_allocate_vidmem;
kq->pq_kernel_addr = kq->pq->cpu_ptr;
kq->pq_gpu_addr = kq->pq->gpu_addr;
- retval = kfd2kgd->allocate_mem(dev->kgd,
- sizeof(*kq->rptr_kernel),
- 32,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &kq->rptr_mem);
+ retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
+ if (retval == false)
+ goto err_eop_allocate_vidmem;
+
+ retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
+ &kq->rptr_mem);
if (retval != 0)
goto err_rptr_allocate_vidmem;
@@ -96,11 +92,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
- retval = kfd2kgd->allocate_mem(dev->kgd,
- sizeof(*kq->wptr_kernel),
- 32,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &kq->wptr_mem);
+ retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
+ &kq->wptr_mem);
if (retval != 0)
goto err_wptr_allocate_vidmem;
@@ -121,6 +114,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
prop.queue_address = kq->pq_gpu_addr;
prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr;
prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
+ prop.eop_ring_buffer_address = kq->eop_gpu_addr;
+ prop.eop_ring_buffer_size = PAGE_SIZE;
if (init_queue(&kq->queue, prop) != 0)
goto err_init_queue;
@@ -145,11 +140,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
} else {
/* allocate fence for DIQ */
- retval = kfd2kgd->allocate_mem(dev->kgd,
- sizeof(uint32_t),
- 32,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &kq->fence_mem_obj);
+ retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t),
+ &kq->fence_mem_obj);
if (retval != 0)
goto err_alloc_fence;
@@ -165,11 +157,13 @@ err_alloc_fence:
err_init_mqd:
uninit_queue(kq->queue);
err_init_queue:
- kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->wptr_mem);
+ kfd_gtt_sa_free(dev, kq->wptr_mem);
err_wptr_allocate_vidmem:
- kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->rptr_mem);
+ kfd_gtt_sa_free(dev, kq->rptr_mem);
err_rptr_allocate_vidmem:
- kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->pq);
+ kfd_gtt_sa_free(dev, kq->eop_mem);
+err_eop_allocate_vidmem:
+ kfd_gtt_sa_free(dev, kq->pq);
err_pq_allocate_vidmem:
pr_err("kfd: error init pq\n");
kfd_release_kernel_doorbell(dev, prop.doorbell_ptr);
@@ -190,10 +184,13 @@ static void uninitialize(struct kernel_queue *kq)
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
kq->queue->pipe,
kq->queue->queue);
+ else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
+ kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
- kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->rptr_mem);
- kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->wptr_mem);
- kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->pq);
+ kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
+ kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
+ kq->ops_asic_specific.uninitialize(kq);
+ kfd_gtt_sa_free(kq->dev, kq->pq);
kfd_release_kernel_doorbell(kq->dev,
kq->queue->properties.doorbell_ptr);
uninit_queue(kq->queue);
@@ -265,28 +262,6 @@ static void submit_packet(struct kernel_queue *kq)
kq->pending_wptr);
}
-static int sync_with_hw(struct kernel_queue *kq, unsigned long timeout_ms)
-{
- unsigned long org_timeout_ms;
-
- BUG_ON(!kq);
-
- org_timeout_ms = timeout_ms;
- timeout_ms += jiffies * 1000 / HZ;
- while (*kq->wptr_kernel != *kq->rptr_kernel) {
- if (time_after(jiffies * 1000 / HZ, timeout_ms)) {
- pr_err("kfd: kernel_queue %s timeout expired %lu\n",
- __func__, org_timeout_ms);
- pr_err("kfd: wptr: %d rptr: %d\n",
- *kq->wptr_kernel, *kq->rptr_kernel);
- return -ETIME;
- }
- schedule();
- }
-
- return 0;
-}
-
static void rollback_packet(struct kernel_queue *kq)
{
BUG_ON(!kq);
@@ -304,14 +279,23 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
if (!kq)
return NULL;
- kq->initialize = initialize;
- kq->uninitialize = uninitialize;
- kq->acquire_packet_buffer = acquire_packet_buffer;
- kq->submit_packet = submit_packet;
- kq->sync_with_hw = sync_with_hw;
- kq->rollback_packet = rollback_packet;
+ kq->ops.initialize = initialize;
+ kq->ops.uninitialize = uninitialize;
+ kq->ops.acquire_packet_buffer = acquire_packet_buffer;
+ kq->ops.submit_packet = submit_packet;
+ kq->ops.rollback_packet = rollback_packet;
+
+ switch (dev->device_info->asic_family) {
+ case CHIP_CARRIZO:
+ kernel_queue_init_vi(&kq->ops_asic_specific);
+ break;
+
+ case CHIP_KAVERI:
+ kernel_queue_init_cik(&kq->ops_asic_specific);
+ break;
+ }
- if (kq->initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) {
+ if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) {
pr_err("kfd: failed to init kernel queue\n");
kfree(kq);
return NULL;
@@ -323,7 +307,7 @@ void kernel_queue_uninit(struct kernel_queue *kq)
{
BUG_ON(!kq);
- kq->uninitialize(kq);
+ kq->ops.uninitialize(kq);
kfree(kq);
}
@@ -335,19 +319,18 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
BUG_ON(!dev);
- pr_debug("kfd: starting kernel queue test\n");
+ pr_err("kfd: starting kernel queue test\n");
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
BUG_ON(!kq);
- retval = kq->acquire_packet_buffer(kq, 5, &buffer);
+ retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
BUG_ON(retval != 0);
for (i = 0; i < 5; i++)
buffer[i] = kq->nop_packet;
- kq->submit_packet(kq);
- kq->sync_with_hw(kq, 1000);
+ kq->ops.submit_packet(kq);
- pr_debug("kfd: ending kernel queue test\n");
+ pr_err("kfd: ending kernel queue test\n");
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index dcd2bdb68d44..594053136ee4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -28,8 +28,31 @@
#include <linux/types.h>
#include "kfd_priv.h"
-struct kernel_queue {
- /* interface */
+/**
+ * struct kernel_queue_ops
+ *
+ * @initialize: Initialize a kernel queue, including allocations of GART memory
+ * needed for the queue.
+ *
+ * @uninitialize: Uninitialize a kernel queue and free all its memory usages.
+ *
+ * @acquire_packet_buffer: Returns a pointer to the location in the kernel
+ * queue ring buffer where the calling function can write its packet. It is
+ * Guaranteed that there is enough space for that packet. It also updates the
+ * pending write pointer to that location so subsequent calls to
+ * acquire_packet_buffer will get a correct write pointer
+ *
+ * @submit_packet: Update the write pointer and doorbell of a kernel queue.
+ *
+ * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
+ * queue are equal, which means the CP has read all the submitted packets.
+ *
+ * @rollback_packet: This routine is called if we failed to build an acquired
+ * packet for some reason. It just overwrites the pending wptr with the current
+ * one
+ *
+ */
+struct kernel_queue_ops {
bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
void (*uninitialize)(struct kernel_queue *kq);
@@ -38,9 +61,12 @@ struct kernel_queue {
unsigned int **buffer_ptr);
void (*submit_packet)(struct kernel_queue *kq);
- int (*sync_with_hw)(struct kernel_queue *kq,
- unsigned long timeout_ms);
void (*rollback_packet)(struct kernel_queue *kq);
+};
+
+struct kernel_queue {
+ struct kernel_queue_ops ops;
+ struct kernel_queue_ops ops_asic_specific;
/* data */
struct kfd_dev *dev;
@@ -58,6 +84,9 @@ struct kernel_queue {
struct kfd_mem_obj *pq;
uint64_t pq_gpu_addr;
uint32_t *pq_kernel_addr;
+ struct kfd_mem_obj *eop_mem;
+ uint64_t eop_gpu_addr;
+ uint32_t *eop_kernel_addr;
struct kfd_mem_obj *fence_mem_obj;
uint64_t fence_gpu_addr;
@@ -66,4 +95,7 @@ struct kernel_queue {
struct list_head list;
};
+void kernel_queue_init_cik(struct kernel_queue_ops *ops);
+void kernel_queue_init_vi(struct kernel_queue_ops *ops);
+
#endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
new file mode 100644
index 000000000000..a90eb440b1fb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+
+static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size);
+static void uninitialize_cik(struct kernel_queue *kq);
+
+void kernel_queue_init_cik(struct kernel_queue_ops *ops)
+{
+ ops->initialize = initialize_cik;
+ ops->uninitialize = uninitialize_cik;
+}
+
+static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size)
+{
+ return true;
+}
+
+static void uninitialize_cik(struct kernel_queue *kq)
+{
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
new file mode 100644
index 000000000000..f1d48281e322
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+
+static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size);
+static void uninitialize_vi(struct kernel_queue *kq);
+
+void kernel_queue_init_vi(struct kernel_queue_ops *ops)
+{
+ ops->initialize = initialize_vi;
+ ops->uninitialize = uninitialize_vi;
+}
+
+static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size)
+{
+ int retval;
+
+ retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
+ if (retval != 0)
+ return false;
+
+ kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
+ kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
+
+ memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
+
+ return true;
+}
+
+static void uninitialize_vi(struct kernel_queue *kq)
+{
+ kfd_gtt_sa_free(kq->dev, kq->eop_mem);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 95d5af138e6e..3f34ae16f075 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -29,10 +29,10 @@
#define KFD_DRIVER_AUTHOR "AMD Inc. and others"
#define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
-#define KFD_DRIVER_DATE "20141113"
+#define KFD_DRIVER_DATE "20150122"
#define KFD_DRIVER_MAJOR 0
#define KFD_DRIVER_MINOR 7
-#define KFD_DRIVER_PATCHLEVEL 0
+#define KFD_DRIVER_PATCHLEVEL 1
const struct kfd2kgd_calls *kfd2kgd;
static const struct kgd2kfd_calls kgd2kfd = {
@@ -48,17 +48,12 @@ static const struct kgd2kfd_calls kgd2kfd = {
int sched_policy = KFD_SCHED_POLICY_HWS;
module_param(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
- "Kernel cmdline parameter that defines the amdkfd scheduling policy");
+ "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
-int max_num_of_processes = KFD_MAX_NUM_OF_PROCESSES_DEFAULT;
-module_param(max_num_of_processes, int, 0444);
-MODULE_PARM_DESC(max_num_of_processes,
- "Kernel cmdline parameter that defines the amdkfd maximum number of supported processes");
-
-int max_num_of_queues_per_process = KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT;
-module_param(max_num_of_queues_per_process, int, 0444);
-MODULE_PARM_DESC(max_num_of_queues_per_process,
- "Kernel cmdline parameter that defines the amdkfd maximum number of supported queues per process");
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
+module_param(max_num_of_queues_per_device, int, 0444);
+MODULE_PARM_DESC(max_num_of_queues_per_device,
+ "Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
bool kgd2kfd_init(unsigned interface_version,
const struct kfd2kgd_calls *f2g,
@@ -100,16 +95,10 @@ static int __init kfd_module_init(void)
}
/* Verify module parameters */
- if ((max_num_of_processes < 0) ||
- (max_num_of_processes > KFD_MAX_NUM_OF_PROCESSES)) {
- pr_err("kfd: max_num_of_processes must be between 0 to KFD_MAX_NUM_OF_PROCESSES\n");
- return -1;
- }
-
- if ((max_num_of_queues_per_process < 0) ||
- (max_num_of_queues_per_process >
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)) {
- pr_err("kfd: max_num_of_queues_per_process must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_PROCESS\n");
+ if ((max_num_of_queues_per_device < 1) ||
+ (max_num_of_queues_per_device >
+ KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
+ pr_err("kfd: max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
return -1;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 4c3828cf45bf..b1ef1368c3bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -21,326 +21,17 @@
*
*/
-#include <linux/printk.h>
-#include <linux/slab.h>
#include "kfd_priv.h"
-#include "kfd_mqd_manager.h"
-#include "cik_regs.h"
-#include "../../radeon/cik_reg.h"
-
-inline void busy_wait(unsigned long ms)
-{
- while (time_before(jiffies, ms))
- cpu_relax();
-}
-
-static inline struct cik_mqd *get_mqd(void *mqd)
-{
- return (struct cik_mqd *)mqd;
-}
-
-static int init_mqd(struct mqd_manager *mm, void **mqd,
- struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
- struct queue_properties *q)
-{
- uint64_t addr;
- struct cik_mqd *m;
- int retval;
-
- BUG_ON(!mm || !q || !mqd);
-
- pr_debug("kfd: In func %s\n", __func__);
-
- retval = kfd2kgd->allocate_mem(mm->dev->kgd,
- sizeof(struct cik_mqd),
- 256,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) mqd_mem_obj);
-
- if (retval != 0)
- return -ENOMEM;
-
- m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
- addr = (*mqd_mem_obj)->gpu_addr;
-
- memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));
-
- m->header = 0xC0310800;
- m->compute_pipelinestat_enable = 1;
- m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
- m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
- m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
- m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
-
- /*
- * Make sure to use the last queue state saved on mqd when the cp
- * reassigns the queue, so when queue is switched on/off (e.g over
- * subscription or quantum timeout) the context will be consistent
- */
- m->cp_hqd_persistent_state =
- DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ;
-
- m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
- m->cp_mqd_base_addr_lo = lower_32_bits(addr);
- m->cp_mqd_base_addr_hi = upper_32_bits(addr);
-
- m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;
- /* Although WinKFD writes this, I suspect it should not be necessary */
- m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE;
-
- m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
- QUANTUM_DURATION(10);
-
- /*
- * Pipe Priority
- * Identifies the pipe relative priority when this queue is connected
- * to the pipeline. The pipe priority is against the GFX pipe and HP3D.
- * In KFD we are using a fixed pipe priority set to CS_MEDIUM.
- * 0 = CS_LOW (typically below GFX)
- * 1 = CS_MEDIUM (typically between HP3D and GFX
- * 2 = CS_HIGH (typically above HP3D)
- */
- m->cp_hqd_pipe_priority = 1;
- m->cp_hqd_queue_priority = 15;
-
- *mqd = m;
- if (gart_addr != NULL)
- *gart_addr = addr;
- retval = mm->update_mqd(mm, m, q);
-
- return retval;
-}
-
-static void uninit_mqd(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- BUG_ON(!mm || !mqd);
- kfd2kgd->free_mem(mm->dev->kgd, (struct kgd_mem *) mqd_mem_obj);
-}
-
-static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr)
-{
- return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
-
-}
-
-static int update_mqd(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
-{
- struct cik_mqd *m;
-
- BUG_ON(!mm || !q || !mqd);
-
- pr_debug("kfd: In func %s\n", __func__);
-
- m = get_mqd(mqd);
- m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
- DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
-
- /*
- * Calculating queue size which is log base 2 of actual queue size -1
- * dwords and another -1 for ffs
- */
- m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
- - 1 - 1;
- m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
- m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
- m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
- m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
- m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
- DOORBELL_OFFSET(q->doorbell_off);
-
- m->cp_hqd_vmid = q->vmid;
-
- if (q->format == KFD_QUEUE_FORMAT_AQL) {
- m->cp_hqd_iq_rptr = AQL_ENABLE;
- m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
- }
-
- m->cp_hqd_active = 0;
- q->is_active = false;
- if (q->queue_size > 0 &&
- q->queue_address != 0 &&
- q->queue_percent > 0) {
- m->cp_hqd_active = 1;
- q->is_active = true;
- }
-
- return 0;
-}
-
-static int destroy_mqd(struct mqd_manager *mm, void *mqd,
- enum kfd_preempt_type type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
-{
- return kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
- pipe_id, queue_id);
-}
-
-static bool is_occupied(struct mqd_manager *mm, void *mqd,
- uint64_t queue_address, uint32_t pipe_id,
- uint32_t queue_id)
-{
-
- return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
- pipe_id, queue_id);
-
-}
-
-/*
- * HIQ MQD Implementation, concrete implementation for HIQ MQD implementation.
- * The HIQ queue in Kaveri is using the same MQD structure as all the user mode
- * queues but with different initial values.
- */
-
-static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
- struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
- struct queue_properties *q)
-{
- uint64_t addr;
- struct cik_mqd *m;
- int retval;
-
- BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);
-
- pr_debug("kfd: In func %s\n", __func__);
-
- retval = kfd2kgd->allocate_mem(mm->dev->kgd,
- sizeof(struct cik_mqd),
- 256,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) mqd_mem_obj);
-
- if (retval != 0)
- return -ENOMEM;
-
- m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
- addr = (*mqd_mem_obj)->gpu_addr;
-
- memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));
-
- m->header = 0xC0310800;
- m->compute_pipelinestat_enable = 1;
- m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
- m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
- m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
- m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
-
- m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE |
- PRELOAD_REQ;
- m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
- QUANTUM_DURATION(10);
-
- m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
- m->cp_mqd_base_addr_lo = lower_32_bits(addr);
- m->cp_mqd_base_addr_hi = upper_32_bits(addr);
-
- m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
-
- /*
- * Pipe Priority
- * Identifies the pipe relative priority when this queue is connected
- * to the pipeline. The pipe priority is against the GFX pipe and HP3D.
- * In KFD we are using a fixed pipe priority set to CS_MEDIUM.
- * 0 = CS_LOW (typically below GFX)
- * 1 = CS_MEDIUM (typically between HP3D and GFX
- * 2 = CS_HIGH (typically above HP3D)
- */
- m->cp_hqd_pipe_priority = 1;
- m->cp_hqd_queue_priority = 15;
-
- *mqd = m;
- if (gart_addr)
- *gart_addr = addr;
- retval = mm->update_mqd(mm, m, q);
-
- return retval;
-}
-
-static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
-{
- struct cik_mqd *m;
-
- BUG_ON(!mm || !q || !mqd);
-
- pr_debug("kfd: In func %s\n", __func__);
-
- m = get_mqd(mqd);
- m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
- DEFAULT_MIN_AVAIL_SIZE |
- PRIV_STATE |
- KMD_QUEUE;
-
- /*
- * Calculating queue size which is log base 2 of actual queue
- * size -1 dwords
- */
- m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
- - 1 - 1;
- m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
- m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
- m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
- m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
- m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
- DOORBELL_OFFSET(q->doorbell_off);
-
- m->cp_hqd_vmid = q->vmid;
-
- m->cp_hqd_active = 0;
- q->is_active = false;
- if (q->queue_size > 0 &&
- q->queue_address != 0 &&
- q->queue_percent > 0) {
- m->cp_hqd_active = 1;
- q->is_active = true;
- }
-
- return 0;
-}
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
{
- struct mqd_manager *mqd;
-
- BUG_ON(!dev);
- BUG_ON(type >= KFD_MQD_TYPE_MAX);
-
- pr_debug("kfd: In func %s\n", __func__);
-
- mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
- if (!mqd)
- return NULL;
-
- mqd->dev = dev;
-
- switch (type) {
- case KFD_MQD_TYPE_CIK_CP:
- case KFD_MQD_TYPE_CIK_COMPUTE:
- mqd->init_mqd = init_mqd;
- mqd->uninit_mqd = uninit_mqd;
- mqd->load_mqd = load_mqd;
- mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
- break;
- case KFD_MQD_TYPE_CIK_HIQ:
- mqd->init_mqd = init_mqd_hiq;
- mqd->uninit_mqd = uninit_mqd;
- mqd->load_mqd = load_mqd;
- mqd->update_mqd = update_mqd_hiq;
- mqd->destroy_mqd = destroy_mqd;
- mqd->is_occupied = is_occupied;
- break;
- default:
- kfree(mqd);
- return NULL;
+ switch (dev->device_info->asic_family) {
+ case CHIP_KAVERI:
+ return mqd_manager_init_cik(type, dev);
+ case CHIP_CARRIZO:
+ return mqd_manager_init_vi(type, dev);
}
- return mqd;
+ return NULL;
}
-
-/* SDMA queues should be implemented here when the cp will supports them */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
new file mode 100644
index 000000000000..a09e18a339f3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "cik_regs.h"
+#include "cik_structs.h"
+
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+ return (struct cik_mqd *)mqd;
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ uint64_t addr;
+ struct cik_mqd *m;
+ int retval;
+
+ BUG_ON(!mm || !q || !mqd);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
+ mqd_mem_obj);
+
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
+ addr = (*mqd_mem_obj)->gpu_addr;
+
+ memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));
+
+ m->header = 0xC0310800;
+ m->compute_pipelinestat_enable = 1;
+ m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+ /*
+ * Make sure to use the last queue state saved on mqd when the cp
+ * reassigns the queue, so when queue is switched on/off (e.g over
+ * subscription or quantum timeout) the context will be consistent
+ */
+ m->cp_hqd_persistent_state =
+ DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ;
+
+ m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
+ m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+ m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+ m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;
+ /* Although WinKFD writes this, I suspect it should not be necessary */
+ m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE;
+
+ m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
+ QUANTUM_DURATION(10);
+
+ /*
+ * Pipe Priority
+ * Identifies the pipe relative priority when this queue is connected
+ * to the pipeline. The pipe priority is against the GFX pipe and HP3D.
+ * In KFD we are using a fixed pipe priority set to CS_MEDIUM.
+ * 0 = CS_LOW (typically below GFX)
+ * 1 = CS_MEDIUM (typically between HP3D and GFX
+ * 2 = CS_HIGH (typically above HP3D)
+ */
+ m->cp_hqd_pipe_priority = 1;
+ m->cp_hqd_queue_priority = 15;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL)
+ m->cp_hqd_iq_rptr = AQL_ENABLE;
+
+ *mqd = m;
+ if (gart_addr != NULL)
+ *gart_addr = addr;
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ int retval;
+ struct cik_sdma_rlc_registers *m;
+
+ BUG_ON(!mm || !mqd || !mqd_mem_obj);
+
+ retval = kfd_gtt_sa_allocate(mm->dev,
+ sizeof(struct cik_sdma_rlc_registers),
+ mqd_mem_obj);
+
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
+
+ memset(m, 0, sizeof(struct cik_sdma_rlc_registers));
+
+ *mqd = m;
+ if (gart_addr != NULL)
+ *gart_addr = (*mqd_mem_obj)->gpu_addr;
+
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ BUG_ON(!mm || !mqd);
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ BUG_ON(!mm || !mqd);
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr)
+{
+ return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+}
+
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr)
+{
+ return kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
+}
+
+static int update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct cik_mqd *m;
+
+ BUG_ON(!mm || !q || !mqd);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ m = get_mqd(mqd);
+ m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
+ DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
+
+ /*
+ * Calculating queue size which is log base 2 of actual queue size -1
+ * dwords and another -1 for ffs
+ */
+ m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
+ - 1 - 1;
+ m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
+ DOORBELL_OFFSET(q->doorbell_off);
+
+ m->cp_hqd_vmid = q->vmid;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
+ }
+
+ m->cp_hqd_active = 0;
+ q->is_active = false;
+ if (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0) {
+ m->cp_hqd_active = 1;
+ q->is_active = true;
+ }
+
+ return 0;
+}
+
+static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct cik_sdma_rlc_registers *m;
+
+ BUG_ON(!mm || !mqd || !q);
+
+ m = get_sdma_mqd(mqd);
+ m->sdma_rlc_rb_cntl =
+ SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) |
+ SDMA_RB_VMID(q->vmid) |
+ SDMA_RPTR_WRITEBACK_ENABLE |
+ SDMA_RPTR_WRITEBACK_TIMER(6);
+
+ m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8);
+ m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+ m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE;
+ m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
+
+ m->sdma_engine_id = q->sdma_engine_id;
+ m->sdma_queue_id = q->sdma_queue_id;
+
+ q->is_active = false;
+ if (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0) {
+ m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE;
+ q->is_active = true;
+ }
+
+ return 0;
+}
+
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
+ pipe_id, queue_id);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+}
+
+static bool is_occupied(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+
+ return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
+ pipe_id, queue_id);
+
+}
+
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+}
+
+/*
+ * HIQ MQD Implementation, concrete implementation for HIQ MQD implementation.
+ * The HIQ queue in Kaveri is using the same MQD structure as all the user mode
+ * queues but with different initial values.
+ */
+
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ uint64_t addr;
+ struct cik_mqd *m;
+ int retval;
+
+ BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
+ mqd_mem_obj);
+
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
+ addr = (*mqd_mem_obj)->gpu_addr;
+
+ memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));
+
+ m->header = 0xC0310800;
+ m->compute_pipelinestat_enable = 1;
+ m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+ m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE |
+ PRELOAD_REQ;
+ m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
+ QUANTUM_DURATION(10);
+
+ m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
+ m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+ m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+ m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
+
+ /*
+ * Pipe Priority
+ * Identifies the pipe relative priority when this queue is connected
+ * to the pipeline. The pipe priority is against the GFX pipe and HP3D.
+ * In KFD we are using a fixed pipe priority set to CS_MEDIUM.
+ * 0 = CS_LOW (typically below GFX)
+ * 1 = CS_MEDIUM (typically between HP3D and GFX
+ * 2 = CS_HIGH (typically above HP3D)
+ */
+ m->cp_hqd_pipe_priority = 1;
+ m->cp_hqd_queue_priority = 15;
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct cik_mqd *m;
+
+ BUG_ON(!mm || !q || !mqd);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ m = get_mqd(mqd);
+ m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
+ DEFAULT_MIN_AVAIL_SIZE |
+ PRIV_STATE |
+ KMD_QUEUE;
+
+ /*
+ * Calculating queue size which is log base 2 of actual queue
+ * size -1 dwords
+ */
+ m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
+ - 1 - 1;
+ m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
+ DOORBELL_OFFSET(q->doorbell_off);
+
+ m->cp_hqd_vmid = q->vmid;
+
+ m->cp_hqd_active = 0;
+ q->is_active = false;
+ if (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0) {
+ m->cp_hqd_active = 1;
+ q->is_active = true;
+ }
+
+ return 0;
+}
+
+struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+ struct cik_sdma_rlc_registers *m;
+
+ BUG_ON(!mqd);
+
+ m = (struct cik_sdma_rlc_registers *)mqd;
+
+ return m;
+}
+
+struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev)
+{
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dev);
+ BUG_ON(type >= KFD_MQD_TYPE_MAX);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+ if (!mqd)
+ return NULL;
+
+ mqd->dev = dev;
+
+ switch (type) {
+ case KFD_MQD_TYPE_CP:
+ case KFD_MQD_TYPE_COMPUTE:
+ mqd->init_mqd = init_mqd;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+ break;
+ case KFD_MQD_TYPE_HIQ:
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd_hiq;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+ break;
+ case KFD_MQD_TYPE_SDMA:
+ mqd->init_mqd = init_mqd_sdma;
+ mqd->uninit_mqd = uninit_mqd_sdma;
+ mqd->load_mqd = load_mqd_sdma;
+ mqd->update_mqd = update_mqd_sdma;
+ mqd->destroy_mqd = destroy_mqd_sdma;
+ mqd->is_occupied = is_occupied_sdma;
+ break;
+ default:
+ kfree(mqd);
+ return NULL;
+ }
+
+ return mqd;
+}
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
new file mode 100644
index 000000000000..b3a7e3ba1e38
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+
+struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev)
+{
+ pr_warn("amdkfd: VI MQD is not currently supported\n");
+ return NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 5ce9233d2004..e2533d875f43 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -97,11 +97,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
- retval = kfd2kgd->allocate_mem(pm->dqm->dev->kgd,
- *rl_buffer_size,
- PAGE_SIZE,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &pm->ib_buffer_obj);
+ retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
+ &pm->ib_buffer_obj);
if (retval != 0) {
pr_err("kfd: failed to allocate runlist IB\n");
@@ -351,7 +348,7 @@ int pm_send_set_resources(struct packet_manager *pm,
pr_debug("kfd: In func %s\n", __func__);
mutex_lock(&pm->lock);
- pm->priv_queue->acquire_packet_buffer(pm->priv_queue,
+ pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
sizeof(*packet) / sizeof(uint32_t),
(unsigned int **)&packet);
if (packet == NULL) {
@@ -378,8 +375,7 @@ int pm_send_set_resources(struct packet_manager *pm,
packet->queue_mask_lo = lower_32_bits(res->queue_mask);
packet->queue_mask_hi = upper_32_bits(res->queue_mask);
- pm->priv_queue->submit_packet(pm->priv_queue);
- pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
@@ -405,7 +401,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t);
mutex_lock(&pm->lock);
- retval = pm->priv_queue->acquire_packet_buffer(pm->priv_queue,
+ retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
packet_size_dwords, &rl_buffer);
if (retval != 0)
goto fail_acquire_packet_buffer;
@@ -415,15 +411,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
if (retval != 0)
goto fail_create_runlist;
- pm->priv_queue->submit_packet(pm->priv_queue);
- pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
return retval;
fail_create_runlist:
- pm->priv_queue->rollback_packet(pm->priv_queue);
+ pm->priv_queue->ops.rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
mutex_unlock(&pm->lock);
fail_create_runlist_ib:
@@ -441,7 +436,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
BUG_ON(!pm || !fence_address);
mutex_lock(&pm->lock);
- retval = pm->priv_queue->acquire_packet_buffer(
+ retval = pm->priv_queue->ops.acquire_packet_buffer(
pm->priv_queue,
sizeof(struct pm4_query_status) / sizeof(uint32_t),
(unsigned int **)&packet);
@@ -462,8 +457,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
packet->data_hi = upper_32_bits((uint64_t)fence_value);
packet->data_lo = lower_32_bits((uint64_t)fence_value);
- pm->priv_queue->submit_packet(pm->priv_queue);
- pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
return 0;
@@ -485,7 +479,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
BUG_ON(!pm);
mutex_lock(&pm->lock);
- retval = pm->priv_queue->acquire_packet_buffer(
+ retval = pm->priv_queue->ops.acquire_packet_buffer(
pm->priv_queue,
sizeof(struct pm4_unmap_queues) / sizeof(uint32_t),
&buffer);
@@ -540,8 +534,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
break;
};
- pm->priv_queue->submit_packet(pm->priv_queue);
- pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT);
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
return 0;
@@ -557,8 +550,7 @@ void pm_release_ib(struct packet_manager *pm)
mutex_lock(&pm->lock);
if (pm->allocated) {
- kfd2kgd->free_mem(pm->dqm->dev->kgd,
- (struct kgd_mem *) pm->ib_buffer_obj);
+ kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
pm->allocated = false;
}
mutex_unlock(&pm->lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
index 4c25ef504f79..6cfe7f1f18cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -30,7 +30,7 @@ static DEFINE_MUTEX(pasid_mutex);
int kfd_pasid_init(void)
{
- pasid_limit = max_num_of_processes;
+ pasid_limit = KFD_MAX_NUM_OF_PROCESSES;
pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
if (!pasid_bitmap)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index a5edb29507e3..5a44f2fecf38 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -52,20 +52,19 @@
#define kfd_alloc_struct(ptr_to_struct) \
((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
-/* Kernel module parameter to specify maximum number of supported processes */
-extern int max_num_of_processes;
-
-#define KFD_MAX_NUM_OF_PROCESSES_DEFAULT 32
#define KFD_MAX_NUM_OF_PROCESSES 512
+#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
/*
- * Kernel module parameter to specify maximum number of supported queues
- * per process
+ * Kernel module parameter to specify maximum number of supported queues per
+ * device
*/
-extern int max_num_of_queues_per_process;
+extern int max_num_of_queues_per_device;
-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT 128
-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \
+ (KFD_MAX_NUM_OF_PROCESSES * \
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
#define KFD_KERNEL_QUEUE_SIZE 2048
@@ -104,12 +103,26 @@ enum cache_policy {
cache_policy_noncoherent
};
+enum asic_family_type {
+ CHIP_KAVERI = 0,
+ CHIP_CARRIZO
+};
+
struct kfd_device_info {
+ unsigned int asic_family;
unsigned int max_pasid_bits;
size_t ih_ring_entry_size;
+ uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
};
+struct kfd_mem_obj {
+ uint32_t range_start;
+ uint32_t range_end;
+ uint64_t gpu_addr;
+ uint32_t *cpu_ptr;
+};
+
struct kfd_dev {
struct kgd_dev *kgd;
@@ -135,22 +148,18 @@ struct kfd_dev {
struct kgd2kfd_shared_resources shared_resources;
- void *interrupt_ring;
- size_t interrupt_ring_size;
- atomic_t interrupt_ring_rptr;
- atomic_t interrupt_ring_wptr;
- struct work_struct interrupt_work;
- spinlock_t interrupt_lock;
+ void *gtt_mem;
+ uint64_t gtt_start_gpu_addr;
+ void *gtt_start_cpu_ptr;
+ void *gtt_sa_bitmap;
+ struct mutex gtt_sa_lock;
+ unsigned int gtt_sa_chunk_size;
+ unsigned int gtt_sa_num_of_chunks;
/* QCM Device instance */
struct device_queue_manager *dqm;
bool init_complete;
- /*
- * Interrupts of interest to KFD are copied
- * from the HW ring into a SW ring.
- */
- bool interrupts_active;
};
/* KGD2KFD callbacks */
@@ -162,12 +171,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd);
extern const struct kfd2kgd_calls *kfd2kgd;
-struct kfd_mem_obj {
- void *bo;
- uint64_t gpu_addr;
- uint32_t *cpu_ptr;
-};
-
enum kfd_mempool {
KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
@@ -285,6 +288,15 @@ struct queue_properties {
bool is_active;
/* Not relevant for user mode queues in cp scheduling */
unsigned int vmid;
+ /* Relevant only for sdma queues*/
+ uint32_t sdma_engine_id;
+ uint32_t sdma_queue_id;
+ uint32_t sdma_vm_addr;
+ /* Relevant only for VI */
+ uint64_t eop_ring_buffer_address;
+ uint32_t eop_ring_buffer_size;
+ uint64_t ctx_save_restore_area_address;
+ uint32_t ctx_save_restore_area_size;
};
/**
@@ -327,6 +339,8 @@ struct queue {
uint32_t pipe;
uint32_t queue;
+ unsigned int sdma_id;
+
struct kfd_process *process;
struct kfd_dev *device;
};
@@ -335,10 +349,10 @@ struct queue {
* Please read the kfd_mqd_manager.h description.
*/
enum KFD_MQD_TYPE {
- KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */
- KFD_MQD_TYPE_CIK_HIQ, /* for hiq */
- KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */
- KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */
+ KFD_MQD_TYPE_COMPUTE = 0, /* for no cp scheduling */
+ KFD_MQD_TYPE_HIQ, /* for hiq */
+ KFD_MQD_TYPE_CP, /* for cp queues and diq */
+ KFD_MQD_TYPE_SDMA, /* for sdma queues */
KFD_MQD_TYPE_MAX
};
@@ -490,8 +504,9 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
struct kfd_process *p);
void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid);
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
- struct kfd_process *p,
- int create_pdd);
+ struct kfd_process *p);
+struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+ struct kfd_process *p);
/* Process device data iterator */
struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
@@ -519,6 +534,13 @@ unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int queue_id);
+/* GTT Sub-Allocator */
+
+int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
+ struct kfd_mem_obj **mem_obj);
+
+int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj);
+
extern struct device *kfd_device;
/* Topology */
@@ -531,10 +553,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
/* Interrupts */
-int kfd_interrupt_init(struct kfd_dev *dev);
-void kfd_interrupt_exit(struct kfd_dev *dev);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
-bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
/* Power Management */
void kgd2kfd_suspend(struct kfd_dev *kfd);
@@ -546,6 +565,8 @@ int kfd_init_apertures(struct kfd_process *process);
/* Queue Context Management */
inline uint32_t lower_32(uint64_t x);
inline uint32_t upper_32(uint64_t x);
+struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd);
+inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m);
int init_queue(struct queue **q, struct queue_properties properties);
void uninit_queue(struct queue *q);
@@ -554,6 +575,10 @@ void print_queue(struct queue *q);
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 3c76ef05cbcf..a369c149d172 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -311,24 +311,29 @@ err_alloc_process:
}
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
- struct kfd_process *p,
- int create_pdd)
+ struct kfd_process *p)
{
struct kfd_process_device *pdd = NULL;
list_for_each_entry(pdd, &p->per_device_data, per_device_list)
if (pdd->dev == dev)
- return pdd;
-
- if (create_pdd) {
- pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
- if (pdd != NULL) {
- pdd->dev = dev;
- INIT_LIST_HEAD(&pdd->qpd.queues_list);
- INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
- pdd->qpd.dqm = dev->dqm;
- list_add(&pdd->per_device_list, &p->per_device_data);
- }
+ break;
+
+ return pdd;
+}
+
+struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+ struct kfd_process *p)
+{
+ struct kfd_process_device *pdd = NULL;
+
+ pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
+ if (pdd != NULL) {
+ pdd->dev = dev;
+ INIT_LIST_HEAD(&pdd->qpd.queues_list);
+ INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
+ pdd->qpd.dqm = dev->dqm;
+ list_add(&pdd->per_device_list, &p->per_device_data);
}
return pdd;
@@ -344,11 +349,14 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
struct kfd_process *p)
{
- struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p, 1);
+ struct kfd_process_device *pdd;
int err;
- if (pdd == NULL)
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ pr_err("Process device data doesn't exist\n");
return ERR_PTR(-ENOMEM);
+ }
if (pdd->bound)
return pdd;
@@ -384,7 +392,7 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
pqm_uninit(&p->pqm);
- pdd = kfd_get_process_device_data(dev, p, 0);
+ pdd = kfd_get_process_device_data(dev, p);
/*
* Just mark pdd as unbound, because we still need it to call
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 47526780d736..530b82c4e78b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -54,11 +54,11 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
pr_debug("kfd: in %s\n", __func__);
found = find_first_zero_bit(pqm->queue_slot_bitmap,
- max_num_of_queues_per_process);
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
pr_debug("kfd: the new slot id %lu\n", found);
- if (found >= max_num_of_queues_per_process) {
+ if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
pr_info("amdkfd: Can not open more queues for process with pasid %d\n",
pqm->process->pasid);
return -ENOMEM;
@@ -76,7 +76,7 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
INIT_LIST_HEAD(&pqm->queues);
pqm->queue_slot_bitmap =
- kzalloc(DIV_ROUND_UP(max_num_of_queues_per_process,
+ kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
BITS_PER_BYTE), GFP_KERNEL);
if (pqm->queue_slot_bitmap == NULL)
return -ENOMEM;
@@ -128,7 +128,6 @@ static int create_cp_queue(struct process_queue_manager *pqm,
/* let DQM handle it*/
q_properties->vmid = 0;
q_properties->queue_id = qid;
- q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
retval = init_queue(q, *q_properties);
if (retval != 0)
@@ -167,8 +166,11 @@ int pqm_create_queue(struct process_queue_manager *pqm,
q = NULL;
kq = NULL;
- pdd = kfd_get_process_device_data(dev, pqm->process, 1);
- BUG_ON(!pdd);
+ pdd = kfd_get_process_device_data(dev, pqm->process);
+ if (!pdd) {
+ pr_err("Process device data doesn't exist\n");
+ return -1;
+ }
retval = find_available_queue_slot(pqm, qid);
if (retval != 0)
@@ -176,7 +178,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (list_empty(&pqm->queues)) {
pdd->qpd.pqm = pqm;
- dev->dqm->register_process(dev->dqm, &pdd->qpd);
+ dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
}
pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL);
@@ -186,6 +188,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
switch (type) {
+ case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_COMPUTE:
/* check if there is over subscription */
if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
@@ -201,8 +204,9 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
- retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd,
+ retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
&q->properties.vmid);
+ pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
case KFD_QUEUE_TYPE_DIQ:
@@ -214,7 +218,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
kq->queue->properties.queue_id = *qid;
pqn->kq = kq;
pqn->q = NULL;
- retval = dev->dqm->create_kernel_queue(dev->dqm, kq, &pdd->qpd);
+ retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
+ kq, &pdd->qpd);
break;
default:
BUG();
@@ -222,7 +227,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("kfd: error dqm create queue\n");
+ pr_debug("Error dqm create queue\n");
goto err_create_queue;
}
@@ -241,7 +246,10 @@ int pqm_create_queue(struct process_queue_manager *pqm,
err_create_queue:
kfree(pqn);
err_allocate_pqn:
+ /* check if queues list is empty unregister process from device */
clear_bit(*qid, pqm->queue_slot_bitmap);
+ if (list_empty(&pqm->queues))
+ dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
return retval;
}
@@ -273,19 +281,22 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
dev = pqn->q->device;
BUG_ON(!dev);
- pdd = kfd_get_process_device_data(dev, pqm->process, 1);
- BUG_ON(!pdd);
+ pdd = kfd_get_process_device_data(dev, pqm->process);
+ if (!pdd) {
+ pr_err("Process device data doesn't exist\n");
+ return -1;
+ }
if (pqn->kq) {
/* destroy kernel queue (DIQ) */
dqm = pqn->kq->dev->dqm;
- dqm->destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
+ dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
kernel_queue_uninit(pqn->kq);
}
if (pqn->q) {
dqm = pqn->q->device->dqm;
- retval = dqm->destroy_queue(dqm, &pdd->qpd, pqn->q);
+ retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval != 0)
return retval;
@@ -297,7 +308,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
clear_bit(qid, pqm->queue_slot_bitmap);
if (list_empty(&pqm->queues))
- dqm->unregister_process(dqm, &pdd->qpd);
+ dqm->ops.unregister_process(dqm, &pdd->qpd);
return retval;
}
@@ -311,14 +322,19 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
BUG_ON(!pqm);
pqn = get_queue_by_qid(pqm, qid);
- BUG_ON(!pqn);
+ if (!pqn) {
+ pr_debug("amdkfd: No queue %d exists for update operation\n",
+ qid);
+ return -EFAULT;
+ }
pqn->q->properties.queue_address = p->queue_address;
pqn->q->properties.queue_size = p->queue_size;
pqn->q->properties.queue_percent = p->queue_percent;
pqn->q->properties.priority = p->priority;
- retval = pqn->q->device->dqm->update_queue(pqn->q->device->dqm, pqn->q);
+ retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
+ pqn->q);
if (retval != 0)
return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index cca1708fd811..498399323a8c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -27,6 +27,7 @@
#include <linux/acpi.h>
#include <linux/hash.h>
#include <linux/cpufreq.h>
+#include <linux/log2.h>
#include "kfd_priv.h"
#include "kfd_crat.h"
@@ -630,10 +631,10 @@ static struct kobj_type cache_type = {
static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- ssize_t ret;
struct kfd_topology_device *dev;
char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
uint32_t i;
+ uint32_t log_max_watch_addr;
/* Making sure that the buffer is an empty string */
buffer[0] = 0;
@@ -641,8 +642,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
if (strcmp(attr->name, "gpu_id") == 0) {
dev = container_of(attr, struct kfd_topology_device,
attr_gpuid);
- ret = sysfs_show_32bit_val(buffer, dev->gpu_id);
- } else if (strcmp(attr->name, "name") == 0) {
+ return sysfs_show_32bit_val(buffer, dev->gpu_id);
+ }
+
+ if (strcmp(attr->name, "name") == 0) {
dev = container_of(attr, struct kfd_topology_device,
attr_name);
for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) {
@@ -652,80 +655,90 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
break;
}
public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0;
- ret = sysfs_show_str_val(buffer, public_name);
- } else {
- dev = container_of(attr, struct kfd_topology_device,
- attr_props);
- sysfs_show_32bit_prop(buffer, "cpu_cores_count",
- dev->node_props.cpu_cores_count);
- sysfs_show_32bit_prop(buffer, "simd_count",
- dev->node_props.simd_count);
-
- if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
- pr_warn("kfd: mem_banks_count truncated from %d to %d\n",
- dev->node_props.mem_banks_count,
- dev->mem_bank_count);
- sysfs_show_32bit_prop(buffer, "mem_banks_count",
- dev->mem_bank_count);
- } else {
- sysfs_show_32bit_prop(buffer, "mem_banks_count",
- dev->node_props.mem_banks_count);
- }
+ return sysfs_show_str_val(buffer, public_name);
+ }
- sysfs_show_32bit_prop(buffer, "caches_count",
- dev->node_props.caches_count);
- sysfs_show_32bit_prop(buffer, "io_links_count",
- dev->node_props.io_links_count);
- sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
- dev->node_props.cpu_core_id_base);
- sysfs_show_32bit_prop(buffer, "simd_id_base",
- dev->node_props.simd_id_base);
- sysfs_show_32bit_prop(buffer, "capability",
- dev->node_props.capability);
- sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
- dev->node_props.max_waves_per_simd);
- sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
- dev->node_props.lds_size_in_kb);
- sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
- dev->node_props.gds_size_in_kb);
- sysfs_show_32bit_prop(buffer, "wave_front_size",
- dev->node_props.wave_front_size);
- sysfs_show_32bit_prop(buffer, "array_count",
- dev->node_props.array_count);
- sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
- dev->node_props.simd_arrays_per_engine);
- sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
- dev->node_props.cu_per_simd_array);
- sysfs_show_32bit_prop(buffer, "simd_per_cu",
- dev->node_props.simd_per_cu);
- sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
- dev->node_props.max_slots_scratch_cu);
- sysfs_show_32bit_prop(buffer, "vendor_id",
- dev->node_props.vendor_id);
- sysfs_show_32bit_prop(buffer, "device_id",
- dev->node_props.device_id);
- sysfs_show_32bit_prop(buffer, "location_id",
- dev->node_props.location_id);
-
- if (dev->gpu) {
- sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
- kfd2kgd->get_max_engine_clock_in_mhz(
- dev->gpu->kgd));
- sysfs_show_64bit_prop(buffer, "local_mem_size",
- kfd2kgd->get_vmem_size(dev->gpu->kgd));
-
- sysfs_show_32bit_prop(buffer, "fw_version",
- kfd2kgd->get_fw_version(
- dev->gpu->kgd,
- KGD_ENGINE_MEC1));
+ dev = container_of(attr, struct kfd_topology_device,
+ attr_props);
+ sysfs_show_32bit_prop(buffer, "cpu_cores_count",
+ dev->node_props.cpu_cores_count);
+ sysfs_show_32bit_prop(buffer, "simd_count",
+ dev->node_props.simd_count);
+
+ if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
+ pr_warn("kfd: mem_banks_count truncated from %d to %d\n",
+ dev->node_props.mem_banks_count,
+ dev->mem_bank_count);
+ sysfs_show_32bit_prop(buffer, "mem_banks_count",
+ dev->mem_bank_count);
+ } else {
+ sysfs_show_32bit_prop(buffer, "mem_banks_count",
+ dev->node_props.mem_banks_count);
+ }
+ sysfs_show_32bit_prop(buffer, "caches_count",
+ dev->node_props.caches_count);
+ sysfs_show_32bit_prop(buffer, "io_links_count",
+ dev->node_props.io_links_count);
+ sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
+ dev->node_props.cpu_core_id_base);
+ sysfs_show_32bit_prop(buffer, "simd_id_base",
+ dev->node_props.simd_id_base);
+ sysfs_show_32bit_prop(buffer, "capability",
+ dev->node_props.capability);
+ sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
+ dev->node_props.max_waves_per_simd);
+ sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
+ dev->node_props.lds_size_in_kb);
+ sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
+ dev->node_props.gds_size_in_kb);
+ sysfs_show_32bit_prop(buffer, "wave_front_size",
+ dev->node_props.wave_front_size);
+ sysfs_show_32bit_prop(buffer, "array_count",
+ dev->node_props.array_count);
+ sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
+ dev->node_props.simd_arrays_per_engine);
+ sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
+ dev->node_props.cu_per_simd_array);
+ sysfs_show_32bit_prop(buffer, "simd_per_cu",
+ dev->node_props.simd_per_cu);
+ sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
+ dev->node_props.max_slots_scratch_cu);
+ sysfs_show_32bit_prop(buffer, "vendor_id",
+ dev->node_props.vendor_id);
+ sysfs_show_32bit_prop(buffer, "device_id",
+ dev->node_props.device_id);
+ sysfs_show_32bit_prop(buffer, "location_id",
+ dev->node_props.location_id);
+
+ if (dev->gpu) {
+ log_max_watch_addr =
+ __ilog2_u32(dev->gpu->device_info->num_of_watch_points);
+
+ if (log_max_watch_addr) {
+ dev->node_props.capability |=
+ HSA_CAP_WATCH_POINTS_SUPPORTED;
+
+ dev->node_props.capability |=
+ ((log_max_watch_addr <<
+ HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
+ HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
}
- ret = sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
- cpufreq_quick_get_max(0)/1000);
+ sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
+ kfd2kgd->get_max_engine_clock_in_mhz(
+ dev->gpu->kgd));
+ sysfs_show_64bit_prop(buffer, "local_mem_size",
+ kfd2kgd->get_vmem_size(dev->gpu->kgd));
+
+ sysfs_show_32bit_prop(buffer, "fw_version",
+ kfd2kgd->get_fw_version(
+ dev->gpu->kgd,
+ KGD_ENGINE_MEC1));
}
- return ret;
+ return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
+ cpufreq_quick_get_max(0)/1000);
}
static const struct sysfs_ops node_ops = {