aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile53
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c87
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.h10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c122
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h32
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h1
11 files changed, 157 insertions, 167 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 3858820a0055..fbf0ee5201c3 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -3,7 +3,7 @@
#
config HSA_AMD
- tristate "HSA kernel driver for AMD GPU devices"
+ bool "HSA kernel driver for AMD GPU devices"
depends on DRM_AMDGPU && X86_64
imply AMD_IOMMU_V2
select MMU_NOTIFIER
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index ffd096fffc1c..69ec96998bb9 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -23,26 +23,41 @@
# Makefile for Heterogenous System Architecture support for AMD GPU devices
#
-ccflags-y := -Idrivers/gpu/drm/amd/include/ \
- -Idrivers/gpu/drm/amd/include/asic_reg
-
-amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
- kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
- kfd_process.o kfd_queue.o kfd_mqd_manager.o \
- kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
- kfd_mqd_manager_v9.o \
- kfd_kernel_queue.o kfd_kernel_queue_cik.o \
- kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
- kfd_packet_manager.o kfd_process_queue_manager.o \
- kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \
- kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \
- kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
- kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
+AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
+ $(AMDKFD_PATH)/kfd_device.o \
+ $(AMDKFD_PATH)/kfd_chardev.o \
+ $(AMDKFD_PATH)/kfd_topology.o \
+ $(AMDKFD_PATH)/kfd_pasid.o \
+ $(AMDKFD_PATH)/kfd_doorbell.o \
+ $(AMDKFD_PATH)/kfd_flat_memory.o \
+ $(AMDKFD_PATH)/kfd_process.o \
+ $(AMDKFD_PATH)/kfd_queue.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager_cik.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager_vi.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
+ $(AMDKFD_PATH)/kfd_kernel_queue.o \
+ $(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
+ $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
+ $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
+ $(AMDKFD_PATH)/kfd_packet_manager.o \
+ $(AMDKFD_PATH)/kfd_process_queue_manager.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager_vi.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager_v9.o \
+ $(AMDKFD_PATH)/kfd_interrupt.o \
+ $(AMDKFD_PATH)/kfd_events.o \
+ $(AMDKFD_PATH)/cik_event_interrupt.o \
+ $(AMDKFD_PATH)/kfd_int_process_v9.o \
+ $(AMDKFD_PATH)/kfd_dbgdev.o \
+ $(AMDKFD_PATH)/kfd_dbgmgr.o \
+ $(AMDKFD_PATH)/kfd_crat.o
ifneq ($(CONFIG_AMD_IOMMU_V2),)
-amdkfd-y += kfd_iommu.o
+AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
endif
-amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
-
-obj-$(CONFIG_HSA_AMD) += amdkfd.o
+ifneq ($(CONFIG_DEBUG_FS),)
+AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
+endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 297b36c26a05..758398bdb39b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1210,7 +1210,7 @@ err_unlock:
return ret;
}
-static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
struct kfd_local_mem_info mem_info;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index ee4996029a86..d4560f1869bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -346,15 +346,15 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
struct list_head *device_list)
{
struct kfd_iolink_properties *props = NULL, *props2;
- struct kfd_topology_device *dev, *cpu_dev;
+ struct kfd_topology_device *dev, *to_dev;
uint32_t id_from;
uint32_t id_to;
id_from = iolink->proximity_domain_from;
id_to = iolink->proximity_domain_to;
- pr_debug("Found IO link entry in CRAT table with id_from=%d\n",
- id_from);
+ pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
+ id_from, id_to);
list_for_each_entry(dev, device_list, list) {
if (id_from == dev->proximity_domain) {
props = kfd_alloc_struct(props);
@@ -369,6 +369,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
props->weight = 20;
+ else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
+ props->weight = 15;
else
props->weight = node_distance(id_from, id_to);
@@ -389,20 +391,23 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
/* CPU topology is created before GPUs are detected, so CPU->GPU
* links are not built at that time. If a PCIe type is discovered, it
* means a GPU is detected and we are adding GPU->CPU to the topology.
- * At this time, also add the corresponded CPU->GPU link.
+ * At this time, also add the corresponded CPU->GPU link if GPU
+ * is large bar.
+ * For xGMI, we only added the link with one direction in the crat
+ * table, add corresponded reversed direction link now.
*/
- if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) {
- cpu_dev = kfd_topology_device_by_proximity_domain(id_to);
- if (!cpu_dev)
+ if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
+ to_dev = kfd_topology_device_by_proximity_domain(id_to);
+ if (!to_dev)
return -ENODEV;
/* same everything but the other direction */
props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
- cpu_dev->io_link_count++;
- cpu_dev->node_props.io_links_count++;
- list_add_tail(&props2->list, &cpu_dev->io_link_props);
+ to_dev->io_link_count++;
+ to_dev->node_props.io_links_count++;
+ list_add_tail(&props2->list, &to_dev->io_link_props);
}
return 0;
@@ -1037,7 +1042,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
*
* Return 0 if successful else return -ve value
*/
-static int kfd_fill_gpu_direct_io_link(int *avail_size,
+static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
struct kfd_dev *kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain)
@@ -1052,6 +1057,8 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size,
sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+ if (kfd_dev_is_large_bar(kdev))
+ sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
/* Fill in IOLINK subtype.
* TODO: Fill-in other fields of iolink subtype
@@ -1069,6 +1076,29 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size,
return 0;
}
+static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
+ struct kfd_dev *kdev,
+ struct crat_subtype_iolink *sub_type_hdr,
+ uint32_t proximity_domain_from,
+ uint32_t proximity_domain_to)
+{
+ *avail_size -= sizeof(struct crat_subtype_iolink);
+ if (*avail_size < 0)
+ return -ENOMEM;
+
+ memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+ sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+ sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+ sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
+ CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
+
+ sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
+ sub_type_hdr->proximity_domain_from = proximity_domain_from;
+ sub_type_hdr->proximity_domain_to = proximity_domain_to;
+ return 0;
+}
+
/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU
*
* @pcrat_image: Fill in VCRAT for GPU
@@ -1081,14 +1111,16 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
{
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
struct crat_subtype_generic *sub_type_hdr;
+ struct kfd_local_mem_info local_mem_info;
+ struct kfd_topology_device *peer_dev;
struct crat_subtype_computeunit *cu;
struct kfd_cu_info cu_info;
int avail_size = *size;
uint32_t total_num_of_cu;
int num_of_cache_entries = 0;
int cache_mem_filled = 0;
+ uint32_t nid = 0;
int ret = 0;
- struct kfd_local_mem_info local_mem_info;
if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
return -EINVAL;
@@ -1212,7 +1244,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
*/
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
cache_mem_filled);
- ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev,
+ ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
if (ret < 0)
@@ -1221,6 +1253,35 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
crat_table->length += sub_type_hdr->length;
crat_table->total_entries++;
+
+ /* Fill in Subtype: IO_LINKS
+ * Direct links from GPU to other GPUs through xGMI.
+ * We will loop GPUs that already be processed (with lower value
+ * of proximity_domain), add the link for the GPUs with same
+ * hive id (from this GPU to other GPU) . The reversed iolink
+ * (from other GPU to this GPU) will be added
+ * in kfd_parse_subtype_iolink.
+ */
+ if (kdev->hive_id) {
+ for (nid = 0; nid < proximity_domain; ++nid) {
+ peer_dev = kfd_topology_device_by_proximity_domain(nid);
+ if (!peer_dev->gpu)
+ continue;
+ if (peer_dev->gpu->hive_id != kdev->hive_id)
+ continue;
+ sub_type_hdr = (typeof(sub_type_hdr))(
+ (char *)sub_type_hdr +
+ sizeof(struct crat_subtype_iolink));
+ ret = kfd_fill_gpu_xgmi_link_to_gpu(
+ &avail_size, kdev,
+ (struct crat_subtype_iolink *)sub_type_hdr,
+ proximity_domain, nid);
+ if (ret < 0)
+ return ret;
+ crat_table->length += sub_type_hdr->length;
+ crat_table->total_entries++;
+ }
+ }
*size = crat_table->length;
pr_info("Virtual CRAT table created for GPU\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index b5cd182b9edd..7c3f192fe25f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -232,7 +232,8 @@ struct crat_subtype_ccompute {
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2)
#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3)
#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4)
-#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffffffe0
+#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL (1 << 31)
+#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0x7fffffe0
/*
* IO interface types
@@ -248,7 +249,12 @@ struct crat_subtype_ccompute {
#define CRAT_IOLINK_TYPE_RAPID_IO 8
#define CRAT_IOLINK_TYPE_INFINIBAND 9
#define CRAT_IOLINK_TYPE_RESERVED3 10
-#define CRAT_IOLINK_TYPE_OTHER 11
+#define CRAT_IOLINK_TYPE_XGMI 11
+#define CRAT_IOLINK_TYPE_XGOP 12
+#define CRAT_IOLINK_TYPE_GZ 13
+#define CRAT_IOLINK_TYPE_ETHERNET_RDMA 14
+#define CRAT_IOLINK_TYPE_RDMA_OTHER 15
+#define CRAT_IOLINK_TYPE_OTHER 16
#define CRAT_IOLINK_TYPE_MAX 255
#define CRAT_IOLINK_RESERVED_LENGTH 24
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 29ac74f40dce..9b4e6ad4a7df 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -477,6 +477,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_doorbell_error;
}
+ if (kfd->kfd2kgd->get_hive_id)
+ kfd->hive_id = kfd->kfd2kgd->get_hive_id(kfd->kgd);
+
if (kfd_topology_add_device(kfd)) {
dev_err(kfd_device, "Error adding device to topology\n");
goto kfd_topology_add_device_error;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 6e1f5c7c2d4b..8018163414ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -20,21 +20,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/moduleparam.h>
#include <linux/device.h>
-#include <linux/printk.h>
#include "kfd_priv.h"
-#define KFD_DRIVER_AUTHOR "AMD Inc. and others"
-
-#define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs"
-#define KFD_DRIVER_DATE "20150421"
-#define KFD_DRIVER_MAJOR 0
-#define KFD_DRIVER_MINOR 7
-#define KFD_DRIVER_PATCHLEVEL 2
-
static const struct kgd2kfd_calls kgd2kfd = {
.exit = kgd2kfd_exit,
.probe = kgd2kfd_probe,
@@ -51,77 +40,7 @@ static const struct kgd2kfd_calls kgd2kfd = {
.post_reset = kgd2kfd_post_reset,
};
-int sched_policy = KFD_SCHED_POLICY_HWS;
-module_param(sched_policy, int, 0444);
-MODULE_PARM_DESC(sched_policy,
- "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
-
-int hws_max_conc_proc = 8;
-module_param(hws_max_conc_proc, int, 0444);
-MODULE_PARM_DESC(hws_max_conc_proc,
- "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
-
-int cwsr_enable = 1;
-module_param(cwsr_enable, int, 0444);
-MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = off, 1 = on (default))");
-
-int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
-module_param(max_num_of_queues_per_device, int, 0444);
-MODULE_PARM_DESC(max_num_of_queues_per_device,
- "Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
-
-int send_sigterm;
-module_param(send_sigterm, int, 0444);
-MODULE_PARM_DESC(send_sigterm,
- "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
-
-int debug_largebar;
-module_param(debug_largebar, int, 0444);
-MODULE_PARM_DESC(debug_largebar,
- "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
-
-int ignore_crat;
-module_param(ignore_crat, int, 0444);
-MODULE_PARM_DESC(ignore_crat,
- "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
-
-int noretry;
-module_param(noretry, int, 0644);
-MODULE_PARM_DESC(noretry,
- "Set sh_mem_config.retry_disable on GFXv9+ dGPUs (0 = retry enabled (default), 1 = retry disabled)");
-
-int halt_if_hws_hang;
-module_param(halt_if_hws_hang, int, 0644);
-MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
-
-
-static int amdkfd_init_completed;
-
-
-int kgd2kfd_init(unsigned int interface_version,
- const struct kgd2kfd_calls **g2f)
-{
- if (!amdkfd_init_completed)
- return -EPROBE_DEFER;
-
- /*
- * Only one interface version is supported,
- * no kfd/kgd version skew allowed.
- */
- if (interface_version != KFD_INTERFACE_VERSION)
- return -EINVAL;
-
- *g2f = &kgd2kfd;
-
- return 0;
-}
-EXPORT_SYMBOL(kgd2kfd_init);
-
-void kgd2kfd_exit(void)
-{
-}
-
-static int __init kfd_module_init(void)
+static int kfd_init(void)
{
int err;
@@ -129,7 +48,7 @@ static int __init kfd_module_init(void)
if ((sched_policy < KFD_SCHED_POLICY_HWS) ||
(sched_policy > KFD_SCHED_POLICY_NO_HWS)) {
pr_err("sched_policy has invalid value\n");
- return -1;
+ return -EINVAL;
}
/* Verify module parameters */
@@ -137,7 +56,7 @@ static int __init kfd_module_init(void)
(max_num_of_queues_per_device >
KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
pr_err("max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
- return -1;
+ return -EINVAL;
}
err = kfd_chardev_init();
@@ -154,10 +73,6 @@ static int __init kfd_module_init(void)
kfd_debugfs_init();
- amdkfd_init_completed = 1;
-
- dev_info(kfd_device, "Initialized module\n");
-
return 0;
err_create_wq:
@@ -168,23 +83,30 @@ err_ioctl:
return err;
}
-static void __exit kfd_module_exit(void)
+static void kfd_exit(void)
{
- amdkfd_init_completed = 0;
-
kfd_debugfs_fini();
kfd_process_destroy_wq();
kfd_topology_shutdown();
kfd_chardev_exit();
- pr_info("amdkfd: Removed module\n");
}
-module_init(kfd_module_init);
-module_exit(kfd_module_exit);
+int kgd2kfd_init(unsigned int interface_version,
+ const struct kgd2kfd_calls **g2f)
+{
+ int err;
+
+ err = kfd_init();
+ if (err)
+ return err;
+
+ *g2f = &kgd2kfd;
+
+ return 0;
+}
+EXPORT_SYMBOL(kgd2kfd_init);
-MODULE_AUTHOR(KFD_DRIVER_AUTHOR);
-MODULE_DESCRIPTION(KFD_DRIVER_DESC);
-MODULE_LICENSE("GPL and additional rights");
-MODULE_VERSION(__stringify(KFD_DRIVER_MAJOR) "."
- __stringify(KFD_DRIVER_MINOR) "."
- __stringify(KFD_DRIVER_PATCHLEVEL));
+void kgd2kfd_exit(void)
+{
+ kfd_exit();
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 92b285ca73aa..b0064b08aa11 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -103,7 +103,6 @@
*/
extern int max_num_of_queues_per_device;
-#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \
(KFD_MAX_NUM_OF_PROCESSES * \
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
@@ -149,33 +148,6 @@ extern int noretry;
*/
extern int halt_if_hws_hang;
-/**
- * enum kfd_sched_policy
- *
- * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp)
- * scheduling. In this scheduling mode we're using the firmware code to
- * schedule the user mode queues and kernel queues such as HIQ and DIQ.
- * the HIQ queue is used as a special queue that dispatches the configuration
- * to the cp and the user mode queues list that are currently running.
- * the DIQ queue is a debugging queue that dispatches debugging commands to the
- * firmware.
- * in this scheduling mode user mode queues over subscription feature is
- * enabled.
- *
- * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but the over
- * subscription feature disabled.
- *
- * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly
- * set the command processor registers and sets the queues "manually". This
- * mode is used *ONLY* for debugging proposes.
- *
- */
-enum kfd_sched_policy {
- KFD_SCHED_POLICY_HWS = 0,
- KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION,
- KFD_SCHED_POLICY_NO_HWS
-};
-
enum cache_policy {
cache_policy_coherent,
cache_policy_noncoherent
@@ -282,6 +254,9 @@ struct kfd_dev {
bool cwsr_enabled;
const void *cwsr_isa;
unsigned int cwsr_isa_size;
+
+ /* xGMI */
+ uint64_t hive_id;
};
/* KGD2KFD callbacks */
@@ -721,6 +696,7 @@ struct amdkfd_ioctl_desc {
unsigned int cmd_drv;
const char *name;
};
+bool kfd_dev_is_large_bar(struct kfd_dev *dev);
int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 4694386cc623..0039e451d9af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -322,8 +322,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
pdd->dev->id, p->pasid);
- if (pdd->drm_file)
+ if (pdd->drm_file) {
+ pdd->dev->kfd2kgd->release_process_vm(pdd->dev->kgd, pdd->vm);
fput(pdd->drm_file);
+ }
else if (pdd->vm)
pdd->dev->kfd2kgd->destroy_process_vm(
pdd->dev->kgd, pdd->vm);
@@ -687,11 +689,11 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (drm_file)
ret = dev->kfd2kgd->acquire_process_vm(
- dev->kgd, drm_file,
+ dev->kgd, drm_file, p->pasid,
&pdd->vm, &p->kgd_process_info, &p->ef);
else
ret = dev->kfd2kgd->create_process_vm(
- dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef);
+ dev->kgd, p->pasid, &pdd->vm, &p->kgd_process_info, &p->ef);
if (ret) {
pr_err("Failed to create process VM object\n");
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 80f5db4ef75f..0dff66be8d7a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.location_id);
sysfs_show_32bit_prop(buffer, "drm_render_minor",
dev->node_props.drm_render_minor);
+ sysfs_show_64bit_prop(buffer, "hive_id",
+ dev->node_props.hive_id);
if (dev->gpu) {
log_max_watch_addr =
@@ -1230,6 +1232,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.drm_render_minor =
gpu->shared_resources.drm_render_minor;
+ dev->node_props.hive_id = gpu->hive_id;
+
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 7d9c3f948dff..92a19be07344 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -49,6 +49,7 @@
#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000
struct kfd_node_properties {
+ uint64_t hive_id;
uint32_t cpu_cores_count;
uint32_t simd_count;
uint32_t mem_banks_count;