From 88807dc8d573c0f718d0d26f592f212c5a487cf0 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 4 Apr 2019 15:47:34 -0500 Subject: drm/amdgpu: Remap hdp coherency registers Remap HDP_MEM_COHERENCY_FLUSH_CNTL and HDP_REG_COHERENCY_FLUSH_CNTL to an empty page in mmio space. We will later map this page to process space so application can flush hdp. This can't be done properly at those registers' original location because it will expose more than desired registers to process space. v2: Use explicit register hole location v3: Moved remapped hdp registers into adev struct v4: Use more generic name for remapped page Expose register offset in kfd_ioctl.h v5: Move hdp register remap function to nbio ip function v6: Fixed operator precedence issue and other bugs Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index dc067ed0b72d..bb1b4280f53d 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -426,6 +426,13 @@ struct kfd_ioctl_import_dmabuf_args { __u32 dmabuf_fd; /* to KFD */ }; +/* Register offset inside the remapped mmio page + */ +enum kfd_mmio_remap { + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0, + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) -- cgit v1.2.3-59-g8ed1b From d8e408a82704c86ba87c3d58cfe69dcdb758aa07 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 11 Apr 2019 14:43:39 -0500 Subject: drm/amdkfd: Expose HDP registers to user space Introduce a new memory type (KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) and expose mmio page of HDP registers to user space through this new memory type. v2: moved remapped hdp regs to adev struct v3: rename the new memory type to ALLOC_MEM_FLAGS_MMIO_REMAP v4: use more generic function name v5: Fail remapped mmio allocation for asics before gfx9 Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 ++++--- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 ++++++ drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 1 + include/uapi/linux/kfd_ioctl.h | 1 + 6 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index aeead072fa79..401edb605fdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -519,6 +519,13 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) return adev->gmc.xgmi.hive_id; } +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->rmmio_remap.bus_addr; +} + int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4e37fa7e85b1..ea1f141db3ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -169,6 +169,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, uint32_t *flags); uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); #define read_user_wptr(mmptr, wptr, dst) \ ({ \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a6e5184d436c..00e013581a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1109,7 +1109,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (!offset || !*offset) return -EINVAL; user_addr = *offset; - } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { + } else if (flags & (ALLOC_MEM_FLAGS_DOORBELL | + ALLOC_MEM_FLAGS_MMIO_REMAP)) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_CPU; bo_type = ttm_bo_type_sg; @@ -1294,8 +1295,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the sync object */ amdgpu_sync_free(&mem->sync); - /* If the SG is not NULL, it's one we created for a doorbell - * BO. We need to free it. + /* If the SG is not NULL, it's one we created for a doorbell or mmio + * remap BO. We need to free it. */ if (mem->bo->tbo.sg) { sg_free_table(mem->bo->tbo.sg); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 083bd8114db1..d795e5018270 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1272,6 +1272,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, if (args->size != kfd_doorbell_process_slice(dev)) return -EINVAL; offset = kfd_get_process_doorbells(dev, p); + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { + if (args->size != PAGE_SIZE) + return -EINVAL; + offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + if (!offset) + return -ENOMEM; } mutex_lock(&p->mutex); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index b897aca9b4c9..98b9533e672b 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -174,6 +174,7 @@ struct tile_config { #define ALLOC_MEM_FLAGS_GTT (1 << 1) #define ALLOC_MEM_FLAGS_USERPTR (1 << 2) #define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) +#define ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) /* * Allocation flags attributes/access options. diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index bb1b4280f53d..1e7d5f3376b0 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -338,6 +338,7 @@ struct kfd_ioctl_acquire_vm_args { #define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) #define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2) #define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) +#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) /* Allocation flags: attributes/access options */ #define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) #define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) -- cgit v1.2.3-59-g8ed1b From 1b4670f6983156526c286723465fdf805070b45d Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 7 Feb 2019 14:02:27 -0600 Subject: drm/amdkfd: Introduce XGMI SDMA queue type Existing QUEUE_TYPE_SDMA means PCIe optimized SDMA queues. Introduce a new QUEUE_TYPE_SDMA_XGMI, which is optimized for non-PCIe transfer such as XGMI. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 15 +++ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 123 +++++++++++++++------ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 10 +- include/uapi/linux/kfd_ioctl.h | 7 +- 10 files changed, 132 insertions(+), 39 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 3ccaa38779ea..38ae53fe8182 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->type = KFD_QUEUE_TYPE_COMPUTE; else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) q_properties->type = KFD_QUEUE_TYPE_SDMA; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) + q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; else return -ENOTSUPP; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8202a5db3a35..1368b41cb92b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = { .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = { .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = { .needs_iommu_device = true, .needs_pci_atomics = true, .num_sdma_engines = 1, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; #endif @@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -218,6 +228,7 @@ static const struct kfd_device_info polaris12_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -234,6 +245,7 @@ static const struct kfd_device_info vega10_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -250,6 +262,7 @@ static const struct kfd_device_info vega10_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -266,6 +279,7 @@ static const struct kfd_device_info vega12_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -282,6 +296,7 @@ static const struct kfd_device_info vega20_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d41045d3fc3a..1562590d837e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static void deallocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int sdma_queue_id); + struct queue *q); static void kfd_process_hw_exception(struct work_struct *work); static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { - if (type == KFD_QUEUE_TYPE_SDMA) + if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) return KFD_MQD_TYPE_SDMA; return KFD_MQD_TYPE_CP; } @@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) return dqm->dev->device_info->num_sdma_engines; } +static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_xgmi_sdma_engines; +} + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) { return dqm->dev->device_info->num_sdma_engines * dqm->dev->device_info->num_sdma_queues_per_engine; } +unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_xgmi_sdma_engines + * dqm->dev->device_info->num_sdma_queues_per_engine; +} + void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) * preserve the user mode ABI. */ q->doorbell_id = q->properties.queue_id; - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { /* For SDMA queues on SOC15 with 8-byte doorbell, use static * doorbell assignments based on the engine and queue id. * The doobell index distance between RLC (2*i) and (2*i+1) @@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd, struct kfd_dev *dev = qpd->dqm->dev; if (!KFD_IS_SOC15(dev->device_info->asic_family) || - q->properties.type == KFD_QUEUE_TYPE_SDMA) + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) return; old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); @@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) retval = create_compute_queue_nocpsch(dqm, q, qpd); - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) retval = create_sdma_queue_nocpsch(dqm, q, qpd); else retval = -EINVAL; @@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's @@ -430,7 +446,10 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, deallocate_hqd(dqm, q); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); @@ -521,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) } } else if (prev_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -548,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = map_queues_cpsch(dqm); else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { if (WARN(q->process->mm != current->mm, "should only run in user thread")) retval = -EFAULT; @@ -840,6 +861,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->next_pipe_to_allocate = 0; dqm->sdma_queue_count = 0; + dqm->xgmi_sdma_queue_count = 0; for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); @@ -852,6 +874,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; + dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; return 0; } @@ -886,17 +909,34 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, { int bit; - if (dqm->sdma_bitmap == 0) - return -ENOMEM; - - bit = __ffs64(dqm->sdma_bitmap); - dqm->sdma_bitmap &= ~(1ULL << bit); - q->sdma_id = bit; - - q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); - q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (dqm->sdma_bitmap == 0) + return -ENOMEM; + bit = __ffs64(dqm->sdma_bitmap); + dqm->sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + q->properties.sdma_engine_id = q->sdma_id % + get_num_sdma_engines(dqm); + q->properties.sdma_queue_id = q->sdma_id / + get_num_sdma_engines(dqm); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + if (dqm->xgmi_sdma_bitmap == 0) + return -ENOMEM; + bit = __ffs64(dqm->xgmi_sdma_bitmap); + dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + /* sdma_engine_id is sdma id including + * both PCIe-optimized SDMAs and XGMI- + * optimized SDMAs. The calculation below + * assumes the first N engines are always + * PCIe-optimized ones + */ + q->properties.sdma_engine_id = get_num_sdma_engines(dqm) + + q->sdma_id % get_num_xgmi_sdma_engines(dqm); + q->properties.sdma_queue_id = q->sdma_id / + get_num_xgmi_sdma_engines(dqm); + } - pr_debug("SDMA id is: %d\n", q->sdma_id); pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); @@ -904,11 +944,17 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, } static void deallocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int sdma_id) + struct queue *q) { - if (sdma_id >= get_num_sdma_queues(dqm)) - return; - dqm->sdma_bitmap |= (1ULL << sdma_id); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (q->sdma_id >= get_num_sdma_queues(dqm)) + return; + dqm->sdma_bitmap |= (1ULL << q->sdma_id); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) + return; + dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); + } } static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, @@ -946,7 +992,7 @@ out_uninit_mqd: out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); return retval; } @@ -1004,8 +1050,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; + dqm->xgmi_sdma_queue_count = 0; dqm->active_runlist = false; dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; + dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); @@ -1127,7 +1175,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, goto out; } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { retval = allocate_sdma_queue(dqm, q); if (retval) goto out; @@ -1167,6 +1216,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. @@ -1182,8 +1233,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - deallocate_sdma_queue(dqm, q->sdma_id); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + deallocate_sdma_queue(dqm, q); out: return retval; } @@ -1216,7 +1268,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm) { int i, retval = 0; - for (i = 0; i < dqm->dev->device_info->num_sdma_engines; i++) { + for (i = 0; i < dqm->dev->device_info->num_sdma_engines + + dqm->dev->device_info->num_xgmi_sdma_engines; i++) { retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i); if (retval) @@ -1258,10 +1311,10 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, if (!dqm->active_runlist) return retval; - pr_debug("Before destroying queues, sdma queue count is : %u\n", - dqm->sdma_queue_count); + pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n", + dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count); - if (dqm->sdma_queue_count > 0) + if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count) unmap_sdma_queues(dqm); retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, @@ -1333,7 +1386,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } list_del(&q->list); @@ -1550,7 +1606,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } if (q->properties.is_active) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 3742fd340ec3..88b4c007696e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -181,10 +181,12 @@ struct device_queue_manager { unsigned int processes_count; unsigned int queue_count; unsigned int sdma_queue_count; + unsigned int xgmi_sdma_queue_count; unsigned int total_queue_count; unsigned int next_pipe_to_allocate; unsigned int *allocated_queues; uint64_t sdma_bitmap; + uint64_t xgmi_sdma_bitmap; unsigned int vmid_bitmap; uint64_t pipelines_addr; struct kfd_mem_obj *pipeline_mem; @@ -216,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm); unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); +unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm); static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c index 33830b1a5a54..604570bea6bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -175,6 +175,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, queue_type__mes_map_queues__debug_interface_queue_vi; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = q->properties.sdma_engine_id + engine_sel__mes_map_queues__sdma0_vi; use_static = false; /* no static queues under SDMA */ @@ -221,6 +222,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, engine_sel__mes_unmap_queues__compute; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = engine_sel__mes_unmap_queues__sdma0 + sdma_engine; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index bf20c6d32ef3..3cdb19826927 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -212,6 +212,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, queue_type__mes_map_queues__debug_interface_queue_vi; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = q->properties.sdma_engine_id + engine_sel__mes_map_queues__sdma0_vi; use_static = false; /* no static queues under SDMA */ @@ -258,6 +259,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, engine_sel__mes_unmap_queues__compute; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = engine_sel__mes_unmap_queues__sdma0 + sdma_engine; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 045a229436a0..077c47fd4fee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm, process_count = pm->dqm->processes_count; queue_count = pm->dqm->queue_count; - compute_queue_count = queue_count - pm->dqm->sdma_queue_count; + compute_queue_count = queue_count - pm->dqm->sdma_queue_count - + pm->dqm->xgmi_sdma_queue_count; /* check if there is over subscription * Note: the arbitration between the number of VMIDs and diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 10bd1abe1646..8f02d7817162 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -188,6 +188,7 @@ struct kfd_device_info { bool needs_iommu_device; bool needs_pci_atomics; unsigned int num_sdma_engines; + unsigned int num_xgmi_sdma_engines; unsigned int num_sdma_queues_per_engine; }; @@ -329,7 +330,8 @@ enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_SDMA, KFD_QUEUE_TYPE_HIQ, - KFD_QUEUE_TYPE_DIQ + KFD_QUEUE_TYPE_DIQ, + KFD_QUEUE_TYPE_SDMA_XGMI }; enum kfd_queue_format { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index f18d9cdf9aac..e652e25ede75 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -186,8 +186,13 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: - if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) { - pr_err("Over-subscription is not allowed for SDMA.\n"); + case KFD_QUEUE_TYPE_SDMA_XGMI: + if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count + >= get_num_sdma_queues(dev->dqm)) || + (type == KFD_QUEUE_TYPE_SDMA_XGMI && + dev->dqm->xgmi_sdma_queue_count + >= get_num_xgmi_sdma_queues(dev->dqm))) { + pr_debug("Over-subscription is not allowed for SDMA.\n"); retval = -EPERM; goto err_create_queue; } @@ -446,6 +451,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) q = pqn->q; switch (q->properties.type) { case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: seq_printf(m, " SDMA queue on device %x\n", q->device->id); mqd_type = KFD_MQD_TYPE_SDMA; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1e7d5f3376b0..20917c59f39c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -35,9 +35,10 @@ struct kfd_ioctl_get_version_args { }; /* For kfd_ioctl_create_queue_args.queue_type. */ -#define KFD_IOC_QUEUE_TYPE_COMPUTE 0 -#define KFD_IOC_QUEUE_TYPE_SDMA 1 -#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 2 +#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0 +#define KFD_IOC_QUEUE_TYPE_SDMA 0x1 +#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 +#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 -- cgit v1.2.3-59-g8ed1b From 1a058c3376765ee31d65e28cbbb9d4ff15120056 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 6 May 2019 22:11:14 -0500 Subject: drm/amdkfd: New IOCTL to allocate queue GWS Add a new kfd ioctl to allocate queue GWS. Queue GWS is released on queue destroy. Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 27 +++++++++++++++++++++++++++ include/uapi/linux/kfd_ioctl.h | 20 +++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index c92e931ceb27..aab2aa6c1dee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1567,6 +1567,31 @@ copy_from_user_failed: return err; } +static int kfd_ioctl_alloc_queue_gws(struct file *filep, + struct kfd_process *p, void *data) +{ + int retval; + struct kfd_ioctl_alloc_queue_gws_args *args = data; + struct kfd_dev *dev = NULL; + + if (!hws_gws_support || + dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) + return -EINVAL; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) { + pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); + return -EINVAL; + } + + mutex_lock(&p->mutex); + retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL); + mutex_unlock(&p->mutex); + + args->first_gws = 0; + return retval; +} + static int kfd_ioctl_get_dmabuf_info(struct file *filep, struct kfd_process *p, void *data) { @@ -1769,6 +1794,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, kfd_ioctl_import_dmabuf, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, + kfd_ioctl_alloc_queue_gws, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 20917c59f39c..070d1bc7e725 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -410,6 +410,21 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { __u32 n_success; /* to/from KFD */ }; +/* Allocate GWS for specific queue + * + * @gpu_id: device identifier + * @queue_id: queue's id that GWS is allocated for + * @num_gws: how many GWS to allocate + * @first_gws: index of the first GWS allocated. + * only support contiguous GWS allocation + */ +struct kfd_ioctl_alloc_queue_gws_args { + __u32 gpu_id; /* to KFD */ + __u32 queue_id; /* to KFD */ + __u32 num_gws; /* to KFD */ + __u32 first_gws; /* from KFD */ +}; + struct kfd_ioctl_get_dmabuf_info_args { __u64 size; /* from KFD */ __u64 metadata_ptr; /* to KFD */ @@ -529,7 +544,10 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_IMPORT_DMABUF \ AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) +#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ + AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1E +#define AMDKFD_COMMAND_END 0x1F #endif -- cgit v1.2.3-59-g8ed1b From ed63bb1d1f8469586006a9ca63c42344401aa2ab Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Thu, 13 Jun 2019 15:34:06 -0700 Subject: dma-buf: give each buffer a full-fledged inode By traversing /proc/*/fd and /proc/*/map_files, processes with CAP_ADMIN can get a lot of fine-grained data about how shmem buffers are shared among processes. stat(2) on each entry gives the caller a unique ID (st_ino), the buffer's size (st_size), and even the number of pages currently charged to the buffer (st_blocks / 512). In contrast, all dma-bufs share the same anonymous inode. So while we can count how many dma-buf fds or mappings a process has, we can't get the size of the backing buffers or tell if two entries point to the same dma-buf. On systems with debugfs, we can get a per-buffer breakdown of size and reference count, but can't tell which processes are actually holding the references to each buffer. Replace the singleton inode with full-fledged inodes allocated by alloc_anon_inode(). This involves creating and mounting a mini-pseudo-filesystem for dma-buf, following the example in fs/aio.c. Signed-off-by: Greg Hackmann Signed-off-by: Chenbo Feng Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20190613223408.139221-2-fengc@google.com --- drivers/dma-buf/dma-buf.c | 63 +++++++++++++++++++++++++++++++++++++++++----- include/uapi/linux/magic.h | 1 + 2 files changed, 58 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index f4104a21b069..3612ccededd6 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -34,8 +34,10 @@ #include #include #include +#include #include +#include static inline int is_dma_buf_file(struct file *); @@ -46,6 +48,25 @@ struct dma_buf_list { static struct dma_buf_list db_list; +static const struct dentry_operations dma_buf_dentry_ops = { + .d_dname = simple_dname, +}; + +static struct vfsmount *dma_buf_mnt; + +static struct dentry *dma_buf_fs_mount(struct file_system_type *fs_type, + int flags, const char *name, void *data) +{ + return mount_pseudo(fs_type, "dmabuf:", NULL, &dma_buf_dentry_ops, + DMA_BUF_MAGIC); +} + +static struct file_system_type dma_buf_fs_type = { + .name = "dmabuf", + .mount = dma_buf_fs_mount, + .kill_sb = kill_anon_super, +}; + static int dma_buf_release(struct inode *inode, struct file *file) { struct dma_buf *dmabuf; @@ -342,6 +363,31 @@ static inline int is_dma_buf_file(struct file *file) return file->f_op == &dma_buf_fops; } +static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) +{ + struct file *file; + struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); + + if (IS_ERR(inode)) + return ERR_CAST(inode); + + inode->i_size = dmabuf->size; + inode_set_bytes(inode, dmabuf->size); + + file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf", + flags, &dma_buf_fops); + if (IS_ERR(file)) + goto err_alloc_file; + file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); + file->private_data = dmabuf; + + return file; + +err_alloc_file: + iput(inode); + return file; +} + /** * DOC: dma buf device access * @@ -436,8 +482,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) } dmabuf->resv = resv; - file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf, - exp_info->flags); + file = dma_buf_getfile(dmabuf, exp_info->flags); if (IS_ERR(file)) { ret = PTR_ERR(file); goto err_dmabuf; @@ -1055,8 +1100,8 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) return ret; seq_puts(s, "\nDma-buf Objects:\n"); - seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\n", - "size", "flags", "mode", "count"); + seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\n", + "size", "flags", "mode", "count", "ino"); list_for_each_entry(buf_obj, &db_list.head, list_node) { ret = mutex_lock_interruptible(&buf_obj->lock); @@ -1067,11 +1112,12 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) continue; } - seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\n", + seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\n", buf_obj->size, buf_obj->file->f_flags, buf_obj->file->f_mode, file_count(buf_obj->file), - buf_obj->exp_name); + buf_obj->exp_name, + file_inode(buf_obj->file)->i_ino); robj = buf_obj->resv; while (true) { @@ -1167,6 +1213,10 @@ static inline void dma_buf_uninit_debugfs(void) static int __init dma_buf_init(void) { + dma_buf_mnt = kern_mount(&dma_buf_fs_type); + if (IS_ERR(dma_buf_mnt)) + return PTR_ERR(dma_buf_mnt); + mutex_init(&db_list.lock); INIT_LIST_HEAD(&db_list.head); dma_buf_init_debugfs(); @@ -1177,5 +1227,6 @@ subsys_initcall(dma_buf_init); static void __exit dma_buf_deinit(void) { dma_buf_uninit_debugfs(); + kern_unmount(dma_buf_mnt); } __exitcall(dma_buf_deinit); diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f8c00045d537..665e18627f78 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -91,5 +91,6 @@ #define UDF_SUPER_MAGIC 0x15013346 #define BALLOON_KVM_MAGIC 0x13661366 #define ZSMALLOC_MAGIC 0x58295829 +#define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */ #endif /* __LINUX_MAGIC_H__ */ -- cgit v1.2.3-59-g8ed1b From bb2bb903042517b8fb17b2bc21e00512f2dcac01 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Thu, 13 Jun 2019 15:34:07 -0700 Subject: dma-buf: add DMA_BUF_SET_NAME ioctls This patch adds complimentary DMA_BUF_SET_NAME ioctls, which lets userspace processes attach a free-form name to each buffer. This information can be extremely helpful for tracking and accounting shared buffers. For example, on Android, we know what each buffer will be used for at allocation time: GL, multimedia, camera, etc. The userspace allocator can use DMA_BUF_SET_NAME to associate that information with the buffer, so we can later give developers a breakdown of how much memory they're allocating for graphics, camera, etc. Signed-off-by: Greg Hackmann Signed-off-by: Chenbo Feng Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20190613223408.139221-3-fengc@google.com --- drivers/dma-buf/dma-buf.c | 65 ++++++++++++++++++++++++++++++++++++++++++-- include/linux/dma-buf.h | 5 +++- include/uapi/linux/dma-buf.h | 3 ++ 3 files changed, 69 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 3612ccededd6..ab96410d1dcd 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -48,8 +48,24 @@ struct dma_buf_list { static struct dma_buf_list db_list; +static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) +{ + struct dma_buf *dmabuf; + char name[DMA_BUF_NAME_LEN]; + size_t ret = 0; + + dmabuf = dentry->d_fsdata; + mutex_lock(&dmabuf->lock); + if (dmabuf->name) + ret = strlcpy(name, dmabuf->name, DMA_BUF_NAME_LEN); + mutex_unlock(&dmabuf->lock); + + return dynamic_dname(dentry, buffer, buflen, "/%s:%s", + dentry->d_name.name, ret > 0 ? name : ""); +} + static const struct dentry_operations dma_buf_dentry_ops = { - .d_dname = simple_dname, + .d_dname = dmabuffs_dname, }; static struct vfsmount *dma_buf_mnt; @@ -301,6 +317,43 @@ out: return events; } +/** + * dma_buf_set_name - Set a name to a specific dma_buf to track the usage. + * The name of the dma-buf buffer can only be set when the dma-buf is not + * attached to any devices. It could theoritically support changing the + * name of the dma-buf if the same piece of memory is used for multiple + * purpose between different devices. + * + * @dmabuf [in] dmabuf buffer that will be renamed. + * @buf: [in] A piece of userspace memory that contains the name of + * the dma-buf. + * + * Returns 0 on success. If the dma-buf buffer is already attached to + * devices, return -EBUSY. + * + */ +static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf) +{ + char *name = strndup_user(buf, DMA_BUF_NAME_LEN); + long ret = 0; + + if (IS_ERR(name)) + return PTR_ERR(name); + + mutex_lock(&dmabuf->lock); + if (!list_empty(&dmabuf->attachments)) { + ret = -EBUSY; + kfree(name); + goto out_unlock; + } + kfree(dmabuf->name); + dmabuf->name = name; + +out_unlock: + mutex_unlock(&dmabuf->lock); + return ret; +} + static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -339,6 +392,10 @@ static long dma_buf_ioctl(struct file *file, ret = dma_buf_begin_cpu_access(dmabuf, direction); return ret; + + case DMA_BUF_SET_NAME: + return dma_buf_set_name(dmabuf, (const char __user *)arg); + default: return -ENOTTY; } @@ -380,6 +437,7 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) goto err_alloc_file; file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); file->private_data = dmabuf; + file->f_path.dentry->d_fsdata = dmabuf; return file; @@ -1112,12 +1170,13 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) continue; } - seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\n", + seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\t%s\n", buf_obj->size, buf_obj->file->f_flags, buf_obj->file->f_mode, file_count(buf_obj->file), buf_obj->exp_name, - file_inode(buf_obj->file)->i_ino); + file_inode(buf_obj->file)->i_ino, + buf_obj->name ?: ""); robj = buf_obj->resv; while (true) { diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 8a327566d7f4..01ad5b942a6f 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -280,10 +280,12 @@ struct dma_buf_ops { * @file: file pointer used for sharing buffers across, and for refcounting. * @attachments: list of dma_buf_attachment that denotes all devices attached. * @ops: dma_buf_ops associated with this buffer object. - * @lock: used internally to serialize list manipulation, attach/detach and vmap/unmap + * @lock: used internally to serialize list manipulation, attach/detach and + * vmap/unmap, and accesses to name * @vmapping_counter: used internally to refcnt the vmaps * @vmap_ptr: the current vmap ptr if vmapping_counter > 0 * @exp_name: name of the exporter; useful for debugging. + * @name: userspace-provided name; useful for accounting and debugging. * @owner: pointer to exporter module; used for refcounting when exporter is a * kernel module. * @list_node: node for dma_buf accounting and debugging. @@ -311,6 +313,7 @@ struct dma_buf { unsigned vmapping_counter; void *vmap_ptr; const char *exp_name; + const char *name; struct module *owner; struct list_head list_node; void *priv; diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index d75df5210a4a..dbc7092e04b5 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -35,7 +35,10 @@ struct dma_buf_sync { #define DMA_BUF_SYNC_VALID_FLAGS_MASK \ (DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END) +#define DMA_BUF_NAME_LEN 32 + #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync) +#define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) #endif -- cgit v1.2.3-59-g8ed1b