From 552825b28ddac200b6080d9e79f4121b68e1517d Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 2 Apr 2018 11:20:44 +0800 Subject: drm/amdgpu: add new bo flag that indicates BOs don't need fallback (v2) user cases: 1. KFD wraps amdgpu_bo_create, they have no fallback case which is different with amdgpu_gem_object_create. since upstream branch has no amdgpu_amdkfd_gpuvm.c, which need KFD guys add this flag to __alloc_memory_of_gpu: + flags |= AMDGPU_GEM_CREATE_NO_FALLBACK; 2. UMD can specify this flag for their allocation as well if they like. v2: squash in merge conflict fix (Chunming) Signed-off-by: Chunming Zhou Acked-by: Felix Kuehling Cc: felix.kuehling@amd.com Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c363b67f2d0a..4f5a27d64c54 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -95,6 +95,8 @@ extern "C" { #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) /* Flag that BO sharing will be explicitly synchronized */ #define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) +/* Flag that BO doesn't need fallback */ +#define AMDGPU_GEM_CREATE_NO_FALLBACK (1 << 8) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3-59-g8ed1b From 1afd30efeddbb1b32cf35d3bf6477b35690eeca6 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 10 Apr 2018 13:42:29 +0200 Subject: drm/amdgpu: revert "add new bo flag that indicates BOs don't need fallback (v2)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 6f51d28bfe8e1a676de5cd877639245bed3cc818. Makes fallback handling to complicated. This is just a feature for the GEM interface and shouldn't leak into the core BO create function. Signed-off-by: Christian König Acked-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +---- include/uapi/drm/amdgpu_drm.h | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 68af2f878bc9..e1756b68a17b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -385,8 +385,7 @@ retry: amdgpu_bo_in_cpu_visible_vram(bo)) p->bytes_moved_vis += ctx.bytes_moved; - if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains && - !(bo->flags & AMDGPU_GEM_CREATE_NO_FALLBACK)) { + if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { domain = bo->allowed_domains; goto retry; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index a160ef0332d6..1de6864da717 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -388,8 +388,6 @@ retry: drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->va); - bo->preferred_domains = preferred_domains; - bo->allowed_domains = allowed_domains; bo->flags = flags; @@ -426,8 +424,7 @@ retry: r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, &ctx, acc_size, NULL, resv, &amdgpu_ttm_bo_destroy); - if (unlikely(r && r != -ERESTARTSYS) && type == ttm_bo_type_device && - !(flags & AMDGPU_GEM_CREATE_NO_FALLBACK)) { + if (unlikely(r && r != -ERESTARTSYS) && type == ttm_bo_type_device) { if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; goto retry; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4f5a27d64c54..c363b67f2d0a 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -95,8 +95,6 @@ extern "C" { #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) /* Flag that BO sharing will be explicitly synchronized */ #define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) -/* Flag that BO doesn't need fallback */ -#define AMDGPU_GEM_CREATE_NO_FALLBACK (1 << 8) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3-59-g8ed1b From 3f188453faf7ba5b59e8064df4afffbc946e25ec Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 17 Apr 2018 18:34:40 +0800 Subject: drm/amdgpu: handle domain mask checking v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit if domain is illegal, we should return error. v2: remove duplicated domain checking. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 7 +------ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 9 +-------- include/uapi/drm/amdgpu_drm.h | 6 ++++++ 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index ff606ce88837..c62c3dd4dcc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -229,12 +229,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; /* reject invalid gem domains */ - if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | - AMDGPU_GEM_DOMAIN_GTT | - AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GDS | - AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA)) + if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) return -EINVAL; /* create a gem object to contain this object in */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 9258f0694922..feece0a491a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -360,7 +360,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, }; struct amdgpu_bo *bo; unsigned long page_align, size = bp->size; - u32 preferred_domains; size_t acc_size; int r; @@ -381,14 +380,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->va); - preferred_domains = bp->preferred_domain ? bp->preferred_domain : + bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : bp->domain; - bo->preferred_domains = preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT | - AMDGPU_GEM_DOMAIN_CPU | - AMDGPU_GEM_DOMAIN_GDS | - AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA); bo->allowed_domains = bo->preferred_domains; if (bp->type != ttm_bo_type_kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c363b67f2d0a..b193e95f1f24 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -78,6 +78,12 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GDS 0x8 #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ + AMDGPU_GEM_DOMAIN_GTT | \ + AMDGPU_GEM_DOMAIN_VRAM | \ + AMDGPU_GEM_DOMAIN_GDS | \ + AMDGPU_GEM_DOMAIN_GWS | \ + AMDGPU_GEM_DOMAIN_OA) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) -- cgit v1.2.3-59-g8ed1b From d240cd9eddd943dbe0267d081697195ff1e90b65 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 3 Apr 2018 13:05:03 -0400 Subject: drm/amdgpu: optionally do a writeback but don't invalidate TC for IB fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a new IB flag that enables this new behavior. Full invalidation is unnecessary for RELEASE_MEM and doesn't make sense when draw calls from two adjacent gfx IBs run in parallel. This will be the new default for Mesa. v2: bump the version Signed-off-by: Marek Olšák Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 +++++++---- drivers/gpu/drm/amd/amdgpu/soc15d.h | 1 + include/uapi/drm/amdgpu_drm.h | 4 ++++ 8 files changed, 27 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5c0567ad1ba7..7c17a0bc2cd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -75,9 +75,10 @@ * - 3.23.0 - Add query for VRAM lost counter * - 3.24.0 - Add high priority compute support for gfx9 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). + * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 25 +#define KMS_DRIVER_MINOR 26 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 97449e06a242..d09fcab2398f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, + unsigned flags) { struct amdgpu_device *adev = ring->adev; struct amdgpu_fence *fence; @@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) adev->fence_context + ring->idx, seq); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, - seq, AMDGPU_FENCE_FLAG_INT); + seq, flags | AMDGPU_FENCE_FLAG_INT); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; /* This function can't be called concurrently anyway, otherwise diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 311589e02d17..f70eeed9ed76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_vm *vm; uint64_t fence_ctx; uint32_t status = 0, alloc_size; + unsigned fence_flags = 0; unsigned i; int r = 0; @@ -227,7 +228,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, #endif amdgpu_asic_invalidate_hdp(adev, ring); - r = amdgpu_fence_emit(ring, f); + if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) + fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; + + r = amdgpu_fence_emit(ring, f, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) @@ -242,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* wrap the last IB with fence */ if (job && job->uf_addr) { amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, - AMDGPU_FENCE_FLAG_64BIT); + fence_flags | AMDGPU_FENCE_FLAG_64BIT); } if (patch_offset != ~0 && ring->funcs->patch_cond_exec) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 08fcdf6f7b53..4f8dac2d36a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -42,6 +42,7 @@ #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) +#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) enum amdgpu_ring_type { AMDGPU_RING_TYPE_GFX, @@ -90,7 +91,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, unsigned irq_type); void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, + unsigned flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9ec7c1041df2..9c2195a2896d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -633,7 +633,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); if (vm_flush_needed || pasid_mapping_needed) { - r = amdgpu_fence_emit(ring, &fence); + r = amdgpu_fence_emit(ring, &fence, 0); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6a19e0311a9c..05b2d34110b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3775,13 +3775,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); - amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | - EOP_TC_ACTION_EN | - EOP_TC_WB_ACTION_EN | - EOP_TC_MD_ACTION_EN | + amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | + EOP_TC_NC_ACTION_EN) : + (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EOP_TC_MD_ACTION_EN)) | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5))); amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 7f408f85fdb6..839a144c1645 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -159,6 +159,7 @@ #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ #define EOP_TCL1_ACTION_EN (1 << 16) #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ +#define EOP_TC_NC_ACTION_EN (1 << 19) #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ #define DATA_SEL(x) ((x) << 29) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index b193e95f1f24..78fe828f2f79 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -526,6 +526,10 @@ union drm_amdgpu_cs { /* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ #define AMDGPU_IB_FLAG_PREEMPT (1<<2) +/* The IB fence should do the L2 writeback but not invalidate any shader + * caches (L2/vL1/sL1/I$). */ +#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */ -- cgit v1.2.3-59-g8ed1b From 621a6318adea69b08a3652c64bc7cc0cb4dacfb4 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Mon, 22 Jan 2018 20:48:14 +0800 Subject: drm/amdgpu: add save restore list cntl gpm and srm firmware support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RLC save/restore list cntl/gpm_mem/srm_mem ucodes are used for CGPG and gfxoff function. Signed-off-by: Huang Rui Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 15 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 36 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 17 +++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 3 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 55 +++++++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 9 +++++ include/uapi/drm/amdgpu_drm.h | 6 ++++ 7 files changed, 138 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d64ef30fed47..5ad893915a85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -774,9 +774,18 @@ struct amdgpu_rlc { u32 starting_offsets_start; u32 reg_list_format_size_bytes; u32 reg_list_size_bytes; + u32 reg_list_format_direct_reg_list_length; + u32 save_restore_list_cntl_size_bytes; + u32 save_restore_list_gpm_size_bytes; + u32 save_restore_list_srm_size_bytes; u32 *register_list_format; u32 *register_restore; + u8 *save_restore_list_cntl; + u8 *save_restore_list_gpm; + u8 *save_restore_list_srm; + + bool is_rlc_v2_1; }; #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES @@ -943,6 +952,12 @@ struct amdgpu_gfx { uint32_t ce_feature_version; uint32_t pfp_feature_version; uint32_t rlc_feature_version; + uint32_t rlc_srlc_fw_version; + uint32_t rlc_srlc_feature_version; + uint32_t rlc_srlg_fw_version; + uint32_t rlc_srlg_feature_version; + uint32_t rlc_srls_fw_version; + uint32_t rlc_srls_feature_version; uint32_t mec_feature_version; uint32_t mec2_feature_version; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index d602f8b14c58..eb4785e51573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -215,6 +215,18 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->gfx.rlc_fw_version; fw_info->feature = adev->gfx.rlc_feature_version; break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL: + fw_info->ver = adev->gfx.rlc_srlc_fw_version; + fw_info->feature = adev->gfx.rlc_srlc_feature_version; + break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM: + fw_info->ver = adev->gfx.rlc_srlg_fw_version; + fw_info->feature = adev->gfx.rlc_srlg_feature_version; + break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM: + fw_info->ver = adev->gfx.rlc_srls_fw_version; + fw_info->feature = adev->gfx.rlc_srls_feature_version; + break; case AMDGPU_INFO_FW_GFX_MEC: if (query_fw->index == 0) { fw_info->ver = adev->gfx.mec_fw_version; @@ -1149,6 +1161,30 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + /* RLC SAVE RESTORE LIST CNTL */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* RLC SAVE RESTORE LIST GPM MEM */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* RLC SAVE RESTORE LIST SRM MEM */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + /* MEC */ query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC; query_fw.index = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 84d652599d5b..0c74c09ef3b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -337,7 +337,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT && - ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) { + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT && + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL && + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM && + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) { ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + @@ -359,6 +362,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, le32_to_cpu(header->ucode_array_offset_bytes) + le32_to_cpu(cp_hdr->jt_offset) * 4), ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl, + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm, + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm, + ucode->ucode_size); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 0b262f4bb4fc..08e38579af24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -187,6 +187,9 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MEC2, AMDGPU_UCODE_ID_CP_MEC2_JT, AMDGPU_UCODE_ID_RLC_G, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, AMDGPU_UCODE_ID_STORAGE, AMDGPU_UCODE_ID_SMC, AMDGPU_UCODE_ID_UVD, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 587a8731fa31..73b76fa29bad 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -41,7 +41,6 @@ #define GFX9_MEC_HPD_SIZE 2048 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L -#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34 #define mmPWR_MISC_CNTL_STATUS 0x0183 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 @@ -401,6 +400,27 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } +static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_1 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); + adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); + adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); + adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); + adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); + adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); + adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); + adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); + adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); + adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); + adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); + adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); + adev->gfx.rlc.reg_list_format_direct_reg_list_length = + le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); +} + static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -412,6 +432,8 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) const struct rlc_firmware_header_v2_0 *rlc_hdr; unsigned int *tmp = NULL; unsigned int i = 0; + uint16_t version_major; + uint16_t version_minor; DRM_DEBUG("\n"); @@ -468,6 +490,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); adev->gfx.rlc.save_and_restore_offset = @@ -508,6 +536,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v9_0_init_rlc_ext_microcode(adev); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) @@ -566,6 +597,26 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + if (adev->gfx.rlc.is_rlc_v2_1) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); + } + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; @@ -1781,7 +1832,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) /* setup unique_indirect_regs array and indirect_start_offsets array */ gfx_v9_0_parse_ind_reg_list(register_list_format, - GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH, + adev->gfx.rlc.reg_list_format_direct_reg_list_length, adev->gfx.rlc.reg_list_format_size_bytes >> 2, unique_indirect_regs, &unique_indirect_reg_count, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 8873d833a7f7..0ff136d02d9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -70,6 +70,15 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type * case AMDGPU_UCODE_ID_RLC_G: *type = GFX_FW_TYPE_RLC_G; break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM; + break; case AMDGPU_UCODE_ID_SMC: *type = GFX_FW_TYPE_SMU; break; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 78fe828f2f79..081d25640b64 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -630,6 +630,12 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_ASD 0x0d /* Subquery id: Query VCN firmware version */ #define AMDGPU_INFO_FW_VCN 0x0e + /* Subquery id: Query GFX RLC SRLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f + /* Subquery id: Query GFX RLC SRLG firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10 + /* Subquery id: Query GFX RLC SRLS firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f /* the used VRAM size */ -- cgit v1.2.3-59-g8ed1b From 959a2091fae0fa498c79e095a4f6cbbb202a1194 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Mon, 14 May 2018 12:15:27 -0400 Subject: drm/amdgpu: Add support to change mtype for 2nd part of gart BOs on GFX9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change prepares for a workaround in amdkfd for a GFX9 HW bug. It requires the control stack memory of compute queues, which is allocated from the second page of MQD gart BOs, to have mtype NC, rather than the default UC. Signed-off-by: Yong Zhao Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 54 +++++++++++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +-- include/uapi/drm/amdgpu_drm.h | 4 +++ 3 files changed, 51 insertions(+), 12 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dfd22db13fb1..cc3b067e1ec6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -834,6 +834,45 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) sg_free_table(ttm->sg); } +int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, + struct ttm_buffer_object *tbo, + uint64_t flags) +{ + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); + struct ttm_tt *ttm = tbo->ttm; + struct amdgpu_ttm_tt *gtt = (void *)ttm; + int r; + + if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) { + uint64_t page_idx = 1; + + r = amdgpu_gart_bind(adev, gtt->offset, page_idx, + ttm->pages, gtt->ttm.dma_address, flags); + if (r) + goto gart_bind_fail; + + /* Patch mtype of the second part BO */ + flags &= ~AMDGPU_PTE_MTYPE_MASK; + flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC); + + r = amdgpu_gart_bind(adev, + gtt->offset + (page_idx << PAGE_SHIFT), + ttm->num_pages - page_idx, + &ttm->pages[page_idx], + &(gtt->ttm.dma_address[page_idx]), flags); + } else { + r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, + ttm->pages, gtt->ttm.dma_address, flags); + } + +gart_bind_fail: + if (r) + DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", + ttm->num_pages, gtt->offset); + + return r; +} + static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { @@ -907,8 +946,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); gtt->offset = (u64)tmp.start << PAGE_SHIFT; - r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages, - bo->ttm->pages, gtt->ttm.dma_address, flags); + r = amdgpu_ttm_gart_bind(adev, bo, flags); if (unlikely(r)) { ttm_bo_mem_put(bo, &tmp); return r; @@ -925,19 +963,15 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm; uint64_t flags; int r; - if (!gtt) + if (!tbo->ttm) return 0; - flags = amdgpu_ttm_tt_pte_flags(adev, >t->ttm.ttm, &tbo->mem); - r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages, - gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags); - if (r) - DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", - gtt->ttm.ttm.num_pages, gtt->offset); + flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem); + r = amdgpu_ttm_gart_bind(adev, tbo, flags); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 30f080364c97..4cf678684a12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -75,11 +75,12 @@ struct amdgpu_bo_list_entry; /* PDE Block Fragment Size for VEGA10 */ #define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) -/* VEGA10 only */ + +/* For GFX9 */ #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) -/* For Raven */ +#define AMDGPU_MTYPE_NC 0 #define AMDGPU_MTYPE_CC 2 #define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 081d25640b64..78b4dd89fcb4 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -101,6 +101,10 @@ extern "C" { #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) /* Flag that BO sharing will be explicitly synchronized */ #define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) +/* Flag that indicates allocating MQD gart on GFX9, where the mtype + * for the second page onward should be set to NC. + */ +#define AMDGPU_GEM_CREATE_MQD_GFX9 (1 << 8) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3-59-g8ed1b