From 2ba75d31198d2242fbce7a8082e44011bd4f2048 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Sun, 3 Apr 2022 11:22:34 +0300 Subject: habanalabs: refactor HOP functions in MMU V1 Take advantage of the HOPs shift/masks now defined as arrays. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Greg Kroah-Hartman --- drivers/misc/habanalabs/common/mmu/mmu_v1.c | 297 +++++++++------------------- 1 file changed, 88 insertions(+), 209 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c index f43657ad442b..e2d91a69acc2 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c @@ -10,6 +10,8 @@ #include +#define MMU_V1_MAX_HOPS (MMU_HOP4 + 1) + static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) @@ -170,51 +172,15 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr) return num_of_ptes_left; } -static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr, u64 mask, u64 shift) -{ - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & mask) >> shift); -} - -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP0], - mmu_prop->hop_shifts[MMU_HOP0]); -} - -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP1], - mmu_prop->hop_shifts[MMU_HOP1]); -} - -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) +static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop, + u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP2], - mmu_prop->hop_shifts[MMU_HOP2]); -} + u64 mask, shift; -static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP3], - mmu_prop->hop_shifts[MMU_HOP3]); -} - -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_prop, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP4], - mmu_prop->hop_shifts[MMU_HOP4]); + mask = mmu_prop->hop_masks[hop_idx]; + shift = mmu_prop->hop_shifts[hop_idx]; + return hop_addr_arr[hop_idx] + + ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift); } static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, @@ -516,74 +482,50 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx) } } -static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, +static int hl_mmu_v1_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) { + u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0; struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; - u64 hop0_addr = 0, hop0_pte_addr = 0, - hop1_addr = 0, hop1_pte_addr = 0, - hop2_addr = 0, hop2_pte_addr = 0, - hop3_addr = 0, hop3_pte_addr = 0, - hop4_addr = 0, hop4_pte_addr = 0, - curr_pte; bool is_huge, clear_hop3 = true; + int hop_idx; /* shifts and masks are the same in PMMU and HPMMU, use one of them */ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; - - hop1_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); - - if (hop1_addr == ULLONG_MAX) - goto not_mapped; - - hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; - - hop2_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); - - if (hop2_addr == ULLONG_MAX) - goto not_mapped; - - hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; - - hop3_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); - - if (hop3_addr == ULLONG_MAX) - goto not_mapped; + for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) { + if (hop_idx == MMU_HOP0) { + hop_addr[hop_idx] = get_hop0_addr(ctx); + } else { + hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte); + if (hop_addr[hop_idx] == ULLONG_MAX) + goto not_mapped; + } - hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); + hop_pte_addr[hop_idx] = + get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx); - curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx]; + } is_huge = curr_pte & mmu_prop->last_mask; if (is_dram_addr && !is_huge) { - dev_err(hdev->dev, - "DRAM unmapping should use huge pages only\n"); + dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n"); return -EFAULT; } if (!is_huge) { - hop4_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte); - - if (hop4_addr == ULLONG_MAX) + hop_idx = MMU_HOP4; + hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte); + if (hop_addr[hop_idx] == ULLONG_MAX) goto not_mapped; - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, - virt_addr); - - curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; - + hop_pte_addr[hop_idx] = + get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx); + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx]; clear_hop3 = false; } @@ -605,39 +547,33 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, goto not_mapped; } - write_final_pte(ctx, hop3_pte_addr, default_pte); - put_pte(ctx, hop3_addr); + hop_idx = MMU_HOP3; + write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte); + put_pte(ctx, hop_addr[hop_idx]); } else { if (!(curr_pte & PAGE_PRESENT_MASK)) goto not_mapped; - if (hop4_addr) - clear_pte(ctx, hop4_pte_addr); + if (hop_addr[MMU_HOP4]) + clear_pte(ctx, hop_pte_addr[MMU_HOP4]); else - clear_pte(ctx, hop3_pte_addr); + clear_pte(ctx, hop_pte_addr[MMU_HOP3]); - if (hop4_addr && !put_pte(ctx, hop4_addr)) + if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4])) clear_hop3 = true; if (!clear_hop3) goto mapped; - clear_pte(ctx, hop3_pte_addr); + for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) { + clear_pte(ctx, hop_pte_addr[hop_idx]); - if (put_pte(ctx, hop3_addr)) - goto mapped; + if (hop_idx == MMU_HOP0) + break; - clear_pte(ctx, hop2_pte_addr); - - if (put_pte(ctx, hop2_addr)) - goto mapped; - - clear_pte(ctx, hop1_pte_addr); - - if (put_pte(ctx, hop1_addr)) - goto mapped; - - clear_pte(ctx, hop0_pte_addr); + if (put_pte(ctx, hop_addr[hop_idx])) + goto mapped; + } } mapped: @@ -650,21 +586,15 @@ not_mapped: return -EINVAL; } -static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, +static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, bool is_dram_addr) { + u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0; struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; - u64 hop0_addr = 0, hop0_pte_addr = 0, - hop1_addr = 0, hop1_pte_addr = 0, - hop2_addr = 0, hop2_pte_addr = 0, - hop3_addr = 0, hop3_pte_addr = 0, - hop4_addr = 0, hop4_pte_addr = 0, - curr_pte = 0; - bool hop1_new = false, hop2_new = false, hop3_new = false, - hop4_new = false, is_huge; - int rc = -ENOMEM; + bool is_huge, hop_new[MMU_V1_MAX_HOPS] = {false}; + int num_hops, hop_idx, prev_hop, rc = -ENOMEM; /* * This mapping function can map a page or a huge page. For huge page @@ -684,39 +614,21 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, is_huge = false; } - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; - - hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new); - if (hop1_addr == ULLONG_MAX) - goto err; - - hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; - - hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new); - if (hop2_addr == ULLONG_MAX) - goto err; - - hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; + num_hops = is_huge ? (MMU_V1_MAX_HOPS - 1) : MMU_V1_MAX_HOPS; - hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new); - if (hop3_addr == ULLONG_MAX) - goto err; - - hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; - - if (!is_huge) { - hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new); - if (hop4_addr == ULLONG_MAX) - goto err; + for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) { + if (hop_idx == MMU_HOP0) { + hop_addr[hop_idx] = get_hop0_addr(ctx); + } else { + hop_addr[hop_idx] = + get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]); + if (hop_addr[hop_idx] == ULLONG_MAX) + goto err; + } - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, - virt_addr); - curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; + hop_pte_addr[hop_idx] = + get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx); + curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx]; } if (hdev->dram_default_page_mapping && is_dram_addr) { @@ -732,30 +644,22 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, goto err; } - if (hop1_new || hop2_new || hop3_new || hop4_new) { - dev_err(hdev->dev, - "DRAM mapping should not allocate more hops\n"); - rc = -EFAULT; - goto err; + for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) { + if (hop_new[hop_idx]) { + dev_err(hdev->dev, "DRAM mapping should not allocate more hops\n"); + rc = -EFAULT; + goto err; + } } } else if (curr_pte & PAGE_PRESENT_MASK) { dev_err(hdev->dev, "mapping already exists for virt_addr 0x%llx\n", virt_addr); - dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr); - dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr); - dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr); - dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr); - - if (!is_huge) - dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n", - *(u64 *) (uintptr_t) hop4_pte_addr, - hop4_pte_addr); + for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) + dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", hop_idx, + *(u64 *) (uintptr_t) hop_pte_addr[hop_idx], + hop_pte_addr[hop_idx]); rc = -EINVAL; goto err; @@ -764,53 +668,28 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask | PAGE_PRESENT_MASK; - if (is_huge) - write_final_pte(ctx, hop3_pte_addr, curr_pte); - else - write_final_pte(ctx, hop4_pte_addr, curr_pte); + write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte); - if (hop1_new) { - curr_pte = - (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop0_pte_addr, curr_pte); - } - if (hop2_new) { - curr_pte = - (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop1_pte_addr, curr_pte); - get_pte(ctx, hop1_addr); - } - if (hop3_new) { - curr_pte = - (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; - write_pte(ctx, hop2_pte_addr, curr_pte); - get_pte(ctx, hop2_addr); - } + for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) { + prev_hop = hop_idx - 1; - if (!is_huge) { - if (hop4_new) { - curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) | - PAGE_PRESENT_MASK; - write_pte(ctx, hop3_pte_addr, curr_pte); - get_pte(ctx, hop3_addr); + if (hop_new[hop_idx]) { + curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop_pte_addr[prev_hop], curr_pte); + if (hop_idx != MMU_HOP1) + get_pte(ctx, hop_addr[prev_hop]); } - - get_pte(ctx, hop4_addr); - } else { - get_pte(ctx, hop3_addr); } + get_pte(ctx, hop_addr[num_hops - 1]); + return 0; err: - if (hop4_new) - free_hop(ctx, hop4_addr); - if (hop3_new) - free_hop(ctx, hop3_addr); - if (hop2_new) - free_hop(ctx, hop2_addr); - if (hop1_new) - free_hop(ctx, hop1_addr); + for (hop_idx = num_hops; hop_idx > MMU_HOP0; hop_idx--) { + if (hop_new[hop_idx]) + free_hop(ctx, hop_addr[hop_idx]); + } return rc; } @@ -928,8 +807,8 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu) mmu->fini = hl_mmu_v1_fini; mmu->ctx_init = hl_mmu_v1_ctx_init; mmu->ctx_fini = hl_mmu_v1_ctx_fini; - mmu->map = _hl_mmu_v1_map; - mmu->unmap = _hl_mmu_v1_unmap; + mmu->map = hl_mmu_v1_map; + mmu->unmap = hl_mmu_v1_unmap; mmu->flush = flush; mmu->swap_out = hl_mmu_v1_swap_out; mmu->swap_in = hl_mmu_v1_swap_in; -- cgit v1.2.3-59-g8ed1b