Diffstat (limited to 'drivers/misc/habanalabs/common/memory.c')
-rw-r--r--  drivers/misc/habanalabs/common/memory.c | 683
1 file changed, 403 insertions(+), 280 deletions(-)
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index cbe9da4e0211..1f5910517b0e 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -14,6 +14,9 @@
#define HL_MMU_DEBUG 0
+/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
+#define DRAM_POOL_PAGE_SIZE SZ_8M
+
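The 8 MB pool page is chosen as a common divisor of all the supported non-power-of-2 DRAM physical page sizes, so each of them can be composed from whole pool pages. A minimal user-space sketch (hypothetical, not part of the patch) checking that property:

#include <assert.h>
#include <stdio.h>

/* SZ_8M redefined locally for this demo; in the kernel it comes from
 * include/linux/sizes.h */
#define SZ_8M	(8UL << 20)

int main(void)
{
	unsigned long dram_page_sizes[] = {
		32UL << 20, 40UL << 20, 48UL << 20 };

	for (int i = 0; i < 3; i++) {
		/* every DRAM phys page is a whole number of 8M pool pages */
		assert(dram_page_sizes[i] % SZ_8M == 0);
		printf("%luM -> %lu pool pages\n",
		       dram_page_sizes[i] >> 20,
		       dram_page_sizes[i] / SZ_8M);
	}
	return 0;
}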
/*
* The va ranges in context object contain a list with the available chunks of
* device virtual memory.
@@ -38,15 +41,14 @@
*/
/*
- * alloc_device_memory - allocate device memory
- *
- * @ctx : current context
- * @args : host parameters containing the requested size
- * @ret_handle : result handle
+ * alloc_device_memory() - allocate device memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters containing the requested size.
+ * @ret_handle: result handle.
*
* This function does the following:
- * - Allocate the requested size rounded up to 'dram_page_size' pages
- * - Return unique handle
+ * - Allocate the requested size rounded up to 'dram_page_size' pages.
+ * - Return unique handle for later map/unmap/free.
*/
static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
u32 *ret_handle)
@@ -55,15 +57,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_pack;
u64 paddr = 0, total_size, num_pgs, i;
- u32 num_curr_pgs, page_size, page_shift;
+ u32 num_curr_pgs, page_size;
int handle, rc;
bool contiguous;
num_curr_pgs = 0;
page_size = hdev->asic_prop.dram_page_size;
- page_shift = __ffs(page_size);
- num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
- total_size = num_pgs << page_shift;
+ num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
+ total_size = num_pgs * page_size;
if (!total_size) {
dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
@@ -182,17 +183,17 @@ pages_pack_err:
return rc;
}
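The shift-based rounding removed above assumed dram_page_size is a power of two; for 32M/40M/48M pages, __ffs(page_size) no longer equals log2(page_size), so shifting computes a wrong page count. A user-space sketch of the difference (DIV_ROUND_UP_ULL is reimplemented here and __builtin_ctzll stands in for the kernel's __ffs()):

#include <stdio.h>

#define DIV_ROUND_UP_ULL(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long long size = 100ULL << 20;	/* 100 MB request */
	unsigned long long page = 40ULL << 20;	/* 40 MB DRAM page */
	unsigned int shift = __builtin_ctzll(page);	/* 23, i.e. 8M */

	/* old scheme: rounds at 8M granularity and over-counts pages */
	printf("shift-based: %llu pages\n", (size + page - 1) >> shift);
	/* new scheme: correct for any page size */
	printf("div-based:   %llu pages\n", DIV_ROUND_UP_ULL(size, page));
	return 0;
}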
-/*
- * dma_map_host_va - DMA mapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @p_userptr: pointer to result userptr structure
+/**
+ * dma_map_host_va() - DMA mapping of the given host virtual address.
+ * @hdev: habanalabs device structure.
+ * @addr: the host virtual address of the memory area.
+ * @size: the size of the memory area.
+ * @p_userptr: pointer to result userptr structure.
*
* This function does the following:
- * - Allocate userptr structure
- * - Pin the given host memory using the userptr structure
- * - Perform DMA mapping to have the DMA addresses of the pages
+ * - Allocate userptr structure.
+ * - Pin the given host memory using the userptr structure.
+ * - Perform DMA mapping to have the DMA addresses of the pages.
*/
static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
struct hl_userptr **p_userptr)
@@ -236,14 +237,14 @@ userptr_err:
return rc;
}
-/*
- * dma_unmap_host_va - DMA unmapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @userptr: userptr to free
+/**
+ * dma_unmap_host_va() - DMA unmapping of the given host virtual address.
+ * @hdev: habanalabs device structure.
+ * @userptr: userptr to free.
*
* This function does the following:
- * - Unpins the physical pages
- * - Frees the userptr structure
+ * - Unpins the physical pages.
+ * - Frees the userptr structure.
*/
static void dma_unmap_host_va(struct hl_device *hdev,
struct hl_userptr *userptr)
@@ -252,14 +253,13 @@ static void dma_unmap_host_va(struct hl_device *hdev,
kfree(userptr);
}
-/*
- * dram_pg_pool_do_release - free DRAM pages pool
- *
- * @ref : pointer to reference object
+/**
+ * dram_pg_pool_do_release() - free DRAM pages pool.
+ * @ref: pointer to reference object.
*
* This function does the following:
- * - Frees the idr structure of physical pages handles
- * - Frees the generic pool of DRAM physical pages
+ * - Frees the idr structure of physical pages handles.
+ * - Frees the generic pool of DRAM physical pages.
*/
static void dram_pg_pool_do_release(struct kref *ref)
{
@@ -274,15 +274,15 @@ static void dram_pg_pool_do_release(struct kref *ref)
gen_pool_destroy(vm->dram_pg_pool);
}
-/*
- * free_phys_pg_pack - free physical page pack
- * @hdev: habanalabs device structure
- * @phys_pg_pack: physical page pack to free
+/**
+ * free_phys_pg_pack() - free physical page pack.
+ * @hdev: habanalabs device structure.
+ * @phys_pg_pack: physical page pack to free.
*
* This function does the following:
* - For DRAM memory only, iterate over the pack and free each physical block
- * structure by returning it to the general pool
- * - Free the hl_vm_phys_pg_pack structure
+ * structure by returning it to the general pool.
+ * - Free the hl_vm_phys_pg_pack structure.
*/
static void free_phys_pg_pack(struct hl_device *hdev,
struct hl_vm_phys_pg_pack *phys_pg_pack)
@@ -313,20 +313,20 @@ static void free_phys_pg_pack(struct hl_device *hdev,
kfree(phys_pg_pack);
}
-/*
- * free_device_memory - free device memory
- *
- * @ctx : current context
- * @handle : handle of the memory chunk to free
+/**
+ * free_device_memory() - free device memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters containing the memory handle to free.
*
* This function does the following:
- * - Free the device memory related to the given handle
+ * - Free the device memory related to the given handle.
*/
-static int free_device_memory(struct hl_ctx *ctx, u32 handle)
+static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_pack;
+ u32 handle = args->free.handle;
spin_lock(&vm->idr_lock);
phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
@@ -361,16 +361,15 @@ static int free_device_memory(struct hl_ctx *ctx, u32 handle)
return 0;
}
-/*
- * clear_va_list_locked - free virtual addresses list
- *
- * @hdev : habanalabs device structure
- * @va_list : list of virtual addresses to free
+/**
+ * clear_va_list_locked() - free virtual addresses list.
+ * @hdev: habanalabs device structure.
+ * @va_list: list of virtual addresses to free.
*
* This function does the following:
- * - Iterate over the list and free each virtual addresses block
+ * - Iterate over the list and free each virtual addresses block.
*
- * This function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
*/
static void clear_va_list_locked(struct hl_device *hdev,
struct list_head *va_list)
@@ -383,16 +382,15 @@ static void clear_va_list_locked(struct hl_device *hdev,
}
}
-/*
- * print_va_list_locked - print virtual addresses list
- *
- * @hdev : habanalabs device structure
- * @va_list : list of virtual addresses to print
+/**
+ * print_va_list_locked() - print virtual addresses list.
+ * @hdev: habanalabs device structure.
+ * @va_list: list of virtual addresses to print.
*
* This function does the following:
- * - Iterate over the list and print each virtual addresses block
+ * - Iterate over the list and print each virtual addresses block.
*
- * This function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
*/
static void print_va_list_locked(struct hl_device *hdev,
struct list_head *va_list)
@@ -409,18 +407,17 @@ static void print_va_list_locked(struct hl_device *hdev,
#endif
}
-/*
- * merge_va_blocks_locked - merge a virtual block if possible
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_list : pointer to the virtual addresses block list
- * @va_block : virtual block to merge with adjacent blocks
+/**
+ * merge_va_blocks_locked() - merge a virtual block if possible.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_list: pointer to the virtual addresses block list.
+ * @va_block: virtual block to merge with adjacent blocks.
*
* This function does the following:
* - Merge the given blocks with the adjacent blocks if their virtual ranges
- * create a contiguous virtual range
+ * create a contiguous virtual range.
*
- * This Function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
*/
static void merge_va_blocks_locked(struct hl_device *hdev,
struct list_head *va_list, struct hl_vm_va_block *va_block)
@@ -445,19 +442,18 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
}
}
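Two blocks merge when the first ends exactly one byte before the second starts, i.e. their virtual ranges form one contiguous range. A simplified, list-free sketch of that rule (the real code walks struct hl_vm_va_block nodes while holding the va_list lock):

#include <stdbool.h>
#include <stdio.h>

struct block { unsigned long long start, end; };

static bool try_merge(struct block *prev, const struct block *next)
{
	/* contiguous virtual ranges: [s1, e1] followed by [e1 + 1, e2] */
	if (prev->end + 1 != next->start)
		return false;
	prev->end = next->end;
	return true;
}

int main(void)
{
	struct block a = { 0x1000, 0x1fff };
	struct block b = { 0x2000, 0x2fff };

	if (try_merge(&a, &b))
		printf("merged: [0x%llx, 0x%llx]\n", a.start, a.end);
	return 0;
}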
-/*
- * add_va_block_locked - add a virtual block to the virtual addresses list
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_list : pointer to the virtual addresses block list
- * @start : start virtual address
- * @end : end virtual address
+/**
+ * add_va_block_locked() - add a virtual block to the virtual addresses list.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_list: pointer to the virtual addresses block list.
+ * @start: start virtual address.
+ * @end: end virtual address.
*
* This function does the following:
- * - Add the given block to the virtual blocks list and merge with other
- * blocks if a contiguous virtual block can be created
+ * - Add the given block to the virtual blocks list and merge with other blocks
+ * if a contiguous virtual block can be created.
*
- * This Function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
*/
static int add_va_block_locked(struct hl_device *hdev,
struct list_head *va_list, u64 start, u64 end)
@@ -501,16 +497,15 @@ static int add_va_block_locked(struct hl_device *hdev,
return 0;
}
-/*
- * add_va_block - wrapper for add_va_block_locked
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_list : pointer to the virtual addresses block list
- * @start : start virtual address
- * @end : end virtual address
+/**
+ * add_va_block() - wrapper for add_va_block_locked.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_list: pointer to the virtual addresses block list.
+ * @start: start virtual address.
+ * @end: end virtual address.
*
* This function does the following:
- * - Takes the list lock and calls add_va_block_locked
+ * - Takes the list lock and calls add_va_block_locked.
*/
static inline int add_va_block(struct hl_device *hdev,
struct hl_va_range *va_range, u64 start, u64 end)
@@ -524,8 +519,9 @@ static inline int add_va_block(struct hl_device *hdev,
return rc;
}
-/*
+/**
* get_va_block() - get a virtual block for the given size and alignment.
+ *
* @hdev: pointer to the habanalabs device structure.
* @va_range: pointer to the virtual addresses range.
* @size: requested block size.
@@ -534,33 +530,51 @@ static inline int add_va_block(struct hl_device *hdev,
*
* This function does the following:
* - Iterate on the virtual block list to find a suitable virtual block for the
- * given size and alignment.
+ * given size, hint address and alignment.
* - Reserve the requested block and update the list.
* - Return the start address of the virtual block.
*/
-static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
- u64 size, u64 hint_addr, u32 va_block_align)
+static u64 get_va_block(struct hl_device *hdev,
+ struct hl_va_range *va_range,
+ u64 size, u64 hint_addr, u32 va_block_align)
{
struct hl_vm_va_block *va_block, *new_va_block = NULL;
- u64 valid_start, valid_size, prev_start, prev_end, align_mask,
- res_valid_start = 0, res_valid_size = 0;
+ u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
+ align_mask, reserved_valid_start = 0, reserved_valid_size = 0;
bool add_prev = false;
+ bool is_align_pow_2 = is_power_of_2(va_range->page_size);
- align_mask = ~((u64)va_block_align - 1);
+ if (is_align_pow_2)
+ align_mask = ~((u64)va_block_align - 1);
+ else
+ /*
+ * With a non-power-of-2 range we work only at page granularity
+ * and the start address is page aligned,
+ * so no alignment check is needed.
+ */
+ size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
+ va_range->page_size;
- /* check if hint_addr is aligned */
- if (hint_addr & (va_block_align - 1))
+ tmp_hint_addr = hint_addr;
+
+ /* Check if we need to ignore hint address */
+ if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
+ (!is_align_pow_2 &&
+ do_div(tmp_hint_addr, va_range->page_size))) {
+ dev_info(hdev->dev, "Hint address 0x%llx will be ignored\n",
+ hint_addr);
hint_addr = 0;
+ }
mutex_lock(&va_range->lock);
print_va_list_locked(hdev, &va_range->list);
list_for_each_entry(va_block, &va_range->list, node) {
- /* calc the first possible aligned addr */
+ /* Calc the first possible aligned addr */
valid_start = va_block->start;
- if (valid_start & (va_block_align - 1)) {
+ if (is_align_pow_2 && (valid_start & (va_block_align - 1))) {
valid_start &= align_mask;
valid_start += va_block_align;
if (valid_start > va_block->end)
@@ -568,35 +582,41 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
}
valid_size = va_block->end - valid_start;
+ if (valid_size < size)
+ continue;
- if (valid_size >= size &&
- (!new_va_block || valid_size < res_valid_size)) {
+ /* Pick the minimal length block which has the required size */
+ if (!new_va_block || (valid_size < reserved_valid_size)) {
new_va_block = va_block;
- res_valid_start = valid_start;
- res_valid_size = valid_size;
+ reserved_valid_start = valid_start;
+ reserved_valid_size = valid_size;
}
if (hint_addr && hint_addr >= valid_start &&
- ((hint_addr + size) <= va_block->end)) {
+ (hint_addr + size) <= va_block->end) {
new_va_block = va_block;
- res_valid_start = hint_addr;
- res_valid_size = valid_size;
+ reserved_valid_start = hint_addr;
+ reserved_valid_size = valid_size;
break;
}
}
if (!new_va_block) {
dev_err(hdev->dev, "no available va block for size %llu\n",
- size);
+ size);
goto out;
}
- if (res_valid_start > new_va_block->start) {
+ /*
+ * If there is a leftover range due to reserving the new va block,
+ * return it to the main virtual addresses list.
+ */
+ if (reserved_valid_start > new_va_block->start) {
prev_start = new_va_block->start;
- prev_end = res_valid_start - 1;
+ prev_end = reserved_valid_start - 1;
- new_va_block->start = res_valid_start;
- new_va_block->size = res_valid_size;
+ new_va_block->start = reserved_valid_start;
+ new_va_block->size = reserved_valid_size;
add_prev = true;
}
@@ -617,7 +637,7 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
out:
mutex_unlock(&va_range->lock);
- return res_valid_start;
+ return reserved_valid_start;
}
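For a non-power-of-2 page size the hint address cannot be validated with a bitmask, so the code divides a scratch copy with do_div(), which in the kernel divides in place and returns the remainder. A user-space sketch of the same check, with the % operator standing in for do_div():

#include <stdbool.h>
#include <stdio.h>

static bool hint_is_usable(unsigned long long hint,
			   unsigned long long page_size,
			   unsigned long long block_align, bool align_pow2)
{
	if (align_pow2)
		/* power-of-2 alignment: a mask test suffices */
		return (hint & (block_align - 1)) == 0;

	/* non-pow2 range: the hint must sit on a page boundary */
	return (hint % page_size) == 0;
}

int main(void)
{
	unsigned long long page = 40ULL << 20;	/* 40 MB DRAM page */

	/* 80M is a multiple of 40M, 64M is not */
	printf("hint 80M usable: %d\n",
	       hint_is_usable(80ULL << 20, page, 0, false));
	printf("hint 64M usable: %d\n",
	       hint_is_usable(64ULL << 20, page, 0, false));
	return 0;
}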
/*
@@ -644,9 +664,9 @@ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
/**
* hl_get_va_range_type() - get va_range type for the given address and size.
- * @address: The start address of the area we want to validate.
- * @size: The size in bytes of the area we want to validate.
- * @type: returned va_range type
+ * @address: the start address of the area we want to validate.
+ * @size: the size in bytes of the area we want to validate.
+ * @type: returned va_range type.
*
* Return: true if the area is inside a valid range, false otherwise.
*/
@@ -667,16 +687,15 @@ static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
return -EINVAL;
}
-/*
- * hl_unreserve_va_block - wrapper for add_va_block for unreserving a va block
- *
+/**
+ * hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block.
* @hdev: pointer to the habanalabs device structure
- * @ctx: current context
- * @start: start virtual address
- * @end: end virtual address
+ * @ctx: pointer to the context structure.
+ * @start: start virtual address.
+ * @end: end virtual address.
*
* This function does the following:
- * - Takes the list lock and calls add_va_block_locked
+ * - Takes the list lock and calls add_va_block_locked.
*/
int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
u64 start_addr, u64 size)
@@ -701,11 +720,10 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
return rc;
}
-/*
- * get_sg_info - get number of pages and the DMA address from SG list
- *
- * @sg : the SG list
- * @dma_addr : pointer to DMA address to return
+/**
+ * get_sg_info() - get number of pages and the DMA address from SG list.
+ * @sg: the SG list.
+ * @dma_addr: pointer to DMA address to return.
*
* Calculate the number of consecutive pages described by the SG list. Take the
* offset of the address in the first page, add to it the length and round it up
 * to the number of needed pages.
 */
static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
{
	*dma_addr = sg_dma_address(sg);

	return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
			(PAGE_SIZE - 1)) >> PAGE_SHIFT;
}
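The computation folds the address offset within the first page into the length before rounding up, so a buffer that straddles page boundaries is counted correctly. A stand-alone sketch with an assumed 4 KB PAGE_SIZE:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

static unsigned int npages(unsigned long long dma_addr, unsigned int len)
{
	/* in-page offset plus length, rounded up to whole pages */
	return ((dma_addr & (PAGE_SIZE - 1)) + len + PAGE_SIZE - 1)
			>> PAGE_SHIFT;
}

int main(void)
{
	/* 8 KB starting 0x100 into a page spans three pages, not two */
	printf("%u pages\n", npages(0x1000100ULL, 8192));
	return 0;
}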
-/*
- * init_phys_pg_pack_from_userptr - initialize physical page pack from host
- * memory
- * @ctx: current context
- * @userptr: userptr to initialize from
- * @pphys_pg_pack: result pointer
+/**
+ * init_phys_pg_pack_from_userptr() - initialize physical page pack from host
+ * memory.
+ * @ctx: pointer to the context structure.
+ * @userptr: userptr to initialize from.
+ * @pphys_pg_pack: result pointer.
*
* This function does the following:
- * - Pin the physical pages related to the given virtual block
+ * - Pin the physical pages related to the given virtual block.
* - Create a physical page pack from the physical pages related to the given
- * virtual block
+ * virtual block.
*/
static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
struct hl_userptr *userptr,
@@ -821,16 +839,16 @@ page_pack_arr_mem_err:
return rc;
}
-/*
- * map_phys_pg_pack - maps the physical page pack.
- * @ctx: current context
- * @vaddr: start address of the virtual area to map from
- * @phys_pg_pack: the pack of physical pages to map to
+/**
+ * map_phys_pg_pack() - maps the physical page pack.
+ * @ctx: pointer to the context structure.
+ * @vaddr: start address of the virtual area to map from.
+ * @phys_pg_pack: the pack of physical pages to map to.
*
* This function does the following:
- * - Maps each chunk of virtual memory to matching physical chunk
- * - Stores number of successful mappings in the given argument
- * - Returns 0 on success, error code otherwise
+ * - Maps each chunk of virtual memory to matching physical chunk.
+ * - Stores number of successful mappings in the given argument.
+ * - Returns 0 on success, error code otherwise.
*/
static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
struct hl_vm_phys_pg_pack *phys_pg_pack)
@@ -875,19 +893,21 @@ err:
return rc;
}
-/*
- * unmap_phys_pg_pack - unmaps the physical page pack
- * @ctx: current context
- * @vaddr: start address of the virtual area to unmap
- * @phys_pg_pack: the pack of physical pages to unmap
+/**
+ * unmap_phys_pg_pack() - unmaps the physical page pack.
+ * @ctx: pointer to the context structure.
+ * @vaddr: start address of the virtual area to unmap.
+ * @phys_pg_pack: the pack of physical pages to unmap.
*/
static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
struct hl_vm_phys_pg_pack *phys_pg_pack)
{
struct hl_device *hdev = ctx->hdev;
u64 next_vaddr, i;
+ bool is_host_addr;
u32 page_size;
+ is_host_addr = !hl_is_dram_va(hdev, vaddr);
page_size = phys_pg_pack->page_size;
next_vaddr = vaddr;
@@ -900,14 +920,18 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
/*
* unmapping on Palladium can be really long, so avoid a CPU
* soft lockup bug by sleeping a little between unmapping pages
+ *
+ * In addition, when unmapping host memory we pass through
+ * the Linux kernel to unpin the pages and that takes a long
+ * time. Therefore, sleep every 32K pages to avoid a soft lockup.
*/
- if (hdev->pldm)
- usleep_range(500, 1000);
+ if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
+ usleep_range(50, 200);
}
}
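The new condition (i & 0x7FFF) == 0 is a cheap power-of-2 modulus: it is true once every 32768 iterations, which is how the loop paces its sleeps while unpinning host pages. A quick sketch of the cadence:

#include <stdio.h>

int main(void)
{
	unsigned long long sleeps = 0;

	for (unsigned long long i = 0; i < 100000; i++)
		if ((i & 0x7FFF) == 0)	/* same as i % 32768 == 0 */
			sleeps++;

	/* prints 4: i = 0, 32768, 65536, 98304 */
	printf("%llu sleep points over 100000 pages\n", sleeps);
	return 0;
}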
static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
- u64 *paddr)
+ u64 *paddr)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm *vm = &hdev->vm;
@@ -930,19 +954,18 @@ static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
return 0;
}
-/*
- * map_device_va - map the given memory
- *
- * @ctx : current context
- * @args : host parameters with handle/host virtual address
- * @device_addr : pointer to result device virtual address
+/**
+ * map_device_va() - map the given memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters with handle/host virtual address.
+ * @device_addr: pointer to result device virtual address.
*
* This function does the following:
* - If given a physical device memory handle, map to a device virtual block
- * and return the start address of this block
+ * and return the start address of this block.
* - If given a host virtual address and size, find the related physical pages,
* map a device virtual block to this pages and return the start address of
- * this block
+ * this block.
*/
static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
u64 *device_addr)
@@ -1028,7 +1051,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
hint_addr = args->map_device.hint_addr;
- /* DRAM VA alignment is the same as the DRAM page size */
+ /* DRAM VA alignment is the same as the MMU page size */
va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
va_block_align = hdev->asic_prop.dmmu.page_size;
}
@@ -1119,24 +1142,26 @@ init_page_pack_err:
return rc;
}
-/*
- * unmap_device_va - unmap the given device virtual address
- *
- * @ctx : current context
- * @vaddr : device virtual address to unmap
- * @ctx_free : true if in context free flow, false otherwise.
+/**
+ * unmap_device_va() - unmap the given device virtual address.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters with device virtual address to unmap.
+ * @ctx_free: true if in context free flow, false otherwise.
*
* This function does the following:
- * - Unmap the physical pages related to the given virtual address
- * - return the device virtual block to the virtual block list
+ * - unmap the physical pages related to the given virtual address.
+ * - return the device virtual block to the virtual block list.
*/
-static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
+static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
+ bool ctx_free)
{
struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
struct hl_vm_hash_node *hnode = NULL;
struct hl_userptr *userptr = NULL;
struct hl_va_range *va_range;
+ u64 vaddr = args->unmap.device_virt_addr;
enum vm_type_t *vm_type;
bool is_userptr;
int rc = 0;
@@ -1195,7 +1220,13 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
goto mapping_cnt_err;
}
- vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
+ if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size))
+ vaddr = prop->dram_base_address +
+ DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address,
+ phys_pg_pack->page_size) *
+ phys_pg_pack->page_size;
+ else
+ vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
mutex_lock(&ctx->mmu_lock);
@@ -1258,12 +1289,90 @@ vm_type_err:
return rc;
}
+static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
+ u32 *size)
+{
+ u32 block_id = 0;
+ int rc;
+
+ rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
+
+ *handle = block_id | HL_MMAP_TYPE_BLOCK;
+ *handle <<= PAGE_SHIFT;
+
+ return rc;
+}
+
+static void hw_block_vm_close(struct vm_area_struct *vma)
+{
+ struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data;
+
+ hl_ctx_put(ctx);
+ vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct hw_block_vm_ops = {
+ .close = hw_block_vm_close
+};
+
+/**
+ * hl_hw_block_mmap() - mmap a hw block to user.
+ * @hpriv: pointer to the private data of the fd.
+ * @vma: pointer to the vm_area_struct of the process.
+ *
+ * The driver increments the context reference for every mapped HW block, to
+ * prevent the user from closing the FD without unmapping first.
+ */
+int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ u32 block_id, block_size;
+ int rc;
+
+ /* We use the page offset to hold the block id and thus we need to clear
+ * it before doing the mmap itself.
+ */
+ block_id = vma->vm_pgoff;
+ vma->vm_pgoff = 0;
+
+ /* Driver only allows mapping of a complete HW block */
+ block_size = vma->vm_end - vma->vm_start;
+
+#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
+ if (!access_ok(VERIFY_WRITE,
+ (void __user *) (uintptr_t) vma->vm_start, block_size)) {
+#else
+ if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) {
+#endif
+ dev_err(hdev->dev,
+ "user pointer is invalid - 0x%lx\n",
+ vma->vm_start);
+
+ return -EINVAL;
+ }
+
+ vma->vm_ops = &hw_block_vm_ops;
+ vma->vm_private_data = hpriv->ctx;
+
+ hl_ctx_get(hdev, hpriv->ctx);
+
+ rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
+ if (rc) {
+ hl_ctx_put(hpriv->ctx);
+ return rc;
+ }
+
+ vma->vm_pgoff = block_id;
+
+ return 0;
+}
+
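Since map_block() shifts the tagged block id into the mmap page-offset position, user space can pass the returned handle directly as the mmap offset. A hypothetical user-space sketch; the HL_MEM_OP_MAP_BLOCK ioctl call that produces block_handle and block_size is elided, and fd is assumed to be an open habanalabs device file descriptor:

#include <stdint.h>
#include <sys/mman.h>
#include <sys/types.h>

static void *map_hw_block(int fd, uint64_t block_handle, uint32_t block_size)
{
	/* the handle already encodes (block_id | HL_MMAP_TYPE_BLOCK) in its
	 * page-offset bits, so it is used as-is for the mmap offset */
	void *p = mmap(NULL, block_size, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, (off_t) block_handle);

	return p == MAP_FAILED ? NULL : p;
}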
static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
- u64 device_addr = 0;
- u32 handle = 0;
+ u64 block_handle, device_addr = 0;
+ u32 handle = 0, block_size;
int rc;
switch (args->in.op) {
@@ -1286,7 +1395,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
break;
case HL_MEM_OP_FREE:
- rc = free_device_memory(ctx, args->in.free.handle);
+ rc = free_device_memory(ctx, &args->in);
break;
case HL_MEM_OP_MAP:
@@ -1295,7 +1404,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
rc = 0;
} else {
rc = get_paddr_from_handle(ctx, &args->in,
- &device_addr);
+ &device_addr);
}
memset(args, 0, sizeof(*args));
@@ -1306,6 +1415,13 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
rc = 0;
break;
+ case HL_MEM_OP_MAP_BLOCK:
+ rc = map_block(hdev, args->in.map_block.block_addr,
+ &block_handle, &block_size);
+ args->out.block_handle = block_handle;
+ args->out.block_size = block_size;
+ break;
+
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
rc = -ENOTTY;
@@ -1322,8 +1438,8 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
union hl_mem_args *args = data;
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
- u64 device_addr = 0;
- u32 handle = 0;
+ u64 block_handle, device_addr = 0;
+ u32 handle = 0, block_size;
int rc;
if (!hl_device_operational(hdev, &status)) {
@@ -1394,7 +1510,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
goto out;
}
- rc = free_device_memory(ctx, args->in.free.handle);
+ rc = free_device_memory(ctx, &args->in);
break;
case HL_MEM_OP_MAP:
@@ -1405,8 +1521,14 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
break;
case HL_MEM_OP_UNMAP:
- rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
- false);
+ rc = unmap_device_va(ctx, &args->in, false);
+ break;
+
+ case HL_MEM_OP_MAP_BLOCK:
+ rc = map_block(hdev, args->in.map_block.block_addr,
+ &block_handle, &block_size);
+ args->out.block_handle = block_handle;
+ args->out.block_size = block_size;
break;
default:
@@ -1430,58 +1552,53 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
return -EFAULT;
}
- userptr->vec = frame_vector_create(npages);
- if (!userptr->vec) {
- dev_err(hdev->dev, "Failed to create frame vector\n");
+ userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages),
+ GFP_KERNEL);
+ if (!userptr->pages)
return -ENOMEM;
- }
- rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
- userptr->vec);
+ rc = pin_user_pages_fast(start, npages,
+ FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
+ userptr->pages);
if (rc != npages) {
dev_err(hdev->dev,
"Failed to map host memory, user ptr probably wrong\n");
if (rc < 0)
- goto destroy_framevec;
- rc = -EFAULT;
- goto put_framevec;
- }
-
- if (frame_vector_to_pages(userptr->vec) < 0) {
- dev_err(hdev->dev,
- "Failed to translate frame vector to pages\n");
+ goto destroy_pages;
+ npages = rc;
rc = -EFAULT;
- goto put_framevec;
+ goto put_pages;
}
+ userptr->npages = npages;
rc = sg_alloc_table_from_pages(userptr->sgt,
- frame_vector_pages(userptr->vec),
- npages, offset, size, GFP_ATOMIC);
+ userptr->pages,
+ npages, offset, size, GFP_ATOMIC);
if (rc < 0) {
dev_err(hdev->dev, "failed to create SG table from pages\n");
- goto put_framevec;
+ goto put_pages;
}
return 0;
-put_framevec:
- put_vaddr_frames(userptr->vec);
-destroy_framevec:
- frame_vector_destroy(userptr->vec);
+put_pages:
+ unpin_user_pages(userptr->pages, npages);
+destroy_pages:
+ kvfree(userptr->pages);
return rc;
}
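This hunk replaces the removed frame-vector API with pin_user_pages_fast(), which must be paired with unpin_user_pages() or unpin_user_pages_dirty_lock() rather than put_page(). A reduced kernel-style sketch of the pairing, with error handling trimmed and the SG-table step elided (not the driver's exact code):

#include <linux/mm.h>
#include <linux/slab.h>

static long pin_range(unsigned long start, unsigned long npages,
		      struct page ***pagesp)
{
	struct page **pages;
	long pinned;

	pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	pinned = pin_user_pages_fast(start, npages,
				     FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
				     pages);
	if (pinned != npages) {
		/* partial pin: release whatever was pinned */
		if (pinned > 0)
			unpin_user_pages(pages, pinned);
		kvfree(pages);
		return pinned < 0 ? pinned : -EFAULT;
	}

	*pagesp = pages;
	return 0;
}

static void unpin_range(struct page **pages, unsigned long npages)
{
	/* mark pages dirty and unpin; replaces the frame-vector teardown */
	unpin_user_pages_dirty_lock(pages, npages, true);
	kvfree(pages);
}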
-/*
- * hl_pin_host_memory - pins a chunk of host memory.
- * @hdev: pointer to the habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @userptr: pointer to hl_userptr structure
+/**
+ * hl_pin_host_memory() - pins a chunk of host memory.
+ * @hdev: pointer to the habanalabs device structure.
+ * @addr: the host virtual address of the memory area.
+ * @size: the size of the memory area.
+ * @userptr: pointer to hl_userptr structure.
*
* This function does the following:
- * - Pins the physical pages
- * - Create an SG list from those pages
+ * - Pins the physical pages.
+ * - Create an SG list from those pages.
*/
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
struct hl_userptr *userptr)
@@ -1554,8 +1671,6 @@ free_sgt:
*/
void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
{
- struct page **pages;
-
hl_debugfs_remove_userptr(hdev, userptr);
if (userptr->dma_mapped)
@@ -1563,15 +1678,8 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
userptr->sgt->nents,
userptr->dir);
- pages = frame_vector_pages(userptr->vec);
- if (!IS_ERR(pages)) {
- int i;
-
- for (i = 0; i < frame_vector_count(userptr->vec); i++)
- set_page_dirty_lock(pages[i]);
- }
- put_vaddr_frames(userptr->vec);
- frame_vector_destroy(userptr->vec);
+ unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
+ kvfree(userptr->pages);
list_del(&userptr->job_node);
@@ -1579,11 +1687,10 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
kfree(userptr->sgt);
}
-/*
- * hl_userptr_delete_list - clear userptr list
- *
- * @hdev : pointer to the habanalabs device structure
- * @userptr_list : pointer to the list to clear
+/**
+ * hl_userptr_delete_list() - clear userptr list.
+ * @hdev: pointer to the habanalabs device structure.
+ * @userptr_list: pointer to the list to clear.
*
* This function does the following:
* - Iterates over the list and unpins the host memory and frees the userptr
@@ -1602,12 +1709,11 @@ void hl_userptr_delete_list(struct hl_device *hdev,
INIT_LIST_HEAD(userptr_list);
}
-/*
- * hl_userptr_is_pinned - returns whether the given userptr is pinned
- *
- * @hdev : pointer to the habanalabs device structure
- * @userptr_list : pointer to the list to clear
- * @userptr : pointer to userptr to check
+/**
+ * hl_userptr_is_pinned() - returns whether the given userptr is pinned.
+ * @hdev: pointer to the habanalabs device structure.
+ * @userptr_list: pointer to the list to search in.
+ * @userptr: pointer to userptr to check.
*
* This function does the following:
* - Iterates over the list and checks if the given userptr is in it, means is
@@ -1625,12 +1731,12 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
return false;
}
-/*
- * va_range_init - initialize virtual addresses range
- * @hdev: pointer to the habanalabs device structure
- * @va_range: pointer to the range to initialize
- * @start: range start address
- * @end: range end address
+/**
+ * va_range_init() - initialize virtual addresses range.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_range: pointer to the range to initialize.
+ * @start: range start address.
+ * @end: range end address.
*
* This function does the following:
* - Initializes the virtual addresses list of the given range with the given
@@ -1643,15 +1749,21 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
INIT_LIST_HEAD(&va_range->list);
- /* PAGE_SIZE alignment */
+ /*
+ * PAGE_SIZE alignment.
+ * It is the caller's responsibility to align the addresses if the
+ * page size is not a power of 2.
+ */
- if (start & (PAGE_SIZE - 1)) {
- start &= PAGE_MASK;
- start += PAGE_SIZE;
- }
+ if (is_power_of_2(page_size)) {
+ if (start & (PAGE_SIZE - 1)) {
+ start &= PAGE_MASK;
+ start += PAGE_SIZE;
+ }
- if (end & (PAGE_SIZE - 1))
- end &= PAGE_MASK;
+ if (end & (PAGE_SIZE - 1))
+ end &= PAGE_MASK;
+ }
if (start >= end) {
dev_err(hdev->dev, "too small vm range for va list\n");
@@ -1672,13 +1784,13 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
return 0;
}
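The round-up of start and round-down of end rely on mask arithmetic that is valid only for power-of-2 page sizes, which is why the hunk guards it with is_power_of_2(). A worked user-space sketch with demo values:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long start = 0x10010, end = 0x20ff0;

	if (start & (PAGE_SIZE - 1)) {
		start &= PAGE_MASK;	/* down to the page boundary... */
		start += PAGE_SIZE;	/* ...then up to the next page */
	}
	if (end & (PAGE_SIZE - 1))
		end &= PAGE_MASK;

	/* prints: start 0x11000 end 0x20000 */
	printf("start 0x%lx end 0x%lx\n", start, end);
	return 0;
}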
-/*
- * va_range_fini() - clear a virtual addresses range
- * @hdev: pointer to the habanalabs structure
- * va_range: pointer to virtual addresses range
+/**
+ * va_range_fini() - clear a virtual addresses range.
+ * @hdev: pointer to the habanalabs structure.
+ * @va_range: pointer to the virtual addresses range.
*
* This function does the following:
- * - Frees the virtual addresses block list and its lock
+ * - Frees the virtual addresses block list and its lock.
*/
static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
{
@@ -1690,22 +1802,22 @@ static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
kfree(va_range);
}
-/*
- * vm_ctx_init_with_ranges() - initialize virtual memory for context
- * @ctx: pointer to the habanalabs context structure
+/**
+ * vm_ctx_init_with_ranges() - initialize virtual memory for context.
+ * @ctx: pointer to the habanalabs context structure.
* @host_range_start: host virtual addresses range start.
* @host_range_end: host virtual addresses range end.
* @host_huge_range_start: host virtual addresses range start for memory
- * allocated with huge pages.
+ * allocated with huge pages.
* @host_huge_range_end: host virtual addresses range end for memory allocated
* with huge pages.
* @dram_range_start: dram virtual addresses range start.
* @dram_range_end: dram virtual addresses range end.
*
* This function initializes the following:
- * - MMU for context
- * - Virtual address to area descriptor hashtable
- * - Virtual block list of available virtual memory
+ * - MMU for context.
+ * - Virtual address to area descriptor hashtable.
+ * - Virtual block list of available virtual memory.
*/
static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
u64 host_range_start,
@@ -1826,7 +1938,8 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
dram_range_start = prop->dmmu.start_addr;
dram_range_end = prop->dmmu.end_addr;
- dram_page_size = prop->dmmu.page_size;
+ dram_page_size = prop->dram_page_size ?
+ prop->dram_page_size : prop->dmmu.page_size;
host_range_start = prop->pmmu.start_addr;
host_range_end = prop->pmmu.end_addr;
host_page_size = prop->pmmu.page_size;
@@ -1840,15 +1953,14 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
dram_range_start, dram_range_end, dram_page_size);
}
-/*
- * hl_vm_ctx_fini - virtual memory teardown of context
- *
- * @ctx : pointer to the habanalabs context structure
+/**
+ * hl_vm_ctx_fini() - virtual memory teardown of context.
+ * @ctx: pointer to the habanalabs context structure.
*
 * This function performs teardown of the following:
- * - Virtual block list of available virtual memory
- * - Virtual address to area descriptor hashtable
- * - MMU for context
+ * - Virtual block list of available virtual memory.
+ * - Virtual address to area descriptor hashtable.
+ * - MMU for context.
*
* In addition this function does the following:
* - Unmaps the existing hashtable nodes if the hashtable is not empty. The
@@ -1867,9 +1979,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
struct hl_vm_phys_pg_pack *phys_pg_list;
struct hl_vm_hash_node *hnode;
struct hlist_node *tmp_node;
+ struct hl_mem_in args;
int i;
- if (!ctx->hdev->mmu_enable)
+ if (!hdev->mmu_enable)
return;
hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
@@ -1886,13 +1999,18 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
dev_dbg(hdev->dev,
"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
hnode->vaddr, ctx->asid);
- unmap_device_va(ctx, hnode->vaddr, true);
+ args.unmap.device_virt_addr = hnode->vaddr;
+ unmap_device_va(ctx, &args, true);
}
+ mutex_lock(&ctx->mmu_lock);
+
/* invalidate the cache once after the unmapping loop */
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
+ mutex_unlock(&ctx->mmu_lock);
+
spin_lock(&vm->idr_lock);
idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
if (phys_pg_list->asid == ctx->asid) {
@@ -1919,19 +2037,19 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
* because the user notifies us on allocations. If the user is no more,
* all DRAM is available
*/
- if (!ctx->hdev->asic_prop.dram_supports_virtual_memory)
- atomic64_set(&ctx->hdev->dram_used_mem, 0);
+ if (ctx->asid != HL_KERNEL_ASID_ID &&
+ !hdev->asic_prop.dram_supports_virtual_memory)
+ atomic64_set(&hdev->dram_used_mem, 0);
}
-/*
- * hl_vm_init - initialize virtual memory module
- *
- * @hdev : pointer to the habanalabs device structure
+/**
+ * hl_vm_init() - initialize virtual memory module.
+ * @hdev: pointer to the habanalabs device structure.
*
* This function initializes the following:
- * - MMU module
- * - DRAM physical pages pool of 2MB
- * - Idr for device memory allocation handles
+ * - MMU module.
+ * - DRAM physical pages pool of 2MB.
+ * - Idr for device memory allocation handles.
*/
int hl_vm_init(struct hl_device *hdev)
{
@@ -1939,7 +2057,13 @@ int hl_vm_init(struct hl_device *hdev)
struct hl_vm *vm = &hdev->vm;
int rc;
- vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
+ if (is_power_of_2(prop->dram_page_size))
+ vm->dram_pg_pool =
+ gen_pool_create(__ffs(prop->dram_page_size), -1);
+ else
+ vm->dram_pg_pool =
+ gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1);
+
if (!vm->dram_pg_pool) {
dev_err(hdev->dev, "Failed to create dram page pool\n");
return -ENOMEM;
@@ -1972,15 +2096,14 @@ pool_add_err:
return rc;
}
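gen_pool_create() takes the pool's minimum allocation order (a log2), and __ffs() returns the index of the lowest set bit, which equals log2 of the value only for powers of two. For 32M/40M/48M pages that bit index does not match the page size, so the code falls back to the fixed 8 MB pool page. A small demonstration (user space, __builtin_ctzl standing in for __ffs()):

#include <stdio.h>

int main(void)
{
	unsigned long pow2 = 2UL << 20;		/* 2 MB DRAM page */
	unsigned long non_pow2 = 48UL << 20;	/* 48 MB DRAM page */

	/* 2M: lowest set bit at 21, exactly log2(2M) */
	printf("order(2M)  = %d\n", __builtin_ctzl(pow2));
	/* 48M = 3 * 2^24: lowest set bit at 24 (16M), not the page size,
	 * hence the fixed 8M (order 23) pool page in hl_vm_init() */
	printf("order(48M) = %d\n", __builtin_ctzl(non_pow2));
	return 0;
}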
-/*
- * hl_vm_fini - virtual memory module teardown
- *
- * @hdev : pointer to the habanalabs device structure
+/**
+ * hl_vm_fini() - virtual memory module teardown.
+ * @hdev: pointer to the habanalabs device structure.
*
 * This function performs teardown of the following:
- * - Idr for device memory allocation handles
- * - DRAM physical pages pool of 2MB
- * - MMU module
+ * - Idr for device memory allocation handles.
+ * - DRAM physical pages pool of 2MB.
+ * - MMU module.
*/
void hl_vm_fini(struct hl_device *hdev)
{