aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/dma/swiotlb.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/dma/swiotlb.c')
-rw-r--r--kernel/dma/swiotlb.c583
1 files changed, 394 insertions, 189 deletions
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8e840fbbed7c..339a990554e7 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -21,39 +21,33 @@
#define pr_fmt(fmt) "software IO TLB: " fmt
#include <linux/cache.h>
+#include <linux/cc_platform.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
-#include <linux/mm.h>
#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/iommu-helper.h>
+#include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/pfn.h>
+#include <linux/scatterlist.h>
+#include <linux/set_memory.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/swiotlb.h>
-#include <linux/pfn.h>
#include <linux/types.h>
-#include <linux/ctype.h>
-#include <linux/highmem.h>
-#include <linux/gfp.h>
-#include <linux/scatterlist.h>
-#include <linux/cc_platform.h>
-#include <linux/set_memory.h>
-#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
-#endif
#ifdef CONFIG_DMA_RESTRICTED_POOL
-#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/slab.h>
#endif
-#include <asm/io.h>
-#include <asm/dma.h>
-
-#include <linux/init.h>
-#include <linux/memblock.h>
-#include <linux/iommu-helper.h>
-
#define CREATE_TRACE_POINTS
#include <trace/events/swiotlb.h>
@@ -68,17 +62,76 @@
#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-enum swiotlb_force swiotlb_force;
+struct io_tlb_slot {
+ phys_addr_t orig_addr;
+ size_t alloc_size;
+ unsigned int list;
+};
+
+static bool swiotlb_force_bounce;
+static bool swiotlb_force_disable;
struct io_tlb_mem io_tlb_default_mem;
+phys_addr_t swiotlb_unencrypted_base;
+
+static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
+static unsigned long default_nareas;
+
+/**
+ * struct io_tlb_area - IO TLB memory area descriptor
+ *
+ * This is a single area with a single lock.
+ *
+ * @used: The number of used IO TLB block.
+ * @index: The slot index to start searching in this area for next round.
+ * @lock: The lock to protect the above data structures in the map and
+ * unmap calls.
+ */
+struct io_tlb_area {
+ unsigned long used;
+ unsigned int index;
+ spinlock_t lock;
+};
+
/*
- * Max segment that we can provide which (if pages are contingous) will
- * not be bounced (unless SWIOTLB_FORCE is set).
+ * Round up number of slabs to the next power of 2. The last area is going
+ * be smaller than the rest if default_nslabs is not power of two.
+ * The number of slot in an area should be a multiple of IO_TLB_SEGSIZE,
+ * otherwise a segment may span two or more areas. It conflicts with free
+ * contiguous slots tracking: free slots are treated contiguous no matter
+ * whether they cross an area boundary.
+ *
+ * Return true if default_nslabs is rounded up.
*/
-static unsigned int max_segment;
+static bool round_up_default_nslabs(void)
+{
+ if (!default_nareas)
+ return false;
-static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
+ if (default_nslabs < IO_TLB_SEGSIZE * default_nareas)
+ default_nslabs = IO_TLB_SEGSIZE * default_nareas;
+ else if (is_power_of_2(default_nslabs))
+ return false;
+ default_nslabs = roundup_pow_of_two(default_nslabs);
+ return true;
+}
+
+static void swiotlb_adjust_nareas(unsigned int nareas)
+{
+ /* use a single area when non is specified */
+ if (!nareas)
+ nareas = 1;
+ else if (!is_power_of_2(nareas))
+ nareas = roundup_pow_of_two(nareas);
+
+ default_nareas = nareas;
+
+ pr_info("area num %d.\n", nareas);
+ if (round_up_default_nslabs())
+ pr_info("SWIOTLB bounce buffer size roundup to %luMB",
+ (default_nslabs << IO_TLB_SHIFT) >> 20);
+}
static int __init
setup_io_tlb_npages(char *str)
@@ -90,10 +143,14 @@ setup_io_tlb_npages(char *str)
}
if (*str == ',')
++str;
+ if (isdigit(*str))
+ swiotlb_adjust_nareas(simple_strtoul(str, &str, 0));
+ if (*str == ',')
+ ++str;
if (!strcmp(str, "force"))
- swiotlb_force = SWIOTLB_FORCE;
+ swiotlb_force_bounce = true;
else if (!strcmp(str, "noforce"))
- swiotlb_force = SWIOTLB_NO_FORCE;
+ swiotlb_force_disable = true;
return 0;
}
@@ -101,18 +158,12 @@ early_param("swiotlb", setup_io_tlb_npages);
unsigned int swiotlb_max_segment(void)
{
- return io_tlb_default_mem.nslabs ? max_segment : 0;
+ if (!io_tlb_default_mem.nslabs)
+ return 0;
+ return rounddown(io_tlb_default_mem.nslabs << IO_TLB_SHIFT, PAGE_SIZE);
}
EXPORT_SYMBOL_GPL(swiotlb_max_segment);
-void swiotlb_set_max_segment(unsigned int val)
-{
- if (swiotlb_force == SWIOTLB_FORCE)
- max_segment = 1;
- else
- max_segment = rounddown(val, PAGE_SIZE);
-}
-
unsigned long swiotlb_size_or_default(void)
{
return default_nslabs << IO_TLB_SHIFT;
@@ -127,8 +178,11 @@ void __init swiotlb_adjust_size(unsigned long size)
*/
if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT)
return;
+
size = ALIGN(size, IO_TLB_SIZE);
default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
+ if (round_up_default_nslabs())
+ size = default_nslabs << IO_TLB_SHIFT;
pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
}
@@ -156,6 +210,34 @@ static inline unsigned long nr_slots(u64 val)
}
/*
+ * Remap swioltb memory in the unencrypted physical address space
+ * when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP
+ * Isolation VMs).
+ */
+#ifdef CONFIG_HAS_IOMEM
+static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
+{
+ void *vaddr = NULL;
+
+ if (swiotlb_unencrypted_base) {
+ phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
+
+ vaddr = memremap(paddr, bytes, MEMREMAP_WB);
+ if (!vaddr)
+ pr_err("Failed to map the unencrypted memory %pa size %lx.\n",
+ &paddr, bytes);
+ }
+
+ return vaddr;
+}
+#else
+static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
+{
+ return NULL;
+}
+#endif
+
+/*
* Early SWIOTLB allocation may be too early to allow an architecture to
* perform the desired operations. This function allows the architecture to
* call SWIOTLB when the operations are possible. It needs to be called
@@ -172,11 +254,15 @@ void __init swiotlb_update_mem_attributes(void)
vaddr = phys_to_virt(mem->start);
bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
- memset(vaddr, 0, bytes);
+
+ mem->vaddr = swiotlb_mem_remap(mem, bytes);
+ if (!mem->vaddr)
+ mem->vaddr = vaddr;
}
static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
- unsigned long nslabs, bool late_alloc)
+ unsigned long nslabs, unsigned int flags,
+ bool late_alloc, unsigned int nareas)
{
void *vaddr = phys_to_virt(start);
unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
@@ -184,72 +270,114 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
mem->nslabs = nslabs;
mem->start = start;
mem->end = mem->start + bytes;
- mem->index = 0;
mem->late_alloc = late_alloc;
+ mem->nareas = nareas;
+ mem->area_nslabs = nslabs / mem->nareas;
+
+ mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
- if (swiotlb_force == SWIOTLB_FORCE)
- mem->force_bounce = true;
+ for (i = 0; i < mem->nareas; i++) {
+ spin_lock_init(&mem->areas[i].lock);
+ mem->areas[i].index = 0;
+ mem->areas[i].used = 0;
+ }
- spin_lock_init(&mem->lock);
for (i = 0; i < mem->nslabs; i++) {
mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
}
+
+ /*
+ * If swiotlb_unencrypted_base is set, the bounce buffer memory will
+ * be remapped and cleared in swiotlb_update_mem_attributes.
+ */
+ if (swiotlb_unencrypted_base)
+ return;
+
memset(vaddr, 0, bytes);
+ mem->vaddr = vaddr;
+ return;
}
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+ int (*remap)(void *tlb, unsigned long nslabs))
{
struct io_tlb_mem *mem = &io_tlb_default_mem;
+ unsigned long nslabs;
size_t alloc_size;
+ size_t bytes;
+ void *tlb;
- if (swiotlb_force == SWIOTLB_NO_FORCE)
- return 0;
+ if (!addressing_limit && !swiotlb_force_bounce)
+ return;
+ if (swiotlb_force_disable)
+ return;
- /* protect against double initialization */
- if (WARN_ON_ONCE(mem->nslabs))
- return -ENOMEM;
+ /*
+ * default_nslabs maybe changed when adjust area number.
+ * So allocate bounce buffer after adjusting area number.
+ */
+ if (!default_nareas)
+ swiotlb_adjust_nareas(num_possible_cpus());
+
+ nslabs = default_nslabs;
+ /*
+ * By default allocate the bounce buffer memory from low memory, but
+ * allow to pick a location everywhere for hypervisors with guest
+ * memory encryption.
+ */
+retry:
+ bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
+ if (flags & SWIOTLB_ANY)
+ tlb = memblock_alloc(bytes, PAGE_SIZE);
+ else
+ tlb = memblock_alloc_low(bytes, PAGE_SIZE);
+ if (!tlb) {
+ pr_warn("%s: failed to allocate tlb structure\n", __func__);
+ return;
+ }
+
+ if (remap && remap(tlb, nslabs) < 0) {
+ memblock_free(tlb, PAGE_ALIGN(bytes));
+
+ nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
+ if (nslabs >= IO_TLB_MIN_SLABS)
+ goto retry;
+
+ pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes);
+ return;
+ }
alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
mem->slots = memblock_alloc(alloc_size, PAGE_SIZE);
- if (!mem->slots)
- panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
- __func__, alloc_size, PAGE_SIZE);
+ if (!mem->slots) {
+ pr_warn("%s: Failed to allocate %zu bytes align=0x%lx\n",
+ __func__, alloc_size, PAGE_SIZE);
+ return;
+ }
- swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
+ mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area),
+ default_nareas), SMP_CACHE_BYTES);
+ if (!mem->areas) {
+ pr_warn("%s: Failed to allocate mem->areas.\n", __func__);
+ return;
+ }
+
+ swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false,
+ default_nareas);
- if (verbose)
+ if (flags & SWIOTLB_VERBOSE)
swiotlb_print_info();
- swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
- return 0;
}
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void __init
-swiotlb_init(int verbose)
+void __init swiotlb_init(bool addressing_limit, unsigned int flags)
{
- size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
- void *tlb;
-
- if (swiotlb_force == SWIOTLB_NO_FORCE)
- return;
-
- /* Get IO TLB memory from the low pages */
- tlb = memblock_alloc_low(bytes, PAGE_SIZE);
- if (!tlb)
- goto fail;
- if (swiotlb_init_with_tbl(tlb, default_nslabs, verbose))
- goto fail_free_mem;
- return;
-
-fail_free_mem:
- memblock_free(tlb, bytes);
-fail:
- pr_warn("Cannot allocate buffer");
+ swiotlb_init_remap(addressing_limit, flags, NULL);
}
/*
@@ -257,73 +385,81 @@ fail:
* initialize the swiotlb later using the slab allocator if needed.
* This should be just like above, but with some error catching.
*/
-int
-swiotlb_late_init_with_default_size(size_t default_size)
+int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+ int (*remap)(void *tlb, unsigned long nslabs))
{
- unsigned long nslabs =
- ALIGN(default_size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
- unsigned long bytes;
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+ unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
unsigned char *vstart = NULL;
- unsigned int order;
+ unsigned int order, area_order;
+ bool retried = false;
int rc = 0;
- if (swiotlb_force == SWIOTLB_NO_FORCE)
+ if (swiotlb_force_disable)
return 0;
- /*
- * Get IO TLB memory from the low pages
- */
+retry:
order = get_order(nslabs << IO_TLB_SHIFT);
nslabs = SLABS_PER_PAGE << order;
- bytes = nslabs << IO_TLB_SHIFT;
while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
- vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+ vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
order);
if (vstart)
break;
order--;
+ nslabs = SLABS_PER_PAGE << order;
+ retried = true;
}
if (!vstart)
return -ENOMEM;
- if (order != get_order(bytes)) {
+ if (remap)
+ rc = remap(vstart, nslabs);
+ if (rc) {
+ free_pages((unsigned long)vstart, order);
+
+ nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
+ if (nslabs < IO_TLB_MIN_SLABS)
+ return rc;
+ retried = true;
+ goto retry;
+ }
+
+ if (retried) {
pr_warn("only able to allocate %ld MB\n",
(PAGE_SIZE << order) >> 20);
- nslabs = SLABS_PER_PAGE << order;
}
- rc = swiotlb_late_init_with_tbl(vstart, nslabs);
- if (rc)
- free_pages((unsigned long)vstart, order);
- return rc;
-}
-
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
-{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
- unsigned long bytes = nslabs << IO_TLB_SHIFT;
+ if (!default_nareas)
+ swiotlb_adjust_nareas(num_possible_cpus());
- if (swiotlb_force == SWIOTLB_NO_FORCE)
- return 0;
-
- /* protect against double initialization */
- if (WARN_ON_ONCE(mem->nslabs))
- return -ENOMEM;
+ area_order = get_order(array_size(sizeof(*mem->areas),
+ default_nareas));
+ mem->areas = (struct io_tlb_area *)
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order);
+ if (!mem->areas)
+ goto error_area;
mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(array_size(sizeof(*mem->slots), nslabs)));
if (!mem->slots)
- return -ENOMEM;
+ goto error_slots;
- set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
- swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
+ set_memory_decrypted((unsigned long)vstart,
+ (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
+ swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
+ default_nareas);
swiotlb_print_info();
- swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
return 0;
+
+error_slots:
+ free_pages((unsigned long)mem->areas, area_order);
+error_area:
+ free_pages((unsigned long)vstart, order);
+ return -ENOMEM;
}
void __init swiotlb_exit(void)
@@ -331,6 +467,10 @@ void __init swiotlb_exit(void)
struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long tbl_vaddr;
size_t tbl_size, slots_size;
+ unsigned int area_order;
+
+ if (swiotlb_force_bounce)
+ return;
if (!mem->nslabs)
return;
@@ -342,9 +482,14 @@ void __init swiotlb_exit(void)
set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
if (mem->late_alloc) {
+ area_order = get_order(array_size(sizeof(*mem->areas),
+ mem->nareas));
+ free_pages((unsigned long)mem->areas, area_order);
free_pages(tbl_vaddr, get_order(tbl_size));
free_pages((unsigned long)mem->slots, get_order(slots_size));
} else {
+ memblock_free_late(__pa(mem->areas),
+ array_size(sizeof(*mem->areas), mem->nareas));
memblock_free_late(mem->start, tbl_size);
memblock_free_late(__pa(mem->slots), slots_size);
}
@@ -371,7 +516,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
phys_addr_t orig_addr = mem->slots[index].orig_addr;
size_t alloc_size = mem->slots[index].alloc_size;
unsigned long pfn = PFN_DOWN(orig_addr);
- unsigned char *vaddr = phys_to_virt(tlb_addr);
+ unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
unsigned int tlb_offset, orig_addr_offset;
if (orig_addr == INVALID_PHYS_ADDR)
@@ -405,9 +550,8 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
}
if (PageHighMem(pfn_to_page(pfn))) {
- /* The buffer does not have a mapping. Map it in and copy */
unsigned int offset = orig_addr & ~PAGE_MASK;
- char *buffer;
+ struct page *page;
unsigned int sz = 0;
unsigned long flags;
@@ -415,12 +559,11 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
sz = min_t(size_t, PAGE_SIZE - offset, size);
local_irq_save(flags);
- buffer = kmap_atomic(pfn_to_page(pfn));
+ page = pfn_to_page(pfn);
if (dir == DMA_TO_DEVICE)
- memcpy(vaddr, buffer + offset, sz);
+ memcpy_from_page(vaddr, page, offset, sz);
else
- memcpy(buffer + offset, vaddr, sz);
- kunmap_atomic(buffer);
+ memcpy_to_page(page, offset, vaddr, sz);
local_irq_restore(flags);
size -= sz;
@@ -435,7 +578,10 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
}
}
-#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT))
+static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx)
+{
+ return start + (idx << IO_TLB_SHIFT);
+}
/*
* Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
@@ -447,9 +593,9 @@ static inline unsigned long get_max_slots(unsigned long boundary_mask)
return nr_slots(boundary_mask + 1);
}
-static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
+static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int index)
{
- if (index >= mem->nslabs)
+ if (index >= mem->area_nslabs)
return 0;
return index;
}
@@ -458,10 +604,12 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
* Find a suitable number of IO TLB entries size that will fit this request and
* allocate a buffer from that IO TLB pool.
*/
-static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
- size_t alloc_size, unsigned int alloc_align_mask)
+static int swiotlb_do_find_slots(struct device *dev, int area_index,
+ phys_addr_t orig_addr, size_t alloc_size,
+ unsigned int alloc_align_mask)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ struct io_tlb_area *area = mem->areas + area_index;
unsigned long boundary_mask = dma_get_seg_boundary(dev);
dma_addr_t tbl_dma_addr =
phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
@@ -472,8 +620,11 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
unsigned int index, wrap, count = 0, i;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
unsigned long flags;
+ unsigned int slot_base;
+ unsigned int slot_index;
BUG_ON(!nslots);
+ BUG_ON(area_index >= mem->nareas);
/*
* For mappings with an alignment requirement don't bother looping to
@@ -485,16 +636,20 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
- spin_lock_irqsave(&mem->lock, flags);
- if (unlikely(nslots > mem->nslabs - mem->used))
+ spin_lock_irqsave(&area->lock, flags);
+ if (unlikely(nslots > mem->area_nslabs - area->used))
goto not_found;
- index = wrap = wrap_index(mem, ALIGN(mem->index, stride));
+ slot_base = area_index * mem->area_nslabs;
+ index = wrap = wrap_area_index(mem, ALIGN(area->index, stride));
+
do {
+ slot_index = slot_base + index;
+
if (orig_addr &&
- (slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
- (orig_addr & iotlb_align_mask)) {
- index = wrap_index(mem, index + 1);
+ (slot_addr(tbl_dma_addr, slot_index) &
+ iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
+ index = wrap_area_index(mem, index + 1);
continue;
}
@@ -503,26 +658,26 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
* contiguous buffers, we allocate the buffers from that slot
* and mark the entries as '0' indicating unavailable.
*/
- if (!iommu_is_span_boundary(index, nslots,
+ if (!iommu_is_span_boundary(slot_index, nslots,
nr_slots(tbl_dma_addr),
max_slots)) {
- if (mem->slots[index].list >= nslots)
+ if (mem->slots[slot_index].list >= nslots)
goto found;
}
- index = wrap_index(mem, index + stride);
+ index = wrap_area_index(mem, index + stride);
} while (index != wrap);
not_found:
- spin_unlock_irqrestore(&mem->lock, flags);
+ spin_unlock_irqrestore(&area->lock, flags);
return -1;
found:
- for (i = index; i < index + nslots; i++) {
+ for (i = slot_index; i < slot_index + nslots; i++) {
mem->slots[i].list = 0;
- mem->slots[i].alloc_size =
- alloc_size - (offset + ((i - index) << IO_TLB_SHIFT));
+ mem->slots[i].alloc_size = alloc_size - (offset +
+ ((i - slot_index) << IO_TLB_SHIFT));
}
- for (i = index - 1;
+ for (i = slot_index - 1;
io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
mem->slots[i].list; i--)
mem->slots[i].list = ++count;
@@ -530,14 +685,42 @@ found:
/*
* Update the indices to avoid searching in the next round.
*/
- if (index + nslots < mem->nslabs)
- mem->index = index + nslots;
+ if (index + nslots < mem->area_nslabs)
+ area->index = index + nslots;
else
- mem->index = 0;
- mem->used += nslots;
+ area->index = 0;
+ area->used += nslots;
+ spin_unlock_irqrestore(&area->lock, flags);
+ return slot_index;
+}
- spin_unlock_irqrestore(&mem->lock, flags);
- return index;
+static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+ size_t alloc_size, unsigned int alloc_align_mask)
+{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ int start = raw_smp_processor_id() & (mem->nareas - 1);
+ int i = start, index;
+
+ do {
+ index = swiotlb_do_find_slots(dev, i, orig_addr, alloc_size,
+ alloc_align_mask);
+ if (index >= 0)
+ return index;
+ if (++i >= mem->nareas)
+ i = 0;
+ } while (i != start);
+
+ return -1;
+}
+
+static unsigned long mem_used(struct io_tlb_mem *mem)
+{
+ int i;
+ unsigned long used = 0;
+
+ for (i = 0; i < mem->nareas; i++)
+ used += mem->areas[i].used;
+ return used;
}
phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
@@ -551,8 +734,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
int index;
phys_addr_t tlb_addr;
- if (!mem)
- panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+ if (!mem || !mem->nslabs) {
+ dev_warn_ratelimited(dev,
+ "Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
@@ -569,7 +755,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
- alloc_size, mem->nslabs, mem->used);
+ alloc_size, mem->nslabs, mem_used(mem));
return (phys_addr_t)DMA_MAPPING_ERROR;
}
@@ -581,9 +767,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
for (i = 0; i < nr_slots(alloc_size + offset); i++)
mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
tlb_addr = slot_addr(mem->start, index) + offset;
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
- (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
+ /*
+ * When dir == DMA_FROM_DEVICE we could omit the copy from the orig
+ * to the tlb buffer, if we knew for sure the device will
+ * overwrite the entire current content. But we don't. Thus
+ * unconditional bounce may prevent leaking swiotlb content (i.e.
+ * kernel memory) to user-space.
+ */
+ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
return tlb_addr;
}
@@ -594,6 +785,8 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
int nslots = nr_slots(mem->slots[index].alloc_size + offset);
+ int aindex = index / mem->area_nslabs;
+ struct io_tlb_area *area = &mem->areas[aindex];
int count, i;
/*
@@ -602,7 +795,9 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
* While returning the entries to the free list, we merge the entries
* with slots below and above the pool being returned.
*/
- spin_lock_irqsave(&mem->lock, flags);
+ BUG_ON(aindex >= mem->nareas);
+
+ spin_lock_irqsave(&area->lock, flags);
if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
count = mem->slots[index + nslots].list;
else
@@ -626,8 +821,8 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list;
i--)
mem->slots[i].list = ++count;
- mem->used -= nslots;
- spin_unlock_irqrestore(&mem->lock, flags);
+ area->used -= nslots;
+ spin_unlock_irqrestore(&area->lock, flags);
}
/*
@@ -675,8 +870,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
phys_addr_t swiotlb_addr;
dma_addr_t dma_addr;
- trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
- swiotlb_force);
+ trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size);
swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
attrs);
@@ -701,7 +895,18 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
size_t swiotlb_max_mapping_size(struct device *dev)
{
- return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
+ int min_align_mask = dma_get_min_align_mask(dev);
+ int min_align = 0;
+
+ /*
+ * swiotlb_find_slots() skips slots according to
+ * min align mask. This affects max mapping size.
+ * Take it into acount here.
+ */
+ if (min_align_mask)
+ min_align = roundup(min_align_mask, IO_TLB_SIZE);
+
+ return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align;
}
bool is_swiotlb_active(struct device *dev)
@@ -712,47 +917,37 @@ bool is_swiotlb_active(struct device *dev)
}
EXPORT_SYMBOL_GPL(is_swiotlb_active);
-#ifdef CONFIG_DEBUG_FS
-static struct dentry *debugfs_dir;
+static int io_tlb_used_get(void *data, u64 *val)
+{
+ *val = mem_used(&io_tlb_default_mem);
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n");
-static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem)
+static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
+ const char *dirname)
{
+ mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
+ if (!mem->nslabs)
+ return;
+
debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
- debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
+ debugfs_create_file("io_tlb_used", 0400, mem->debugfs, NULL,
+ &fops_io_tlb_used);
}
-static int __init swiotlb_create_default_debugfs(void)
+static int __init __maybe_unused swiotlb_create_default_debugfs(void)
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
-
- debugfs_dir = debugfs_create_dir("swiotlb", NULL);
- if (mem->nslabs) {
- mem->debugfs = debugfs_dir;
- swiotlb_create_debugfs_files(mem);
- }
+ swiotlb_create_debugfs_files(&io_tlb_default_mem, "swiotlb");
return 0;
}
+#ifdef CONFIG_DEBUG_FS
late_initcall(swiotlb_create_default_debugfs);
-
#endif
#ifdef CONFIG_DMA_RESTRICTED_POOL
-#ifdef CONFIG_DEBUG_FS
-static void rmem_swiotlb_debugfs_init(struct reserved_mem *rmem)
-{
- struct io_tlb_mem *mem = rmem->priv;
-
- mem->debugfs = debugfs_create_dir(rmem->name, debugfs_dir);
- swiotlb_create_debugfs_files(mem);
-}
-#else
-static void rmem_swiotlb_debugfs_init(struct reserved_mem *rmem)
-{
-}
-#endif
-
struct page *swiotlb_alloc(struct device *dev, size_t size)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
@@ -789,6 +984,9 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
struct io_tlb_mem *mem = rmem->priv;
unsigned long nslabs = rmem->size >> IO_TLB_SHIFT;
+ /* Set Per-device io tlb area to one */
+ unsigned int nareas = 1;
+
/*
* Since multiple devices can share the same pool, the private data,
* io_tlb_mem struct, will be initialized by the first device attached
@@ -799,22 +997,29 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
if (!mem)
return -ENOMEM;
- mem->slots = kzalloc(array_size(sizeof(*mem->slots), nslabs),
- GFP_KERNEL);
+ mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL);
if (!mem->slots) {
kfree(mem);
return -ENOMEM;
}
+ mem->areas = kcalloc(nareas, sizeof(*mem->areas),
+ GFP_KERNEL);
+ if (!mem->areas) {
+ kfree(mem->slots);
+ kfree(mem);
+ return -ENOMEM;
+ }
+
set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
rmem->size >> PAGE_SHIFT);
- swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false);
- mem->force_bounce = true;
+ swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
+ false, nareas);
mem->for_alloc = true;
rmem->priv = mem;
- rmem_swiotlb_debugfs_init(rmem);
+ swiotlb_create_debugfs_files(mem, rmem->name);
}
dev->dma_io_tlb_mem = mem;