aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/page_pool.c
diff options
context:
space:
mode:
authorJesper Dangaard Brouer <brouer@redhat.com>2019-06-18 15:05:47 +0200
committerDavid S. Miller <davem@davemloft.net>2019-06-19 11:23:13 -0400
commit99c07c43c4ea0bc101331401a0fabfc51933c6a3 (patch)
treea21190f1a54dc072c9e3f6e9a94a5a0970d5f3bf /net/core/page_pool.c
parentmlx5: more strict use of page_pool API (diff)
downloadlinux-dev-99c07c43c4ea0bc101331401a0fabfc51933c6a3.tar.xz
linux-dev-99c07c43c4ea0bc101331401a0fabfc51933c6a3.zip
xdp: tracking page_pool resources and safe removal
This patch is needed before we can allow drivers to use page_pool for DMA-mappings. Today with page_pool and XDP return API, it is possible to remove the page_pool object (from rhashtable), while there are still in-flight packet-pages. This is safely handled via RCU and failed lookups in __xdp_return() fallback to call put_page(), when page_pool object is gone. In-case page is still DMA mapped, this will result in page note getting correctly DMA unmapped. To solve this, the page_pool is extended with tracking in-flight pages. And XDP disconnect system queries page_pool and waits, via workqueue, for all in-flight pages to be returned. To avoid killing performance when tracking in-flight pages, the implement use two (unsigned) counters, that in placed on different cache-lines, and can be used to deduct in-flight packets. This is done by mapping the unsigned "sequence" counters onto signed Two's complement arithmetic operations. This is e.g. used by kernel's time_after macros, described in kernel commit 1ba3aab3033b and 5a581b367b5, and also explained in RFC1982. The trick is these two incrementing counters only need to be read and compared, when checking if it's safe to free the page_pool structure. Which will only happen when driver have disconnected RX/alloc side. Thus, on a non-fast-path. It is chosen that page_pool tracking is also enabled for the non-DMA use-case, as this can be used for statistics later. After this patch, using page_pool requires more strict resource "release", e.g. via page_pool_release_page() that was introduced in this patchset, and previous patches implement/fix this more strict requirement. Drivers no-longer call page_pool_destroy(). Drivers already call xdp_rxq_info_unreg() which call xdp_rxq_info_unreg_mem_model(), which will attempt to disconnect the mem id, and if attempt fails schedule the disconnect for later via delayed workqueue. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Reviewed-by: Ilias Apalodimas <ilias.apalodimas@linaro.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/page_pool.c')
-rw-r--r--net/core/page_pool.c62
1 files changed, 46 insertions, 16 deletions
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 41391b5dc14c..8679e24fd665 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -43,6 +43,8 @@ static int page_pool_init(struct page_pool *pool,
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
+ atomic_set(&pool->pages_state_release_cnt, 0);
+
return 0;
}
@@ -151,6 +153,9 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
page->dma_addr = dma;
skip_dma_map:
+ /* Track how many pages are held 'in-flight' */
+ pool->pages_state_hold_cnt++;
+
/* When page just alloc'ed is should/must have refcnt 1. */
return page;
}
@@ -173,6 +178,33 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
}
EXPORT_SYMBOL(page_pool_alloc_pages);
+/* Calculate distance between two u32 values, valid if distance is below 2^(31)
+ * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
+ */
+#define _distance(a, b) (s32)((a) - (b))
+
+static s32 page_pool_inflight(struct page_pool *pool)
+{
+ u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
+ u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
+ s32 distance;
+
+ distance = _distance(hold_cnt, release_cnt);
+
+ /* TODO: Add tracepoint here */
+ return distance;
+}
+
+static bool __page_pool_safe_to_destroy(struct page_pool *pool)
+{
+ s32 inflight = page_pool_inflight(pool);
+
+ /* The distance should not be able to become negative */
+ WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
+
+ return (inflight == 0);
+}
+
/* Cleanup page_pool state from page */
static void __page_pool_clean_page(struct page_pool *pool,
struct page *page)
@@ -180,7 +212,7 @@ static void __page_pool_clean_page(struct page_pool *pool,
dma_addr_t dma;
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
- return;
+ goto skip_dma_unmap;
dma = page->dma_addr;
/* DMA unmap */
@@ -188,11 +220,16 @@ static void __page_pool_clean_page(struct page_pool *pool,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC);
page->dma_addr = 0;
+skip_dma_unmap:
+ atomic_inc(&pool->pages_state_release_cnt);
}
/* unmap the page and clean our state */
void page_pool_unmap_page(struct page_pool *pool, struct page *page)
{
+ /* When page is unmapped, this implies page will not be
+ * returned to page_pool.
+ */
__page_pool_clean_page(pool, page);
}
EXPORT_SYMBOL(page_pool_unmap_page);
@@ -201,6 +238,7 @@ EXPORT_SYMBOL(page_pool_unmap_page);
static void __page_pool_return_page(struct page_pool *pool, struct page *page)
{
__page_pool_clean_page(pool, page);
+
put_page(page);
/* An optimization would be to call __free_pages(page, pool->p.order)
* knowing page is not part of page-cache (thus avoiding a
@@ -296,24 +334,17 @@ void __page_pool_free(struct page_pool *pool)
{
WARN(pool->alloc.count, "API usage violation");
WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
+ WARN(!__page_pool_safe_to_destroy(pool), "still in-flight pages");
ptr_ring_cleanup(&pool->ring, NULL);
kfree(pool);
}
EXPORT_SYMBOL(__page_pool_free);
-static void __page_pool_destroy_rcu(struct rcu_head *rcu)
-{
- struct page_pool *pool;
-
- pool = container_of(rcu, struct page_pool, rcu);
-
- __page_pool_empty_ring(pool);
- __page_pool_free(pool);
-}
-
-/* Cleanup and release resources */
-void page_pool_destroy(struct page_pool *pool)
+/* Request to shutdown: release pages cached by page_pool, and check
+ * for in-flight pages
+ */
+bool __page_pool_request_shutdown(struct page_pool *pool)
{
struct page *page;
@@ -331,7 +362,6 @@ void page_pool_destroy(struct page_pool *pool)
*/
__page_pool_empty_ring(pool);
- /* An xdp_mem_allocator can still ref page_pool pointer */
- call_rcu(&pool->rcu, __page_pool_destroy_rcu);
+ return __page_pool_safe_to_destroy(pool);
}
-EXPORT_SYMBOL(page_pool_destroy);
+EXPORT_SYMBOL(__page_pool_request_shutdown);