author    David S. Miller <davem@davemloft.net>  2019-06-19 11:23:14 -0400
committer David S. Miller <davem@davemloft.net>  2019-06-19 11:23:14 -0400
commit    2a54003e7af1eaddc05848dac14f7bcd77301478 (patch)
tree      31644496760e3272d55f8248ef49eed16c32ec1d /include
parent    net: netsec: remove loops in napi Rx process (diff)
parent    page_pool: make sure struct device is stable (diff)
Merge branch 'xdp-page_pool-fixes-and-in-flight-accounting'
Jesper Dangaard Brouer says:

====================
xdp: page_pool fixes and in-flight accounting

This patchset fixes the page_pool API and its users, such that drivers
can use it for DMA mapping. A number of places existed where the DMA
mapping would not get released/unmapped; all of these are fixed. This
occurs e.g. when an xdp_frame gets converted to an SKB, as the network
stack doesn't have any callback for XDP memory models.

The patchset also addresses a shutdown race condition. Today, removing
an XDP memory model based on page_pool is only delayed one RCU grace
period. This isn't enough, as redirected xdp_frames can still be
in-flight on different queues (remote driver TX, cpumap or veth).

We stress that when drivers use page_pool for DMA mapping, they MUST
use one packet per page. This might change in the future, but more
work lies ahead before we can lift this restriction.

This patchset changes the page_pool API to be more strict, as in-flight
page accounting is added.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
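For orientation, here is a minimal driver-side sketch of the stricter
lifecycle this series imposes; the mydrv_* helpers are hypothetical and
not part of this merge:

/* Sketch (assumed driver context): with page_pool doing DMA mapping,
 * each page must be released exactly once when it leaves the pool,
 * and the pool may not be freed while pages are still in flight.
 */
#include <net/page_pool.h>

static struct page *mydrv_rx_alloc(struct page_pool *pool)
{
	/* With PP_FLAG_DMA_MAP, returned pages are already DMA-mapped */
	return page_pool_dev_alloc_pages(pool);
}

static void mydrv_page_to_stack(struct page_pool *pool, struct page *page)
{
	/* The page "leaves" the pool (e.g. xdp_frame becomes an SKB):
	 * release the DMA mapping and in-flight accounting here.
	 */
	page_pool_release_page(pool, page);
}

static bool mydrv_teardown(struct page_pool *pool)
{
	/* Alloc-side must already be disconnected; this returns false
	 * while redirected pages are still in flight elsewhere.
	 */
	return page_pool_request_shutdown(pool);
}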
Diffstat (limited to 'include')
-rw-r--r--  include/net/page_pool.h           69
-rw-r--r--  include/net/xdp.h                 15
-rw-r--r--  include/net/xdp_priv.h            23
-rw-r--r--  include/trace/events/page_pool.h  87
-rw-r--r--  include/trace/events/xdp.h       115
5 files changed, 300 insertions(+), 9 deletions(-)
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 694d055e01ef..f09b3f1994e6 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -16,14 +16,16 @@
* page_pool_alloc_pages() call. Drivers should likely use
* page_pool_dev_alloc_pages() replacing dev_alloc_pages().
*
- * If page_pool handles DMA mapping (use page->private), then API user
- * is responsible for invoking page_pool_put_page() once. In-case of
- * elevated refcnt, the DMA state is released, assuming other users of
- * the page will eventually call put_page().
+ * The API keeps track of in-flight pages, in order to let API users
+ * know when it is safe to deallocate a page_pool object. Thus, API
+ * users must make sure to call page_pool_release_page() when a page
+ * is "leaving" the page_pool, or call page_pool_put_page() where
+ * appropriate, in order to maintain correct accounting.
*
- * If no DMA mapping is done, then it can act as shim-layer that
- * fall-through to alloc_page. As no state is kept on the page, the
- * regular put_page() call is sufficient.
+ * API user must only call page_pool_put_page() once on a page, as it
+ * will either recycle the page, or in case of elevated refcnt, it
+ * will release the DMA mapping and in-flight state accounting. We
+ * hope to lift this requirement in the future.
*/
#ifndef _NET_PAGE_POOL_H
#define _NET_PAGE_POOL_H
@@ -66,9 +68,10 @@ struct page_pool_params {
};
struct page_pool {
- struct rcu_head rcu;
struct page_pool_params p;
+ u32 pages_state_hold_cnt;
+
/*
* Data structure for allocation side
*
@@ -96,6 +99,8 @@ struct page_pool {
* TODO: Implement bulk return pages into this structure.
*/
struct ptr_ring ring;
+
+ atomic_t pages_state_release_cnt;
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
@@ -109,7 +114,16 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
struct page_pool *page_pool_create(const struct page_pool_params *params);
-void page_pool_destroy(struct page_pool *pool);
+void __page_pool_free(struct page_pool *pool);
+static inline void page_pool_free(struct page_pool *pool)
+{
+ /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+ * allow registering MEM_TYPE_PAGE_POOL, but this #ifdef still
+ * shields the linker from the unresolved symbol.
+ */
+#ifdef CONFIG_PAGE_POOL
+ __page_pool_free(pool);
+#endif
+}
/* Never call this directly, use helpers below */
void __page_pool_put_page(struct page_pool *pool,
@@ -132,6 +146,43 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
__page_pool_put_page(pool, page, true);
}
+/* API user MUST have disconnected alloc-side (not allowed to call
+ * page_pool_alloc_pages()) before calling this. The free-side can
+ * still run concurrently, to handle in-flight packet-pages.
+ *
+ * A request to shutdown can fail (with false) if there are still
+ * in-flight packet-pages.
+ */
+bool __page_pool_request_shutdown(struct page_pool *pool);
+static inline bool page_pool_request_shutdown(struct page_pool *pool)
+{
+ /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+ * allow registering MEM_TYPE_PAGE_POOL, but this #ifdef still
+ * shields the linker from the unresolved symbol.
+ */
+#ifdef CONFIG_PAGE_POOL
+ return __page_pool_request_shutdown(pool);
+#else
+	return false;
+#endif
+}
+
+/* Disconnects a page from a page_pool. API users can have a need to
+ * disconnect a page, to allow it to be used as a regular page (one
+ * that will eventually be returned to the normal page-allocator via
+ * put_page).
+ */
+void page_pool_unmap_page(struct page_pool *pool, struct page *page);
+static inline void page_pool_release_page(struct page_pool *pool,
+ struct page *page)
+{
+#ifdef CONFIG_PAGE_POOL
+ page_pool_unmap_page(pool, page);
+#endif
+}
+
+static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
+{
+ return page->dma_addr;
+}
+
static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
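The pages_state_hold_cnt/pages_state_release_cnt pair added above makes
the in-flight query cheap: the alloc side is a single consumer, so it
counts holds without atomics, while the free side counts releases
atomically. A sketch of the resulting computation follows; the function
name is assumed, and the real accounting lives in net/core/page_pool.c:

#include <linux/atomic.h>
#include <net/page_pool.h>

/* Sketch: a signed 32-bit distance tolerates u32 counter wrap-around,
 * provided fewer than 2^31 pages are ever in flight at once.
 */
static s32 mypool_inflight(struct page_pool *pool)
{
	u32 hold = READ_ONCE(pool->pages_state_hold_cnt);
	u32 release = atomic_read(&pool->pages_state_release_cnt);

	return (s32)(hold - release);	/* 0 means safe to shut down */
}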
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 8e0deddef35c..40c6d3398458 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -129,6 +129,21 @@ void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
+/* When sending an xdp_frame into the network stack, there is no
+ * return-point callback, which is needed to release e.g. DMA-mapping
+ * resources with page_pool. Thus, provide an explicit function to
+ * release frame resources.
+ */
+void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
+static inline void xdp_release_frame(struct xdp_frame *xdpf)
+{
+ struct xdp_mem_info *mem = &xdpf->mem;
+
+	/* Currently only page_pool needs this */
+ if (mem->type == MEM_TYPE_PAGE_POOL)
+ __xdp_release_frame(xdpf->data, mem);
+}
+
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
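A sketch of where xdp_release_frame() fits (hypothetical helper; the
SKB construction is elided): the call must happen at the point where
the frame's data is handed to the stack, because no return-path
callback will ever fire for it afterwards.

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/xdp.h>

/* Sketch: hand an xdp_frame's data to the network stack as an SKB.
 * For page_pool-backed frames this releases the DMA mapping and the
 * in-flight accounting; for other memory types it is a no-op.
 */
static void mydrv_frame_to_stack(struct xdp_frame *xdpf,
				 struct sk_buff *skb)
{
	/* skb is assumed to already wrap xdpf->data */
	xdp_release_frame(xdpf);
	netif_receive_skb(skb);
}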
diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h
new file mode 100644
index 000000000000..6a8cba6ea79a
--- /dev/null
+++ b/include/net/xdp_priv.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_NET_XDP_PRIV_H__
+#define __LINUX_NET_XDP_PRIV_H__
+
+#include <linux/rhashtable.h>
+
+/* Private to net/core/xdp.c, but used by trace/events/xdp.h */
+struct xdp_mem_allocator {
+ struct xdp_mem_info mem;
+ union {
+ void *allocator;
+ struct page_pool *page_pool;
+ struct zero_copy_allocator *zc_alloc;
+ };
+ int disconnect_cnt;
+ unsigned long defer_start;
+ struct rhash_head node;
+ struct rcu_head rcu;
+ struct delayed_work defer_wq;
+ unsigned long defer_warn;
+};
+
+#endif /* __LINUX_NET_XDP_PRIV_H__ */
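The defer_* fields exist to support the delayed, retrying disconnect
described in the cover letter. Below is a sketch of the retry logic
they enable; the shape is assumed (modeled on net/core/xdp.c in this
series), and xa_rcu_free/mem_id_defer_retry are hypothetical names:

#include <linux/jiffies.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <net/page_pool.h>
#include <net/xdp_priv.h>

/* Hypothetical RCU callback: free the allocator once no CPU can
 * still hold an RCU-protected reference to it.
 */
static void xa_rcu_free(struct rcu_head *rcu)
{
	struct xdp_mem_allocator *xa =
		container_of(rcu, struct xdp_mem_allocator, rcu);

	page_pool_free(xa->page_pool);
	kfree(xa);
}

/* Hypothetical delayed-work handler: retry the shutdown until no
 * pages are in flight, warning periodically via defer_warn.
 */
static void mem_id_defer_retry(struct work_struct *work)
{
	struct xdp_mem_allocator *xa =
		container_of(to_delayed_work(work),
			     struct xdp_mem_allocator, defer_wq);

	if (page_pool_request_shutdown(xa->page_pool)) {
		call_rcu(&xa->rcu, xa_rcu_free);
		return;
	}

	if (time_after_eq(jiffies, xa->defer_warn)) {
		pr_warn("%s() stalled mem.id=%u\n", __func__, xa->mem.id);
		xa->defer_warn = jiffies + 60 * HZ;
	}
	schedule_delayed_work(&xa->defer_wq, HZ);
}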
diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h
new file mode 100644
index 000000000000..47b5ee880aa9
--- /dev/null
+++ b/include/trace/events/page_pool.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM page_pool
+
+#if !defined(_TRACE_PAGE_POOL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGE_POOL_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include <net/page_pool.h>
+
+TRACE_EVENT(page_pool_inflight,
+
+ TP_PROTO(const struct page_pool *pool,
+ s32 inflight, u32 hold, u32 release),
+
+ TP_ARGS(pool, inflight, hold, release),
+
+ TP_STRUCT__entry(
+ __field(const struct page_pool *, pool)
+ __field(s32, inflight)
+ __field(u32, hold)
+ __field(u32, release)
+ ),
+
+ TP_fast_assign(
+ __entry->pool = pool;
+ __entry->inflight = inflight;
+ __entry->hold = hold;
+ __entry->release = release;
+ ),
+
+ TP_printk("page_pool=%p inflight=%d hold=%u release=%u",
+ __entry->pool, __entry->inflight, __entry->hold, __entry->release)
+);
+
+TRACE_EVENT(page_pool_state_release,
+
+ TP_PROTO(const struct page_pool *pool,
+ const struct page *page, u32 release),
+
+ TP_ARGS(pool, page, release),
+
+ TP_STRUCT__entry(
+ __field(const struct page_pool *, pool)
+ __field(const struct page *, page)
+ __field(u32, release)
+ ),
+
+ TP_fast_assign(
+ __entry->pool = pool;
+ __entry->page = page;
+ __entry->release = release;
+ ),
+
+ TP_printk("page_pool=%p page=%p release=%u",
+ __entry->pool, __entry->page, __entry->release)
+);
+
+TRACE_EVENT(page_pool_state_hold,
+
+ TP_PROTO(const struct page_pool *pool,
+ const struct page *page, u32 hold),
+
+ TP_ARGS(pool, page, hold),
+
+ TP_STRUCT__entry(
+ __field(const struct page_pool *, pool)
+ __field(const struct page *, page)
+ __field(u32, hold)
+ ),
+
+ TP_fast_assign(
+ __entry->pool = pool;
+ __entry->page = page;
+ __entry->hold = hold;
+ ),
+
+ TP_printk("page_pool=%p page=%p hold=%u",
+ __entry->pool, __entry->page, __entry->hold)
+);
+
+#endif /* _TRACE_PAGE_POOL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
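A sketch of how the pool implementation would feed these events
(call-site placement is assumed; the actual sites are in
net/core/page_pool.c, which must also define CREATE_TRACE_POINTS once
before including this header):

#include <trace/events/page_pool.h>

/* Sketch: hold is traced when a page is handed out to the driver,
 * release when the page's pool state is torn down.
 */
static void mypool_account_hold(struct page_pool *pool, struct page *page)
{
	pool->pages_state_hold_cnt++;
	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
}

static void mypool_account_release(struct page_pool *pool, struct page *page)
{
	u32 release_cnt = atomic_inc_return(&pool->pages_state_release_cnt);

	trace_page_pool_state_release(pool, page, release_cnt);
}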
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index e95cb86b65cf..bb5e380e2ef3 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -269,6 +269,121 @@ TRACE_EVENT(xdp_devmap_xmit,
__entry->from_ifindex, __entry->to_ifindex, __entry->err)
);
+/* Users are expected to already include <net/xdp.h>, but not xdp_priv.h */
+#include <net/xdp_priv.h>
+
+#define __MEM_TYPE_MAP(FN) \
+ FN(PAGE_SHARED) \
+ FN(PAGE_ORDER0) \
+ FN(PAGE_POOL) \
+ FN(ZERO_COPY)
+
+#define __MEM_TYPE_TP_FN(x) \
+ TRACE_DEFINE_ENUM(MEM_TYPE_##x);
+#define __MEM_TYPE_SYM_FN(x) \
+ { MEM_TYPE_##x, #x },
+#define __MEM_TYPE_SYM_TAB \
+ __MEM_TYPE_MAP(__MEM_TYPE_SYM_FN) { -1, 0 }
+__MEM_TYPE_MAP(__MEM_TYPE_TP_FN)
+
+TRACE_EVENT(mem_disconnect,
+
+ TP_PROTO(const struct xdp_mem_allocator *xa,
+ bool safe_to_remove, bool force),
+
+ TP_ARGS(xa, safe_to_remove, force),
+
+ TP_STRUCT__entry(
+ __field(const struct xdp_mem_allocator *, xa)
+ __field(u32, mem_id)
+ __field(u32, mem_type)
+ __field(const void *, allocator)
+ __field(bool, safe_to_remove)
+ __field(bool, force)
+ __field(int, disconnect_cnt)
+ ),
+
+ TP_fast_assign(
+ __entry->xa = xa;
+ __entry->mem_id = xa->mem.id;
+ __entry->mem_type = xa->mem.type;
+ __entry->allocator = xa->allocator;
+ __entry->safe_to_remove = safe_to_remove;
+ __entry->force = force;
+ __entry->disconnect_cnt = xa->disconnect_cnt;
+ ),
+
+ TP_printk("mem_id=%d mem_type=%s allocator=%p"
+ " safe_to_remove=%s force=%s disconnect_cnt=%d",
+ __entry->mem_id,
+ __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB),
+ __entry->allocator,
+ __entry->safe_to_remove ? "true" : "false",
+ __entry->force ? "true" : "false",
+ __entry->disconnect_cnt
+ )
+);
+
+TRACE_EVENT(mem_connect,
+
+ TP_PROTO(const struct xdp_mem_allocator *xa,
+ const struct xdp_rxq_info *rxq),
+
+ TP_ARGS(xa, rxq),
+
+ TP_STRUCT__entry(
+ __field(const struct xdp_mem_allocator *, xa)
+ __field(u32, mem_id)
+ __field(u32, mem_type)
+ __field(const void *, allocator)
+ __field(const struct xdp_rxq_info *, rxq)
+ __field(int, ifindex)
+ ),
+
+ TP_fast_assign(
+ __entry->xa = xa;
+ __entry->mem_id = xa->mem.id;
+ __entry->mem_type = xa->mem.type;
+ __entry->allocator = xa->allocator;
+ __entry->rxq = rxq;
+ __entry->ifindex = rxq->dev->ifindex;
+ ),
+
+ TP_printk("mem_id=%d mem_type=%s allocator=%p"
+ " ifindex=%d",
+ __entry->mem_id,
+ __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB),
+ __entry->allocator,
+ __entry->ifindex
+ )
+);
+
+TRACE_EVENT(mem_return_failed,
+
+ TP_PROTO(const struct xdp_mem_info *mem,
+ const struct page *page),
+
+ TP_ARGS(mem, page),
+
+ TP_STRUCT__entry(
+ __field(const struct page *, page)
+ __field(u32, mem_id)
+ __field(u32, mem_type)
+ ),
+
+ TP_fast_assign(
+ __entry->page = page;
+ __entry->mem_id = mem->id;
+ __entry->mem_type = mem->type;
+ ),
+
+ TP_printk("mem_id=%d mem_type=%s page=%p",
+ __entry->mem_id,
+ __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB),
+ __entry->page
+ )
+);
+
#endif /* _TRACE_XDP_H */
#include <trace/define_trace.h>
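As an aside, the __MEM_TYPE_MAP construct above is a classic X-macro:
one list expanded several times so the enum values, the
TRACE_DEFINE_ENUM calls and the __print_symbolic table can never drift
apart. A minimal userspace illustration of the same pattern
(hypothetical names):

#include <stdio.h>

/* One list, expanded twice: enum and name table stay in sync */
#define COLOR_MAP(FN) \
	FN(RED)       \
	FN(GREEN)     \
	FN(BLUE)

#define COLOR_ENUM_FN(x) COLOR_##x,
enum color { COLOR_MAP(COLOR_ENUM_FN) };

#define COLOR_NAME_FN(x) [COLOR_##x] = #x,
static const char * const color_names[] = { COLOR_MAP(COLOR_NAME_FN) };

int main(void)
{
	printf("%s\n", color_names[COLOR_GREEN]);	/* prints GREEN */
	return 0;
}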