author		2025-07-23 17:47:01 -0700
committer	2025-07-23 17:47:01 -0700
commit		1cdf3f2d8f1c5d709a9a0cae101a4f07c245d2e7
tree		3c490f7ca7c4f520a1781860d11f882e2c8a191e
parent		vxlan: remove redundant conversion of vni in vxlan_nl2conf
parent		libeth: xdp: access ->pp through netmem_desc instead of page
Merge branch 'split-netmem-from-struct-page'
Byungchul Park says:
====================
Split netmem from struct page
The MM subsystem is trying to reduce struct page to a single pointer.
See the following link for your information:
https://kernelnewbies.org/MatthewWilcox/Memdescs/Path
The first step towards that is splitting struct page by its individual
users, as has already been done with folio and slab. This patchset does
that for page pool.
Matthew Wilcox previously attempted the same work and then stopped; see:
https://lore.kernel.org/20230111042214.907030-1-willy@infradead.org
This time I focused on removing the page pool members from struct page,
rather than moving the page pool allocation code from net to mm. That
can be done later if needed.
====================
Link: https://patch.msgid.link/20250721021835.63939-1-byungchul@sk.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
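The driver-side conversion in this series is mechanical: every direct dereference of page->pp (or page->pp_ref_count) is routed through the new pp_page_to_nmdesc() accessor instead. Below is a minimal sketch of the before/after pattern, using a hypothetical helper name (example_recycle_tx_page) and assuming the page is already known to be a page_pool page, as the accessor requires:

/* Illustrative sketch only -- not part of the patch.  Shows how a
 * driver's TX-completion recycling changes under this series.
 */
#include <net/netmem.h>			/* pp_page_to_nmdesc(), added here */
#include <net/page_pool/helpers.h>	/* page_pool_put_page() */

static void example_recycle_tx_page(struct page *page)
{
	/* Before: the owning pool was read straight out of struct page:
	 *
	 *	page_pool_put_page(page->pp, page, 0, false);
	 *
	 * After: ->pp is reached through the netmem_desc view, which for
	 * now overlays struct page and later becomes a separate object.
	 */
	page_pool_put_page(pp_page_to_nmdesc(page)->pp, page, 0, false);
}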
 drivers/net/ethernet/freescale/fec_main.c               |  10
 drivers/net/ethernet/intel/iavf/iavf_txrx.c             |   2
 drivers/net/ethernet/intel/idpf/idpf_txrx.c             |   8
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c  |   2
 drivers/net/ethernet/mellanox/mlx4/en_rx.c              |   4
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c        |   3
 drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c        |   4
 drivers/net/netdevsim/netdev.c                          |   6
 drivers/net/wireless/mediatek/mt76/mt76.h               |   3
 include/linux/mm.h                                      |   4
 include/net/libeth/xdp.h                                |   2
 include/net/netmem.h                                    | 153
 12 files changed, 161 insertions(+), 40 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index b481ee8ee478..1383918f8a3f 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1045,7 +1045,9 @@ static void fec_enet_bd_init(struct net_device *dev)
 			struct page *page = txq->tx_buf[i].buf_p;

 			if (page)
-				page_pool_put_page(page->pp, page, 0, false);
+				page_pool_put_page(pp_page_to_nmdesc(page)->pp,
+						   page, 0,
+						   false);
 		}

 		txq->tx_buf[i].buf_p = NULL;
@@ -1586,7 +1588,8 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget)
 			xdp_return_frame_rx_napi(xdpf);
 		} else {
 			/* recycle pages of XDP_TX frames */
 			/* The dma_sync_size = 0 as XDP_TX has already synced DMA for_device */
-			page_pool_put_page(page->pp, page, 0, true);
+			page_pool_put_page(pp_page_to_nmdesc(page)->pp, page,
+					   0, true);
 		}

 		txq->tx_buf[index].buf_p = NULL;
@@ -3348,7 +3351,8 @@ static void fec_enet_free_buffers(struct net_device *ndev)
 			} else {
 				struct page *page = txq->tx_buf[i].buf_p;

-				page_pool_put_page(page->pp, page, 0, false);
+				page_pool_put_page(pp_page_to_nmdesc(page)->pp,
+						   page, 0, false);
 			}

 			txq->tx_buf[i].buf_p = NULL;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index aaf70c625655..363c42bf3dcf 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -1216,7 +1216,7 @@ static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer,
 				      unsigned int size)
 {
 	struct page *buf_page = __netmem_to_page(rx_buffer->netmem);
-	u32 hr = buf_page->pp->p.offset;
+	u32 hr = pp_page_to_nmdesc(buf_page)->pp->p.offset;
 	struct sk_buff *skb;
 	void *va;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index c976d9e15aca..66a1b040639d 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3276,8 +3276,10 @@ static u32 idpf_rx_hsplit_wa(const struct libeth_fqe *hdr,
 	hdr_page = __netmem_to_page(hdr->netmem);
 	buf_page = __netmem_to_page(buf->netmem);

-	dst = page_address(hdr_page) + hdr->offset + hdr_page->pp->p.offset;
-	src = page_address(buf_page) + buf->offset + buf_page->pp->p.offset;
+	dst = page_address(hdr_page) + hdr->offset +
+	      pp_page_to_nmdesc(hdr_page)->pp->p.offset;
+	src = page_address(buf_page) + buf->offset +
+	      pp_page_to_nmdesc(buf_page)->pp->p.offset;

 	memcpy(dst, src, LARGEST_ALIGN(copy));
 	buf->offset += copy;
@@ -3296,7 +3298,7 @@ static u32 idpf_rx_hsplit_wa(const struct libeth_fqe *hdr,
 struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size)
 {
 	struct page *buf_page = __netmem_to_page(buf->netmem);
-	u32 hr = buf_page->pp->p.offset;
+	u32 hr = pp_page_to_nmdesc(buf_page)->pp->p.offset;
 	struct sk_buff *skb;
 	void *va;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index 99ace381cc78..625bb5a05344 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -1571,7 +1571,7 @@ handle_xdp_verdict:
 		cq->pool_ptrs++;
 		if (xsk_buff) {
 			xsk_buff_free(xsk_buff);
-		} else if (page->pp) {
+		} else if (pp_page_to_nmdesc(page)->pp) {
 			page_pool_recycle_direct(pool->page_pool, page);
 		} else {
 			otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index b33285d755b9..92a16ddb7d86 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -460,9 +460,11 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 		truesize += frag_info->frag_stride;

 		if (frag_info->frag_stride == PAGE_SIZE / 2) {
+			struct netmem_desc *desc = pp_page_to_nmdesc(page);
+
 			frags->page_offset ^= PAGE_SIZE / 2;
 			release = page_count(page) != 1 ||
-				  atomic_long_read(&page->pp_ref_count) != 1 ||
+				  atomic_long_read(&desc->pp_ref_count) != 1 ||
 				  page_is_pfmemalloc(page) ||
 				  page_to_nid(page) != numa_mem_id();
 		} else if (!priv->rx_headroom) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 5ce1b463b7a8..5d51600935a6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -710,7 +710,8 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 			/* No need to check page_pool_page_is_pp() as we
 			 * know this is a page_pool page.
 			 */
-			page_pool_recycle_direct(page->pp, page);
+			page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp,
+						 page);
 		} while (++n < num);

 		break;
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c
index ff5f41bf499e..5e225310c9de 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c
@@ -367,7 +367,7 @@ static irqreturn_t prueth_rx_mgm_ts_thread_sr1(int irq, void *dev_id)
 		return IRQ_NONE;

 	prueth_tx_ts_sr1(emac, (void *)page_address(page));
-	page_pool_recycle_direct(page->pp, page);
+	page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp, page);

 	return IRQ_HANDLED;
 }
@@ -392,7 +392,7 @@ static irqreturn_t prueth_rx_mgm_rsp_thread(int irq, void *dev_id)
 		complete(&emac->cmd_complete);
 	}

-	page_pool_recycle_direct(page->pp, page);
+	page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp, page);

 	return IRQ_HANDLED;
 }
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index a7628f5c09af..39fe28af48b9 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -917,7 +917,8 @@ nsim_pp_hold_write(struct file *file, const char __user *data,
 		if (!ns->page)
 			ret = -ENOMEM;
 	} else {
-		page_pool_put_full_page(ns->page->pp, ns->page, false);
+		page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp,
+					ns->page, false);
 		ns->page = NULL;
 	}
@@ -1145,7 +1146,8 @@ void nsim_destroy(struct netdevsim *ns)

 	/* Put this intentionally late to exercise the orphaning path */
 	if (ns->page) {
-		page_pool_put_full_page(ns->page->pp, ns->page, false);
+		page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp,
+					ns->page, false);
 		ns->page = NULL;
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 2912568612bc..8dd5c29fb75b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -1810,7 +1810,8 @@ static inline void mt76_put_page_pool_buf(void *buf, bool allow_direct)
 {
 	struct page *page = virt_to_head_page(buf);

-	page_pool_put_full_page(page->pp, page, allow_direct);
+	page_pool_put_full_page(pp_page_to_nmdesc(page)->pp, page,
+				allow_direct);
 }

 static inline void *
diff --git a/include/linux/mm.h b/include/linux/mm.h
index fa538feaa8d9..ae50c1641bed 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4178,12 +4178,12 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
 #define PP_MAGIC_MASK	~(PP_DMA_INDEX_MASK | 0x3UL)

 #ifdef CONFIG_PAGE_POOL
-static inline bool page_pool_page_is_pp(struct page *page)
+static inline bool page_pool_page_is_pp(const struct page *page)
 {
 	return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
 }
 #else
-static inline bool page_pool_page_is_pp(struct page *page)
+static inline bool page_pool_page_is_pp(const struct page *page)
 {
 	return false;
 }
diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h
index 6ce6aec6884c..f4880b50e804 100644
--- a/include/net/libeth/xdp.h
+++ b/include/net/libeth/xdp.h
@@ -1292,7 +1292,7 @@ static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp,
 	xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq);
 #endif
 	xdp_prepare_buff(&xdp->base, page_address(page) + fqe->offset,
-			 page->pp->p.offset, len, true);
+			 pp_page_to_nmdesc(page)->pp->p.offset, len, true);
 }

 /**
diff --git a/include/net/netmem.h b/include/net/netmem.h
index de1d95f04076..f7dacc9e75fd 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -12,6 +12,50 @@
 #include <linux/mm.h>
 #include <net/net_debug.h>

+/* These fields in struct page are used by the page_pool and net stack:
+ *
+ * struct {
+ *	unsigned long pp_magic;
+ *	struct page_pool *pp;
+ *	unsigned long _pp_mapping_pad;
+ *	unsigned long dma_addr;
+ *	atomic_long_t pp_ref_count;
+ * };
+ *
+ * We mirror the page_pool fields here so the page_pool can access these
+ * fields without worrying whether the underlying fields belong to a
+ * page or netmem_desc.
+ *
+ * CAUTION: Do not update the fields in netmem_desc without also
+ * updating the anonymous aliasing union in struct net_iov.
+ */
+struct netmem_desc {
+	unsigned long _flags;
+	unsigned long pp_magic;
+	struct page_pool *pp;
+	unsigned long _pp_mapping_pad;
+	unsigned long dma_addr;
+	atomic_long_t pp_ref_count;
+};
+
+#define NETMEM_DESC_ASSERT_OFFSET(pg, desc) \
+	static_assert(offsetof(struct page, pg) == \
+		      offsetof(struct netmem_desc, desc))
+NETMEM_DESC_ASSERT_OFFSET(flags, _flags);
+NETMEM_DESC_ASSERT_OFFSET(pp_magic, pp_magic);
+NETMEM_DESC_ASSERT_OFFSET(pp, pp);
+NETMEM_DESC_ASSERT_OFFSET(_pp_mapping_pad, _pp_mapping_pad);
+NETMEM_DESC_ASSERT_OFFSET(dma_addr, dma_addr);
+NETMEM_DESC_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
+#undef NETMEM_DESC_ASSERT_OFFSET
+
+/*
+ * Since struct netmem_desc uses the space in struct page, the size
+ * should be checked, until struct netmem_desc has its own instance from
+ * slab, to avoid conflicting with other members within struct page.
+ */
+static_assert(sizeof(struct netmem_desc) <= offsetof(struct page, _refcount));
+
 /* net_iov */
 DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
@@ -30,13 +74,48 @@ enum net_iov_type {
 	NET_IOV_MAX = ULONG_MAX
 };

+/* A memory descriptor representing abstract networking I/O vectors,
+ * generally for non-pages memory that doesn't have its corresponding
+ * struct page and needs to be explicitly allocated through slab.
+ *
+ * net_iovs are allocated and used by networking code, and the size of
+ * the chunk is PAGE_SIZE.
+ *
+ * This memory can be any form of non-struct paged memory. Examples
+ * include imported dmabuf memory and imported io_uring memory. See
+ * net_iov_type for all the supported types.
+ *
+ * @pp_magic:	pp field, similar to the one in struct page/struct
+ *		netmem_desc.
+ * @pp:		the pp this net_iov belongs to, if any.
+ * @dma_addr:	the dma addrs of the net_iov. Needed for the network
+ *		card to send/receive this net_iov.
+ * @pp_ref_count: the pp ref count of this net_iov, exactly the same
+ *		usage as struct page/struct netmem_desc.
+ * @owner:	the net_iov_area this net_iov belongs to, if any.
+ * @type:	the type of the memory. Different types of net_iovs are
+ *		supported.
+ */
 struct net_iov {
-	enum net_iov_type type;
-	unsigned long pp_magic;
-	struct page_pool *pp;
+	union {
+		struct netmem_desc desc;
+
+		/* XXX: The following part should be removed once all
+		 * the references to them are converted so as to be
+		 * accessed via netmem_desc e.g. niov->desc.pp instead
+		 * of niov->pp.
+		 */
+		struct {
+			unsigned long _flags;
+			unsigned long pp_magic;
+			struct page_pool *pp;
+			unsigned long _pp_mapping_pad;
+			unsigned long dma_addr;
+			atomic_long_t pp_ref_count;
+		};
+	};
 	struct net_iov_area *owner;
-	unsigned long dma_addr;
-	atomic_long_t pp_ref_count;
+	enum net_iov_type type;
 };

 struct net_iov_area {
@@ -48,27 +127,22 @@ struct net_iov_area {
 	unsigned long base_virtual;
 };

-/* These fields in struct page are used by the page_pool and net stack:
- *
- * struct {
- *	unsigned long pp_magic;
- *	struct page_pool *pp;
- *	unsigned long _pp_mapping_pad;
- *	unsigned long dma_addr;
- *	atomic_long_t pp_ref_count;
- * };
- *
- * We mirror the page_pool fields here so the page_pool can access these fields
- * without worrying whether the underlying fields belong to a page or net_iov.
+/* net_iov is union'ed with struct netmem_desc mirroring struct page, so
+ * the page_pool can access these fields without worrying whether the
+ * underlying fields are accessed via netmem_desc or directly via
+ * net_iov, until all the references to them are converted so as to be
+ * accessed via netmem_desc e.g. niov->desc.pp instead of niov->pp.
  *
- * The non-net stack fields of struct page are private to the mm stack and must
- * never be mirrored to net_iov.
+ * The non-net stack fields of struct page are private to the mm stack
+ * and must never be mirrored to net_iov.
  */
-#define NET_IOV_ASSERT_OFFSET(pg, iov) \
-	static_assert(offsetof(struct page, pg) == \
+#define NET_IOV_ASSERT_OFFSET(desc, iov) \
+	static_assert(offsetof(struct netmem_desc, desc) == \
 		      offsetof(struct net_iov, iov))
+NET_IOV_ASSERT_OFFSET(_flags, _flags);
 NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
 NET_IOV_ASSERT_OFFSET(pp, pp);
+NET_IOV_ASSERT_OFFSET(_pp_mapping_pad, _pp_mapping_pad);
 NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
 NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
 #undef NET_IOV_ASSERT_OFFSET
@@ -173,6 +247,24 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem)
 	return page_to_pfn(netmem_to_page(netmem));
 }

+/**
+ * __netmem_to_nmdesc - unsafely get pointer to the &netmem_desc backing
+ *	@netmem
+ * @netmem: netmem reference to convert
+ *
+ * Unsafe version that can be used only when @netmem is always backed by
+ * system memory, performs faster and generates smaller object code (no
+ * check for the LSB, no WARN). When @netmem points to IOV, provokes
+ * undefined behaviour.
+ *
+ * Return: pointer to the &netmem_desc (garbage if @netmem is not backed
+ * by system memory).
+ */
+static inline struct netmem_desc *__netmem_to_nmdesc(netmem_ref netmem)
+{
+	return (__force struct netmem_desc *)netmem;
+}
+
 /* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to
  * common fields.
  * @netmem: netmem reference to extract as net_iov.
@@ -193,6 +285,23 @@ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
 	return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
 }

+/* XXX: How to extract netmem_desc from page must be changed, once
+ * netmem_desc no longer overlays on page and will be allocated through
+ * slab.
+ */
+#define __pp_page_to_nmdesc(p) (_Generic((p),				\
+	const struct page * :	(const struct netmem_desc *)(p),	\
+	struct page * :		(struct netmem_desc *)(p)))
+
+/* CAUTION: Check if the page is a pp page before calling this helper or
+ * know it's a pp page.
+ */
+#define pp_page_to_nmdesc(p)						\
+({									\
+	DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p));		\
+	__pp_page_to_nmdesc(p);						\
+})
+
 /**
  * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem
  * @netmem: netmem reference to get the pointer from
@@ -206,7 +315,7 @@ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
  */
 static inline struct page_pool *__netmem_get_pp(netmem_ref netmem)
 {
-	return __netmem_to_page(netmem)->pp;
+	return __netmem_to_nmdesc(netmem)->pp;
 }

 static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
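The include/net/netmem.h hunks above all lean on one layout trick: a standalone descriptor struct overlays the leading page_pool fields of a host struct, and every shared offset is pinned with static_assert() so the two views cannot silently drift apart. The following is a self-contained userspace sketch of that pattern, with hypothetical names (struct host, struct mirror_desc), plain C11, and not kernel code:

/* mirror_layout.c -- sketch of the overlay + static_assert technique. */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct host {			/* stand-in for struct page */
	unsigned long flags;
	void *owner_pool;
	unsigned long dma_addr;
	long ref_count;
	int host_private;	/* fields private to the host start here */
};

struct mirror_desc {		/* stand-in for struct netmem_desc */
	unsigned long flags;
	void *owner_pool;
	unsigned long dma_addr;
	long ref_count;
};

/* Pin every shared field to the same offset in both views. */
#define MIRROR_ASSERT_OFFSET(h, d)					\
	static_assert(offsetof(struct host, h) ==			\
		      offsetof(struct mirror_desc, d),			\
		      "layout drift between host and mirror")
MIRROR_ASSERT_OFFSET(flags, flags);
MIRROR_ASSERT_OFFSET(owner_pool, owner_pool);
MIRROR_ASSERT_OFFSET(dma_addr, dma_addr);
MIRROR_ASSERT_OFFSET(ref_count, ref_count);
#undef MIRROR_ASSERT_OFFSET

/* The mirror must fit entirely in front of any host-private state. */
static_assert(sizeof(struct mirror_desc) <= offsetof(struct host, host_private),
	      "mirror overlaps host-private fields");

int main(void)
{
	struct host h = { .dma_addr = 0x1000 };
	/* Reinterpret the host through the mirror view, as the kernel
	 * helper does with a cast while the overlay is still in place.
	 */
	struct mirror_desc *d = (struct mirror_desc *)&h;

	printf("dma_addr via mirror view: %#lx\n", d->dma_addr);
	return 0;
}

Once netmem_desc stops overlaying struct page and gets its own slab allocation, only the cast in the conversion helper has to change, which is what the XXX comment on __pp_page_to_nmdesc() anticipates.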