aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/net/core/dev.c
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2025-03-20 19:22:38 +0100
committerJakub Kicinski <kuba@kernel.org>2025-03-24 13:58:22 -0700
commitc353e8983e0dea5dbba7789033326e1ad34135b7 (patch)
tree7d6915e7a8f2ee733b75e96ac0793414841b54d3 /net/core/dev.c
parenttty: caif: removed unused function debugfs_tx() (diff)
downloadwireguard-linux-c353e8983e0dea5dbba7789033326e1ad34135b7.tar.xz
wireguard-linux-c353e8983e0dea5dbba7789033326e1ad34135b7.zip
net: introduce per netns packet chains
Currently network taps unbound to any interface are linked in the global ptype_all list, affecting the performance in all the network namespaces. Add per netns ptypes chains, so that in the mentioned case only the netns owning the packet socket(s) is affected. While at that drop the global ptype_all list: no in kernel user registers a tap on "any" type without specifying either the target device or the target namespace (and IMHO doing that would not make any sense). Note that this adds a conditional in the fast path (to check for per netns ptype_specific list) and increases the dataset size by a cacheline (owing the per netns lists). Reviewed-by: Sabrina Dubroca <sd@queasysnail.net> Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: Eric Dumazet <edumaze@google.com> Link: https://patch.msgid.link/ae405f98875ee87f8150c460ad162de7e466f8a7.1742494826.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c53
1 files changed, 41 insertions, 12 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 235560341765..bcf81c3ff6a3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -572,10 +572,18 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
- if (pt->type == htons(ETH_P_ALL))
- return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all;
- else
- return pt->dev ? &pt->dev->ptype_specific :
+ if (pt->type == htons(ETH_P_ALL)) {
+ if (!pt->af_packet_net && !pt->dev)
+ return NULL;
+
+ return pt->dev ? &pt->dev->ptype_all :
+ &pt->af_packet_net->ptype_all;
+ }
+
+ if (pt->dev)
+ return &pt->dev->ptype_specific;
+
+ return pt->af_packet_net ? &pt->af_packet_net->ptype_specific :
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
@@ -596,6 +604,9 @@ void dev_add_pack(struct packet_type *pt)
{
struct list_head *head = ptype_head(pt);
+ if (WARN_ON_ONCE(!head))
+ return;
+
spin_lock(&ptype_lock);
list_add_rcu(&pt->list, head);
spin_unlock(&ptype_lock);
@@ -620,6 +631,9 @@ void __dev_remove_pack(struct packet_type *pt)
struct list_head *head = ptype_head(pt);
struct packet_type *pt1;
+ if (!head)
+ return;
+
spin_lock(&ptype_lock);
list_for_each_entry(pt1, head, list) {
@@ -2441,16 +2455,21 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
}
/**
- * dev_nit_active - return true if any network interface taps are in use
+ * dev_nit_active_rcu - return true if any network interface taps are in use
+ *
+ * The caller must hold the RCU lock
*
* @dev: network device to check for the presence of taps
*/
-bool dev_nit_active(struct net_device *dev)
+bool dev_nit_active_rcu(const struct net_device *dev)
{
- return !list_empty(&net_hotdata.ptype_all) ||
+ /* Callers may hold either RCU or RCU BH lock */
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+
+ return !list_empty(&dev_net(dev)->ptype_all) ||
!list_empty(&dev->ptype_all);
}
-EXPORT_SYMBOL_GPL(dev_nit_active);
+EXPORT_SYMBOL_GPL(dev_nit_active_rcu);
/*
* Support routine. Sends outgoing frames to any network
@@ -2459,11 +2478,12 @@ EXPORT_SYMBOL_GPL(dev_nit_active);
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
- struct list_head *ptype_list = &net_hotdata.ptype_all;
struct packet_type *ptype, *pt_prev = NULL;
+ struct list_head *ptype_list;
struct sk_buff *skb2 = NULL;
rcu_read_lock();
+ ptype_list = &dev_net_rcu(dev)->ptype_all;
again:
list_for_each_entry_rcu(ptype, ptype_list, list) {
if (READ_ONCE(ptype->ignore_outgoing))
@@ -2507,7 +2527,7 @@ again:
pt_prev = ptype;
}
- if (ptype_list == &net_hotdata.ptype_all) {
+ if (ptype_list != &dev->ptype_all) {
ptype_list = &dev->ptype_all;
goto again;
}
@@ -3752,7 +3772,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
unsigned int len;
int rc;
- if (dev_nit_active(dev))
+ if (dev_nit_active_rcu(dev))
dev_queue_xmit_nit(skb, dev);
len = skb->len;
@@ -5696,7 +5716,8 @@ another_round:
if (pfmemalloc)
goto skip_taps;
- list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) {
+ list_for_each_entry_rcu(ptype, &dev_net_rcu(skb->dev)->ptype_all,
+ list) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
@@ -5808,6 +5829,14 @@ check_vlan_id:
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
&ptype_base[ntohs(type) &
PTYPE_HASH_MASK]);
+
+ /* orig_dev and skb->dev could belong to different netns;
+ * Even in such case we need to traverse only the list
+ * coming from skb->dev, as the ptype owner (packet socket)
+ * will use dev_net(skb->dev) to do namespace filtering.
+ */
+ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+ &dev_net_rcu(skb->dev)->ptype_specific);
}
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,