aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-02-20 10:19:54 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-02-20 10:19:54 -0800
commit27eddbf3449026a73d6ed52d55b192bfcf526a03 (patch)
tree18ec1d5c04a718c237322fb1d2cf3d0598d2fe31
parentMerge tag 'v6.14-rc3-smb3-client-fixes' of git://git.samba.org/sfrench/cifs-2.6 (diff)
parentMerge branch 'net-remove-the-single-page-frag-cache-for-good' (diff)
downloadwireguard-linux-27eddbf3449026a73d6ed52d55b192bfcf526a03.tar.xz
wireguard-linux-27eddbf3449026a73d6ed52d55b192bfcf526a03.zip
Merge tag 'net-6.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni: "Smaller than usual with no fixes from any subtree. Current release - regressions: - core: fix race of rtnl_net_lock(dev_net(dev)) Previous releases - regressions: - core: remove the single page frag cache for good - flow_dissector: fix handling of mixed port and port-range keys - sched: cls_api: fix error handling causing NULL dereference - tcp: - adjust rcvq_space after updating scaling ratio - drop secpath at the same time as we currently drop dst - eth: gtp: suppress list corruption splat in gtp_net_exit_batch_rtnl(). Previous releases - always broken: - vsock: - fix variables initialization during resuming - for connectible sockets allow only connected - eth: - geneve: fix use-after-free in geneve_find_dev() - ibmvnic: don't reference skb after sending to VIOS" * tag 'net-6.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (34 commits) Revert "net: skb: introduce and use a single page frag cache" net: allow small head cache usage with large MAX_SKB_FRAGS values nfp: bpf: Add check for nfp_app_ctrl_msg_alloc() tcp: drop secpath at the same time as we currently drop dst net: axienet: Set mac_managed_pm arp: switch to dev_getbyhwaddr() in arp_req_set_public() net: Add non-RCU dev_getbyhwaddr() helper sctp: Fix undefined behavior in left shift operation selftests/bpf: Add a specific dst port matching flow_dissector: Fix port range key handling in BPF conversion selftests/net/forwarding: Add a test case for tc-flower of mixed port and port-range flow_dissector: Fix handling of mixed port and port-range keys geneve: Suppress list corruption splat in geneve_destroy_tunnels(). gtp: Suppress list corruption splat in gtp_net_exit_batch_rtnl(). dev: Use rtnl_net_dev_lock() in unregister_netdev(). net: Fix dev_net(dev) race in unregister_netdevice_notifier_dev_net(). net: Add net_passive_inc() and net_passive_dec(). net: pse-pd: pd692x0: Fix power limit retrieval MAINTAINERS: trim the GVE entry gve: set xdp redirect target only when it is available ...
-rw-r--r--MAINTAINERS9
-rw-r--r--drivers/net/ethernet/google/gve/gve.h10
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c6
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c4
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/cmsg.c2
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c1
-rw-r--r--drivers/net/geneve.c16
-rw-r--r--drivers/net/gtp.c5
-rw-r--r--drivers/net/pse-pd/pd692x0.c2
-rw-r--r--drivers/net/wwan/mhi_wwan_mbim.c2
-rw-r--r--drivers/s390/net/ism_drv.c14
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/net/gro.h3
-rw-r--r--include/net/net_namespace.h11
-rw-r--r--include/net/tcp.h14
-rw-r--r--net/core/dev.c108
-rw-r--r--net/core/drop_monitor.c29
-rw-r--r--net/core/flow_dissector.c49
-rw-r--r--net/core/gro.c3
-rw-r--r--net/core/net_namespace.c8
-rw-r--r--net/core/skbuff.c110
-rw-r--r--net/core/sock_map.c3
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/tcp_fastopen.c4
-rw-r--r--net/ipv4/tcp_input.c20
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/sched/cls_api.c2
-rw-r--r--net/sctp/stream.c2
-rw-r--r--net/vmw_vsock/af_vsock.c3
-rw-r--r--net/vmw_vsock/virtio_transport.c10
-rw-r--r--net/vmw_vsock/vsock_bpf.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c70
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_port_range.sh46
34 files changed, 365 insertions, 217 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 4e17764cb6ed..3864d473f52f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9829,8 +9829,7 @@ F: drivers/input/touchscreen/goodix*
GOOGLE ETHERNET DRIVERS
M: Jeroen de Borst <jeroendb@google.com>
-M: Praveen Kaligineedi <pkaligineedi@google.com>
-R: Shailend Chand <shailend@google.com>
+M: Harshitha Ramamurthy <hramamurthy@google.com>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/networking/device_drivers/ethernet/google/gve.rst
@@ -16472,6 +16471,12 @@ F: net/ethtool/cabletest.c
F: tools/testing/selftests/drivers/net/*/ethtool*
K: cable_test
+NETWORKING [ETHTOOL MAC MERGE]
+M: Vladimir Oltean <vladimir.oltean@nxp.com>
+F: net/ethtool/mm.c
+F: tools/testing/selftests/drivers/net/hw/ethtool_mm.sh
+K: ethtool_mm
+
NETWORKING [GENERAL]
M: "David S. Miller" <davem@davemloft.net>
M: Eric Dumazet <edumazet@google.com>
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 8167cc5fb0df..78d2a19593d1 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -1116,6 +1116,16 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
return gve_xdp_tx_queue_id(priv, 0);
}
+static inline bool gve_supports_xdp_xmit(struct gve_priv *priv)
+{
+ switch (priv->queue_format) {
+ case GVE_GQI_QPL_FORMAT:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* gqi napi handler defined in gve_main.c */
int gve_napi_poll(struct napi_struct *napi, int budget);
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 533e659b15b3..92237fb0b60c 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1903,6 +1903,8 @@ static void gve_turndown(struct gve_priv *priv)
/* Stop tx queues */
netif_tx_disable(priv->dev);
+ xdp_features_clear_redirect_target(priv->dev);
+
gve_clear_napi_enabled(priv);
gve_clear_report_stats(priv);
@@ -1972,6 +1974,9 @@ static void gve_turnup(struct gve_priv *priv)
napi_schedule(&block->napi);
}
+ if (priv->num_xdp_queues && gve_supports_xdp_xmit(priv))
+ xdp_features_set_redirect_target(priv->dev, false);
+
gve_set_napi_enabled(priv);
}
@@ -2246,7 +2251,6 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv)
if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
xdp_features = NETDEV_XDP_ACT_BASIC;
xdp_features |= NETDEV_XDP_ACT_REDIRECT;
- xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
} else {
xdp_features = 0;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index e95ae0d39948..0676fc547b6f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2408,6 +2408,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
dma_addr_t data_dma_addr;
struct netdev_queue *txq;
unsigned long lpar_rc;
+ unsigned int skblen;
union sub_crq tx_crq;
unsigned int offset;
bool use_scrq_send_direct = false;
@@ -2522,6 +2523,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_buff->skb = skb;
tx_buff->index = bufidx;
tx_buff->pool_index = queue_num;
+ skblen = skb->len;
memset(&tx_crq, 0, sizeof(tx_crq));
tx_crq.v1.first = IBMVNIC_CRQ_CMD;
@@ -2614,7 +2616,7 @@ early_exit:
netif_stop_subqueue(netdev, queue_num);
}
- tx_bytes += skb->len;
+ tx_bytes += skblen;
txq_trans_cond_update(txq);
ret = NETDEV_TX_OK;
goto out;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
index 2ec62c8d86e1..59486fe2ad18 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -20,6 +20,8 @@ nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size)
struct sk_buff *skb;
skb = nfp_app_ctrl_msg_alloc(bpf->app, size, GFP_KERNEL);
+ if (!skb)
+ return NULL;
skb_put(skb, size);
return skb;
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 9e7fa012e4fa..f33178f90c42 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -2897,6 +2897,7 @@ static int axienet_probe(struct platform_device *pdev)
lp->phylink_config.dev = &ndev->dev;
lp->phylink_config.type = PHYLINK_NETDEV;
+ lp->phylink_config.mac_managed_pm = true;
lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
MAC_10FD | MAC_100FD | MAC_1000FD;
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 642155cb8315..dbb3960126ee 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1902,21 +1902,9 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
{
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *geneve, *next;
- struct net_device *dev, *aux;
- /* gather any geneve devices that were moved into this ns */
- for_each_netdev_safe(net, dev, aux)
- if (dev->rtnl_link_ops == &geneve_link_ops)
- unregister_netdevice_queue(dev, head);
-
- /* now gather any other geneve devices that were created in this ns */
- list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
- /* If geneve->dev is in the same netns, it was already added
- * to the list by the previous loop.
- */
- if (!net_eq(dev_net(geneve->dev), net))
- unregister_netdevice_queue(geneve->dev, head);
- }
+ list_for_each_entry_safe(geneve, next, &gn->geneve_list, next)
+ geneve_dellink(geneve->dev, head);
}
static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list,
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index d64740bf44ed..b7b46c5e6399 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -2481,11 +2481,6 @@ static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list,
list_for_each_entry(net, net_list, exit_list) {
struct gtp_net *gn = net_generic(net, gtp_net_id);
struct gtp_dev *gtp, *gtp_next;
- struct net_device *dev;
-
- for_each_netdev(net, dev)
- if (dev->rtnl_link_ops == &gtp_link_ops)
- gtp_dellink(dev, dev_to_kill);
list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list)
gtp_dellink(gtp->dev, dev_to_kill);
diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c
index fc9e23927b3b..7d60a714ca53 100644
--- a/drivers/net/pse-pd/pd692x0.c
+++ b/drivers/net/pse-pd/pd692x0.c
@@ -1047,7 +1047,7 @@ static int pd692x0_pi_get_pw_limit(struct pse_controller_dev *pcdev,
if (ret < 0)
return ret;
- return pd692x0_pi_get_pw_from_table(buf.data[2], buf.data[3]);
+ return pd692x0_pi_get_pw_from_table(buf.data[0], buf.data[1]);
}
static int pd692x0_pi_set_pw_limit(struct pse_controller_dev *pcdev,
diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c
index d5a9360323d2..8755c5e6a65b 100644
--- a/drivers/net/wwan/mhi_wwan_mbim.c
+++ b/drivers/net/wwan/mhi_wwan_mbim.c
@@ -220,7 +220,7 @@ static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *s
if (mbim->rx_seq + 1 != le16_to_cpu(nth16->wSequence) &&
(mbim->rx_seq || le16_to_cpu(nth16->wSequence)) &&
!(mbim->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) {
- net_err_ratelimited("sequence number glitch prev=%d curr=%d\n",
+ net_dbg_ratelimited("sequence number glitch prev=%d curr=%d\n",
mbim->rx_seq, le16_to_cpu(nth16->wSequence));
}
mbim->rx_seq = le16_to_cpu(nth16->wSequence);
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index e36e3ea165d3..2f34761e6413 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -588,6 +588,15 @@ out:
return ret;
}
+static void ism_dev_release(struct device *dev)
+{
+ struct ism_dev *ism;
+
+ ism = container_of(dev, struct ism_dev, dev);
+
+ kfree(ism);
+}
+
static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct ism_dev *ism;
@@ -601,6 +610,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev_set_drvdata(&pdev->dev, ism);
ism->pdev = pdev;
ism->dev.parent = &pdev->dev;
+ ism->dev.release = ism_dev_release;
device_initialize(&ism->dev);
dev_set_name(&ism->dev, dev_name(&pdev->dev));
ret = device_add(&ism->dev);
@@ -637,7 +647,7 @@ err:
device_del(&ism->dev);
err_dev:
dev_set_drvdata(&pdev->dev, NULL);
- kfree(ism);
+ put_device(&ism->dev);
return ret;
}
@@ -682,7 +692,7 @@ static void ism_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
device_del(&ism->dev);
dev_set_drvdata(&pdev->dev, NULL);
- kfree(ism);
+ put_device(&ism->dev);
}
static struct pci_driver ism_driver = {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c0a86afb85da..ab550a89b9bf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3275,6 +3275,8 @@ static inline struct net_device *first_net_device_rcu(struct net *net)
}
int netdev_boot_setup_check(struct net_device *dev);
+struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type,
+ const char *hwaddr);
struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
const char *hwaddr);
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
@@ -4115,7 +4117,6 @@ void netif_receive_skb_list(struct list_head *head);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
-void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
static inline void napi_free_frags(struct napi_struct *napi)
diff --git a/include/net/gro.h b/include/net/gro.h
index b9b58c1f8d19..7b548f91754b 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -11,6 +11,9 @@
#include <net/udp.h>
#include <net/hotdata.h>
+/* This should be increased if a protocol with a bigger head is added. */
+#define GRO_MAX_HEAD (MAX_HEADER + 128)
+
struct napi_gro_cb {
union {
struct {
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 7ba1402ca779..f467a66abc6b 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -297,6 +297,7 @@ static inline int check_net(const struct net *net)
}
void net_drop_ns(void *);
+void net_passive_dec(struct net *net);
#else
@@ -326,8 +327,18 @@ static inline int check_net(const struct net *net)
}
#define net_drop_ns NULL
+
+static inline void net_passive_dec(struct net *net)
+{
+ refcount_dec(&net->passive);
+}
#endif
+static inline void net_passive_inc(struct net *net)
+{
+ refcount_inc(&net->passive);
+}
+
/* Returns true if the netns initialization is completed successfully */
static inline bool net_initialized(const struct net *net)
{
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5b2b04835688..930cda5b5eb9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -41,6 +41,7 @@
#include <net/inet_ecn.h>
#include <net/dst.h>
#include <net/mptcp.h>
+#include <net/xfrm.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
@@ -683,6 +684,19 @@ void tcp_fin(struct sock *sk);
void tcp_check_space(struct sock *sk);
void tcp_sack_compress_send_ack(struct sock *sk);
+static inline void tcp_cleanup_skb(struct sk_buff *skb)
+{
+ skb_dst_drop(skb);
+ secpath_reset(skb);
+}
+
+static inline void tcp_add_receive_queue(struct sock *sk, struct sk_buff *skb)
+{
+ DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
+ DEBUG_NET_WARN_ON_ONCE(secpath_exists(skb));
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+}
+
/* tcp_timer.c */
void tcp_init_xmit_timers(struct sock *);
static inline void tcp_clear_xmit_timers(struct sock *sk)
diff --git a/net/core/dev.c b/net/core/dev.c
index b91658e8aedb..1b252e9459fd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1121,6 +1121,12 @@ out:
return ret;
}
+static bool dev_addr_cmp(struct net_device *dev, unsigned short type,
+ const char *ha)
+{
+ return dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len);
+}
+
/**
* dev_getbyhwaddr_rcu - find a device by its hardware address
* @net: the applicable net namespace
@@ -1129,7 +1135,7 @@ out:
*
* Search for an interface by MAC address. Returns NULL if the device
* is not found or a pointer to the device.
- * The caller must hold RCU or RTNL.
+ * The caller must hold RCU.
* The returned device has not had its ref count increased
* and the caller must therefore be careful about locking
*
@@ -1141,14 +1147,39 @@ struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
struct net_device *dev;
for_each_netdev_rcu(net, dev)
- if (dev->type == type &&
- !memcmp(dev->dev_addr, ha, dev->addr_len))
+ if (dev_addr_cmp(dev, type, ha))
return dev;
return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
+/**
+ * dev_getbyhwaddr() - find a device by its hardware address
+ * @net: the applicable net namespace
+ * @type: media type of device
+ * @ha: hardware address
+ *
+ * Similar to dev_getbyhwaddr_rcu(), but the owner needs to hold
+ * rtnl_lock.
+ *
+ * Context: rtnl_lock() must be held.
+ * Return: pointer to the net_device, or NULL if not found
+ */
+struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type,
+ const char *ha)
+{
+ struct net_device *dev;
+
+ ASSERT_RTNL();
+ for_each_netdev(net, dev)
+ if (dev_addr_cmp(dev, type, ha))
+ return dev;
+
+ return NULL;
+}
+EXPORT_SYMBOL(dev_getbyhwaddr);
+
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
struct net_device *dev, *ret = NULL;
@@ -2070,6 +2101,42 @@ static void __move_netdevice_notifier_net(struct net *src_net,
__register_netdevice_notifier_net(dst_net, nb, true);
}
+static void rtnl_net_dev_lock(struct net_device *dev)
+{
+ bool again;
+
+ do {
+ struct net *net;
+
+ again = false;
+
+ /* netns might be being dismantled. */
+ rcu_read_lock();
+ net = dev_net_rcu(dev);
+ net_passive_inc(net);
+ rcu_read_unlock();
+
+ rtnl_net_lock(net);
+
+#ifdef CONFIG_NET_NS
+ /* dev might have been moved to another netns. */
+ if (!net_eq(net, rcu_access_pointer(dev->nd_net.net))) {
+ rtnl_net_unlock(net);
+ net_passive_dec(net);
+ again = true;
+ }
+#endif
+ } while (again);
+}
+
+static void rtnl_net_dev_unlock(struct net_device *dev)
+{
+ struct net *net = dev_net(dev);
+
+ rtnl_net_unlock(net);
+ net_passive_dec(net);
+}
+
int register_netdevice_notifier_dev_net(struct net_device *dev,
struct notifier_block *nb,
struct netdev_net_notifier *nn)
@@ -2077,6 +2144,11 @@ int register_netdevice_notifier_dev_net(struct net_device *dev,
struct net *net = dev_net(dev);
int err;
+ /* rtnl_net_lock() assumes dev is not yet published by
+ * register_netdevice().
+ */
+ DEBUG_NET_WARN_ON_ONCE(!list_empty(&dev->dev_list));
+
rtnl_net_lock(net);
err = __register_netdevice_notifier_net(net, nb, false);
if (!err) {
@@ -2093,13 +2165,12 @@ int unregister_netdevice_notifier_dev_net(struct net_device *dev,
struct notifier_block *nb,
struct netdev_net_notifier *nn)
{
- struct net *net = dev_net(dev);
int err;
- rtnl_net_lock(net);
+ rtnl_net_dev_lock(dev);
list_del(&nn->list);
- err = __unregister_netdevice_notifier_net(net, nb);
- rtnl_net_unlock(net);
+ err = __unregister_netdevice_notifier_net(dev_net(dev), nb);
+ rtnl_net_dev_unlock(dev);
return err;
}
@@ -6920,6 +6991,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
list_add_rcu(&napi->dev_list, higher); /* adds after higher */
}
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+static void napi_get_frags_check(struct napi_struct *napi)
+{
+ struct sk_buff *skb;
+
+ local_bh_disable();
+ skb = napi_get_frags(napi);
+ WARN_ON_ONCE(skb && skb->head_frag);
+ napi_free_frags(napi);
+ local_bh_enable();
+}
+
void netif_napi_add_weight_locked(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
@@ -11880,11 +11968,9 @@ EXPORT_SYMBOL(unregister_netdevice_many);
*/
void unregister_netdev(struct net_device *dev)
{
- struct net *net = dev_net(dev);
-
- rtnl_net_lock(net);
+ rtnl_net_dev_lock(dev);
unregister_netdevice(dev);
- rtnl_net_unlock(net);
+ rtnl_net_dev_unlock(dev);
}
EXPORT_SYMBOL(unregister_netdev);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 6efd4cccc9dd..212f0a048cab 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -1734,30 +1734,30 @@ static int __init init_net_drop_monitor(void)
return -ENOSPC;
}
- rc = genl_register_family(&net_drop_monitor_family);
- if (rc) {
- pr_err("Could not create drop monitor netlink family\n");
- return rc;
+ for_each_possible_cpu(cpu) {
+ net_dm_cpu_data_init(cpu);
+ net_dm_hw_cpu_data_init(cpu);
}
- WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT);
rc = register_netdevice_notifier(&dropmon_net_notifier);
if (rc < 0) {
pr_crit("Failed to register netdevice notifier\n");
+ return rc;
+ }
+
+ rc = genl_register_family(&net_drop_monitor_family);
+ if (rc) {
+ pr_err("Could not create drop monitor netlink family\n");
goto out_unreg;
}
+ WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT);
rc = 0;
- for_each_possible_cpu(cpu) {
- net_dm_cpu_data_init(cpu);
- net_dm_hw_cpu_data_init(cpu);
- }
-
goto out;
out_unreg:
- genl_unregister_family(&net_drop_monitor_family);
+ WARN_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
out:
return rc;
}
@@ -1766,19 +1766,18 @@ static void exit_net_drop_monitor(void)
{
int cpu;
- BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
-
/*
* Because of the module_get/put we do in the trace state change path
* we are guaranteed not to have any current users when we get here
*/
+ BUG_ON(genl_unregister_family(&net_drop_monitor_family));
+
+ BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
for_each_possible_cpu(cpu) {
net_dm_hw_cpu_data_fini(cpu);
net_dm_cpu_data_fini(cpu);
}
-
- BUG_ON(genl_unregister_family(&net_drop_monitor_family));
}
module_init(init_net_drop_monitor);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 5db41bf2ed93..9cd8de6bebb5 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -853,23 +853,30 @@ __skb_flow_dissect_ports(const struct sk_buff *skb,
void *target_container, const void *data,
int nhoff, u8 ip_proto, int hlen)
{
- enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX;
- struct flow_dissector_key_ports *key_ports;
+ struct flow_dissector_key_ports_range *key_ports_range = NULL;
+ struct flow_dissector_key_ports *key_ports = NULL;
+ __be32 ports;
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
- dissector_ports = FLOW_DISSECTOR_KEY_PORTS;
- else if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_PORTS_RANGE))
- dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE;
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ target_container);
- if (dissector_ports == FLOW_DISSECTOR_KEY_MAX)
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE))
+ key_ports_range = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE,
+ target_container);
+
+ if (!key_ports && !key_ports_range)
return;
- key_ports = skb_flow_dissector_target(flow_dissector,
- dissector_ports,
- target_container);
- key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
- data, hlen);
+ ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
+
+ if (key_ports)
+ key_ports->ports = ports;
+
+ if (key_ports_range)
+ key_ports_range->tp.ports = ports;
}
static void
@@ -924,6 +931,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
struct flow_dissector *flow_dissector,
void *target_container)
{
+ struct flow_dissector_key_ports_range *key_ports_range = NULL;
struct flow_dissector_key_ports *key_ports = NULL;
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
@@ -968,20 +976,21 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
- if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
- else if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_PORTS_RANGE))
- key_ports = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_PORTS_RANGE,
- target_container);
-
- if (key_ports) {
key_ports->src = flow_keys->sport;
key_ports->dst = flow_keys->dport;
}
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE)) {
+ key_ports_range = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE,
+ target_container);
+ key_ports_range->tp.src = flow_keys->sport;
+ key_ports_range->tp.dst = flow_keys->dport;
+ }
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
diff --git a/net/core/gro.c b/net/core/gro.c
index d1f44084e978..78b320b63174 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -7,9 +7,6 @@
#define MAX_GRO_SKBS 8
-/* This should be increased if a protocol with a bigger head is added. */
-#define GRO_MAX_HEAD (MAX_HEADER + 128)
-
static DEFINE_SPINLOCK(offload_lock);
/**
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index cb39a12b2f82..4303f2a49262 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -464,7 +464,7 @@ static void net_complete_free(void)
}
-static void net_free(struct net *net)
+void net_passive_dec(struct net *net)
{
if (refcount_dec_and_test(&net->passive)) {
kfree(rcu_access_pointer(net->gen));
@@ -482,7 +482,7 @@ void net_drop_ns(void *p)
struct net *net = (struct net *)p;
if (net)
- net_free(net);
+ net_passive_dec(net);
}
struct net *copy_net_ns(unsigned long flags,
@@ -523,7 +523,7 @@ put_userns:
key_remove_domain(net->key_domain);
#endif
put_user_ns(user_ns);
- net_free(net);
+ net_passive_dec(net);
dec_ucounts:
dec_net_namespaces(ucounts);
return ERR_PTR(rv);
@@ -672,7 +672,7 @@ static void cleanup_net(struct work_struct *work)
key_remove_domain(net->key_domain);
#endif
put_user_ns(net->user_ns);
- net_free(net);
+ net_passive_dec(net);
}
cleanup_net_task = NULL;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a441613a1e6c..7b03b64fdcb2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>
+#include <net/gro.h>
#include <net/gso.h>
#include <net/hotdata.h>
#include <net/ip6_checksum.h>
@@ -95,7 +96,9 @@
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#endif
-#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
+#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN)
+#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \
+ GRO_MAX_HEAD_PAD))
/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
* This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
@@ -220,67 +223,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
#define NAPI_SKB_CACHE_BULK 16
#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
-#if PAGE_SIZE == SZ_4K
-
-#define NAPI_HAS_SMALL_PAGE_FRAG 1
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
-
-/* specialized page frag allocator using a single order 0 page
- * and slicing it into 1K sized fragment. Constrained to systems
- * with a very limited amount of 1K fragments fitting a single
- * page - to avoid excessive truesize underestimation
- */
-
-struct page_frag_1k {
- void *va;
- u16 offset;
- bool pfmemalloc;
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
-{
- struct page *page;
- int offset;
-
- offset = nc->offset - SZ_1K;
- if (likely(offset >= 0))
- goto use_frag;
-
- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
- if (!page)
- return NULL;
-
- nc->va = page_address(page);
- nc->pfmemalloc = page_is_pfmemalloc(page);
- offset = PAGE_SIZE - SZ_1K;
- page_ref_add(page, offset / SZ_1K);
-
-use_frag:
- nc->offset = offset;
- return nc->va + offset;
-}
-#else
-
-/* the small page is actually unused in this build; add dummy helpers
- * to please the compiler and avoid later preprocessor's conditionals
- */
-#define NAPI_HAS_SMALL_PAGE_FRAG 0
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
-
-struct page_frag_1k {
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
-{
- return NULL;
-}
-
-#endif
-
struct napi_alloc_cache {
local_lock_t bh_lock;
struct page_frag_cache page;
- struct page_frag_1k page_small;
unsigned int skb_count;
void *skb_cache[NAPI_SKB_CACHE_SIZE];
};
@@ -290,23 +235,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-void napi_get_frags_check(struct napi_struct *napi)
-{
- struct sk_buff *skb;
-
- local_bh_disable();
- skb = napi_get_frags(napi);
- WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
- napi_free_frags(napi);
- local_bh_enable();
-}
-
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -736,7 +664,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
*/
- if (len <= SKB_WITH_OVERHEAD(1024) ||
+ if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
@@ -813,10 +741,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
- * When the small frag allocator is available, prefer it over kmalloc
- * for small fragments
*/
- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
+ if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -826,32 +752,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
goto skb_success;
}
+ len = SKB_HEAD_ALIGN(len);
+
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache);
- if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
- /* we are artificially inflating the allocation size, but
- * that is not as bad as it may look like, as:
- * - 'len' less than GRO_MAX_HEAD makes little sense
- * - On most systems, larger 'len' values lead to fragment
- * size above 512 bytes
- * - kmalloc would use the kmalloc-1k slab for such values
- * - Builds with smaller GRO_MAX_HEAD will very likely do
- * little networking, as that implies no WiFi and no
- * tunnels support, and 32 bits arches.
- */
- len = SZ_1K;
- data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
- pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
- } else {
- len = SKB_HEAD_ALIGN(len);
-
- data = page_frag_alloc(&nc->page, len, gfp_mask);
- pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
- }
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
+ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!data))
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index f1b9b3958792..2f1be9baad05 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -541,6 +541,9 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
if (sk_is_stream_unix(sk))
return (1 << sk->sk_state) & TCPF_ESTABLISHED;
+ if (sk_is_vsock(sk) &&
+ (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET))
+ return (1 << sk->sk_state) & TCPF_ESTABLISHED;
return true;
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f23a1ec6694c..814300eee39d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1077,7 +1077,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
if (!dev && (r->arp_flags & ATF_COM)) {
- dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
+ dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
r->arp_ha.sa_data);
if (!dev)
return -ENODEV;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 0f523cbfe329..32b28fc21b63 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -178,7 +178,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
if (!skb)
return;
- skb_dst_drop(skb);
+ tcp_cleanup_skb(skb);
/* segs_in has been initialized to 1 in tcp_create_openreq_child().
* Hence, reset segs_in to 0 before calling tcp_segs_in()
* to avoid double counting. Also, tcp_segs_in() expects
@@ -195,7 +195,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- __skb_queue_tail(&sk->sk_receive_queue, skb);
+ tcp_add_receive_queue(sk, skb);
tp->syn_data_acked = 1;
/* u64_stats_update_begin(&tp->syncp) not needed here,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb82e01da911..0cbf81bf3d45 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -243,9 +243,15 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
do_div(val, skb->truesize);
tcp_sk(sk)->scaling_ratio = val ? val : 1;
- if (old_ratio != tcp_sk(sk)->scaling_ratio)
- WRITE_ONCE(tcp_sk(sk)->window_clamp,
- tcp_win_from_space(sk, sk->sk_rcvbuf));
+ if (old_ratio != tcp_sk(sk)->scaling_ratio) {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ val = tcp_win_from_space(sk, sk->sk_rcvbuf);
+ tcp_set_window_clamp(sk, val);
+
+ if (tp->window_clamp < tp->rcvq_space.space)
+ tp->rcvq_space.space = tp->window_clamp;
+ }
}
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
@@ -4970,7 +4976,7 @@ static void tcp_ofo_queue(struct sock *sk)
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (!eaten)
- __skb_queue_tail(&sk->sk_receive_queue, skb);
+ tcp_add_receive_queue(sk, skb);
else
kfree_skb_partial(skb, fragstolen);
@@ -5162,7 +5168,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
skb, fragstolen)) ? 1 : 0;
tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
- __skb_queue_tail(&sk->sk_receive_queue, skb);
+ tcp_add_receive_queue(sk, skb);
skb_set_owner_r(skb, sk);
}
return eaten;
@@ -5245,7 +5251,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
return;
}
- skb_dst_drop(skb);
+ tcp_cleanup_skb(skb);
__skb_pull(skb, tcp_hdr(skb)->doff * 4);
reason = SKB_DROP_REASON_NOT_SPECIFIED;
@@ -6226,7 +6232,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
- skb_dst_drop(skb);
+ tcp_cleanup_skb(skb);
__skb_pull(skb, tcp_header_len);
eaten = tcp_queue_rcv(sk, skb, &fragstolen);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cc2b5194a18d..2632844d2c35 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2027,7 +2027,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
*/
skb_condense(skb);
- skb_dst_drop(skb);
+ tcp_cleanup_skb(skb);
if (unlikely(tcp_checksum_complete(skb))) {
bh_unlock_sock(sk);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8e47e5355be6..4f648af8cfaa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -97,7 +97,7 @@ tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp,
err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base,
n, xa_limit_32b, &next, GFP_KERNEL);
- if (err)
+ if (err < 0)
goto err_xa_alloc;
exts->miss_cookie_node = n;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index c241cc552e8d..bfcff6d6a438 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -735,7 +735,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
* value SHOULD be the smallest TSN not acknowledged by the
* receiver of the request plus 2^31.
*/
- init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31);
+ init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1U << 31);
sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
init_tsn, GFP_ATOMIC);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 53a081d49d28..7e3db87ae433 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1189,6 +1189,9 @@ static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor)
{
struct vsock_sock *vsk = vsock_sk(sk);
+ if (WARN_ON_ONCE(!vsk->transport))
+ return -ENODEV;
+
return vsk->transport->read_skb(vsk, read_actor);
}
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index b58c3818f284..f0e48e6911fc 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -670,6 +670,13 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
};
int ret;
+ mutex_lock(&vsock->rx_lock);
+ vsock->rx_buf_nr = 0;
+ vsock->rx_buf_max_nr = 0;
+ mutex_unlock(&vsock->rx_lock);
+
+ atomic_set(&vsock->queued_replies, 0);
+
ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL);
if (ret < 0)
return ret;
@@ -779,9 +786,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->vdev = vdev;
- vsock->rx_buf_nr = 0;
- vsock->rx_buf_max_nr = 0;
- atomic_set(&vsock->queued_replies, 0);
mutex_init(&vsock->tx_lock);
mutex_init(&vsock->rx_lock);
diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c
index f201d9eca1df..07b96d56f3a5 100644
--- a/net/vmw_vsock/vsock_bpf.c
+++ b/net/vmw_vsock/vsock_bpf.c
@@ -87,7 +87,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
lock_sock(sk);
vsk = vsock_sk(sk);
- if (!vsk->transport) {
+ if (WARN_ON_ONCE(!vsk->transport)) {
copied = -ENODEV;
goto out;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
index 3729fbfd3084..80b153d3ddec 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
@@ -542,8 +542,12 @@ static void detach_program(struct bpf_flow *skel, int prog_fd)
static int set_port_drop(int pf, bool multi_port)
{
+ char dst_port[16];
+
+ snprintf(dst_port, sizeof(dst_port), "%d", CFG_PORT_INNER);
+
SYS(fail, "tc qdisc add dev lo ingress");
- SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s",
+ SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s %s %s",
"dev lo",
"parent FFFF:",
"protocol", pf == PF_INET6 ? "ipv6" : "ip",
@@ -551,6 +555,7 @@ static int set_port_drop(int pf, bool multi_port)
"flower",
"ip_proto udp",
"src_port", multi_port ? "8-10" : "9",
+ "dst_port", dst_port,
"action drop");
return 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 884ad87783d5..05eb37935c3e 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -111,31 +111,35 @@ out:
static void test_sockmap_vsock_delete_on_close(void)
{
- int err, c, p, map;
- const int zero = 0;
-
- err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
- if (!ASSERT_OK(err, "create_pair(AF_VSOCK)"))
- return;
+ int map, c, p, err, zero = 0;
map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
sizeof(int), 1, NULL);
- if (!ASSERT_GE(map, 0, "bpf_map_create")) {
- close(c);
- goto out;
- }
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
- err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
- close(c);
- if (!ASSERT_OK(err, "bpf_map_update"))
- goto out;
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
- err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (xbpf_map_update_elem(map, &zero, &c, BPF_NOEXIST))
+ goto close_socks;
+
+ xclose(c);
+ xclose(p);
+
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
+
+ err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
ASSERT_OK(err, "after close(), bpf_map_update");
-out:
- close(p);
- close(map);
+close_socks:
+ xclose(c);
+ xclose(p);
+close_map:
+ xclose(map);
}
static void test_skmsg_helpers(enum bpf_map_type map_type)
@@ -1061,6 +1065,34 @@ destroy:
test_sockmap_pass_prog__destroy(skel);
}
+static void test_sockmap_vsock_unconnected(void)
+{
+ struct sockaddr_storage addr;
+ int map, s, zero = 0;
+ socklen_t alen;
+
+ map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
+ sizeof(int), 1, NULL);
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
+
+ s = xsocket(AF_VSOCK, SOCK_STREAM, 0);
+ if (s < 0)
+ goto close_map;
+
+ /* Fail connect(), but trigger transport assignment. */
+ init_addr_loopback(AF_VSOCK, &addr, &alen);
+ if (!ASSERT_ERR(connect(s, sockaddr(&addr), alen), "connect"))
+ goto close_sock;
+
+ ASSERT_ERR(bpf_map_update_elem(map, &zero, &s, BPF_ANY), "map_update");
+
+close_sock:
+ xclose(s);
+close_map:
+ xclose(map);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -1127,4 +1159,6 @@ void test_sockmap_basic(void)
test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
if (test__start_subtest("sockmap skb_verdict vsock poll"))
test_sockmap_skb_verdict_vsock_poll();
+ if (test__start_subtest("sockmap vsock unconnected"))
+ test_sockmap_vsock_unconnected();
}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
index 3885a2a91f7d..baed5e380dae 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
@@ -20,6 +20,7 @@ ALL_TESTS="
test_port_range_ipv4_tcp
test_port_range_ipv6_udp
test_port_range_ipv6_tcp
+ test_port_range_ipv4_udp_drop
"
NUM_NETIFS=4
@@ -194,6 +195,51 @@ test_port_range_ipv6_tcp()
__test_port_range $proto $ip_proto $sip $dip $mode "$name"
}
+test_port_range_ipv4_udp_drop()
+{
+ local proto=ipv4
+ local ip_proto=udp
+ local sip=192.0.2.1
+ local dip=192.0.2.2
+ local mode="-4"
+ local name="IPv4 UDP Drop"
+ local dmac=$(mac_get $h2)
+ local smac=$(mac_get $h1)
+ local sport_min=2000
+ local sport_max=3000
+ local sport_mid=$((sport_min + (sport_max - sport_min) / 2))
+ local dport=5000
+
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \
+ flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+ src_port $sport_min-$sport_max \
+ dst_port $dport \
+ action drop
+
+ # Test ports outside range - should pass
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_min - 1)),dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_max + 1)),dp=$dport"
+
+ # Test ports inside range - should be dropped
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_min,dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_mid,dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_max,dp=$dport"
+
+ tc_check_packets "dev $swp1 ingress" 101 3
+ check_err $? "Filter did not drop the expected number of packets"
+
+ tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower
+
+ log_test "Port range matching - $name"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}