aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-03 16:29:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-03 16:29:08 -0700
commitf86d1fbbe7858884d6754534a0afbb74fc30bc26 (patch)
treef61796870edefbe77d495e9d719c68af1d14275b /net/core
parentMerge tag 'ata-5.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata (diff)
parentMerge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net (diff)
downloadlinux-dev-f86d1fbbe7858884d6754534a0afbb74fc30bc26.tar.xz
linux-dev-f86d1fbbe7858884d6754534a0afbb74fc30bc26.zip
Merge tag 'net-next-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking changes from Paolo Abeni: "Core: - Refactor the forward memory allocation to better cope with memory pressure with many open sockets, moving from a per socket cache to a per-CPU one - Replace rwlocks with RCU for better fairness in ping, raw sockets and IP multicast router. - Network-side support for IO uring zero-copy send. - A few skb drop reason improvements, including codegen the source file with string mapping instead of using macro magic. - Rename reference tracking helpers to a more consistent netdev_* schema. - Adapt u64_stats_t type to address load/store tearing issues. - Refine debug helper usage to reduce the log noise caused by bots. BPF: - Improve socket map performance, avoiding skb cloning on read operation. - Add support for 64 bits enum, to match types exposed by kernel. - Introduce support for sleepable uprobes program. - Introduce support for enum textual representation in libbpf. - New helpers to implement synproxy with eBPF/XDP. - Improve loop performances, inlining indirect calls when possible. - Removed all the deprecated libbpf APIs. - Implement new eBPF-based LSM flavor. - Add type match support, which allows accurate queries to the eBPF used types. - A few TCP congestion control framework usability improvements. - Add new infrastructure to manipulate CT entries via eBPF programs. - Allow for livepatch (KLP) and BPF trampolines to attach to the same kernel function. Protocols: - Introduce per network namespace lookup tables for unix sockets, increasing scalability and reducing contention. - Preparation work for Wi-Fi 7 Multi-Link Operation (MLO) support. - Add support to forcibly close TIME_WAIT TCP sockets via user-space tools. - Significant performance improvement for the TLS 1.3 receive path, both for zero-copy and not-zero-copy. - Support for changing the initial MPTCP subflow priority/backup status - Introduce virtually contiguous buffers for sockets over RDMA, to cope better with memory pressure. 
- Extend CAN ethtool support with timestamping capabilities - Refactor CAN build infrastructure to allow building only the needed features. Driver API: - Remove devlink mutex to allow parallel commands on multiple links. - Add support for pause stats in distributed switch. - Implement devlink helpers to query and flash line cards. - New helper for phy mode to register conversion. New hardware / drivers: - Ethernet DSA driver for the rockchip mt7531 on BPI-R2 Pro. - Ethernet DSA driver for the Renesas RZ/N1 A5PSW switch. - Ethernet DSA driver for the Microchip LAN937x switch. - Ethernet PHY driver for the Aquantia AQR113C EPHY. - CAN driver for the OBD-II ELM327 interface. - CAN driver for RZ/N1 SJA1000 CAN controller. - Bluetooth: Infineon CYW55572 Wi-Fi plus Bluetooth combo device. Drivers: - Intel Ethernet NICs: - i40e: add support for vlan pruning - i40e: add support for XDP fragmented packets - ice: improved vlan offload support - ice: add support for PPPoE offload - Mellanox Ethernet (mlx5) - refactor packet steering offload for performance and scalability - extend support for TC offload - refactor devlink code to clean-up the locking schema - support stacked vlans for bridge offloads - use TLS objects pool to improve connection rate - Netronome Ethernet NICs (nfp): - extend support for IPv6 fields mangling offload - add support for vepa mode in HW bridge - better support for virtio data path acceleration (VDPA) - enable TSO by default - Microsoft vNIC driver (mana) - add support for XDP redirect - Other Ethernet drivers: - bonding: add per-port priority support - microchip lan743x: extend phy support - Fungible funeth: support UDP segmentation offload and XDP xmit - Solarflare EF100: add support for virtual function representors - MediaTek SoC: add XDP support - Mellanox Ethernet/IB switch (mlxsw): - dropped support for unreleased H/W (XM router). 
- improved stats accuracy - unified bridge model conversion improving scalability (parts 1-6) - support for PTP in Spectrum-2 asics - Broadcom PHYs - add PTP support for BCM54210E - add support for the BCM53128 internal PHY - Marvell Ethernet switches (prestera): - implement support for multicast forwarding offload - Embedded Ethernet switches: - refactor OcteonTx MAC filter for better scalability - improve TC H/W offload for the Felix driver - refactor the Microchip ksz8 and ksz9477 drivers to share the probe code (parts 1, 2), add support for phylink mac configuration - Other WiFi: - Microchip wilc1000: disable WEP support and enable WPA3 - Atheros ath10k: encapsulation offload support Old code removal: - Neterion vxge ethernet driver: this has been untouched for more than 10 years" * tag 'net-next-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1890 commits) doc: sfp-phylink: Fix a broken reference wireguard: selftests: support UML wireguard: allowedips: don't corrupt stack when detecting overflow wireguard: selftests: update config fragments wireguard: ratelimiter: use hrtimer in selftest net/mlx5e: xsk: Discard unaligned XSK frames on striding RQ net: usb: ax88179_178a: Bind only to vendor-specific interface selftests: net: fix IOAM test skip return code net: usb: make USB_RTL8153_ECM non user configurable net: marvell: prestera: remove reduntant code octeontx2-pf: Reduce minimum mtu size to 60 net: devlink: Fix missing mutex_unlock() call net/tls: Remove redundant workqueue flush before destroy net: txgbe: Fix an error handling path in txgbe_probe() net: dsa: Fix spelling mistakes and cleanup code Documentation: devlink: add add devlink-selftests to the table of contents dccp: put dccp_qpolicy_full() and dccp_qpolicy_push() in the same lock net: ionic: fix error check for vlan flags in ionic_set_nic_features() net: ice: fix error NETIF_F_HW_VLAN_CTAG_FILTER check in ice_vsi_sync_fltr() nfp: flower: add support for tunnel offload without key 
ID ...
Diffstat (limited to 'net/core')
-rw-r--r--net/core/.gitignore1
-rw-r--r--net/core/Makefile23
-rw-r--r--net/core/datagram.c3
-rw-r--r--net/core/dev.c49
-rw-r--r--net/core/dev_ioctl.c4
-rw-r--r--net/core/devlink.c1651
-rw-r--r--net/core/drop_monitor.c36
-rw-r--r--net/core/dst.c8
-rw-r--r--net/core/failover.c4
-rw-r--r--net/core/filter.c202
-rw-r--r--net/core/flow_dissector.c53
-rw-r--r--net/core/flow_offload.c14
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/neighbour.c50
-rw-r--r--net/core/net-sysfs.c8
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/page_pool.c3
-rw-r--r--net/core/pktgen.c6
-rw-r--r--net/core/skbuff.c31
-rw-r--r--net/core/skmsg.c53
-rw-r--r--net/core/sock.c34
-rw-r--r--net/core/sock_map.c23
-rw-r--r--net/core/stream.c6
23 files changed, 1434 insertions, 832 deletions
diff --git a/net/core/.gitignore b/net/core/.gitignore
new file mode 100644
index 000000000000..df1e74372cce
--- /dev/null
+++ b/net/core/.gitignore
@@ -0,0 +1 @@
+dropreason_str.c
diff --git a/net/core/Makefile b/net/core/Makefile
index a8e4f737692b..e8ce3bd283a6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -4,7 +4,8 @@
#
obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \
- gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o
+ gen_stats.o gen_estimator.o net_namespace.o secure_seq.o \
+ flow_dissector.o dropreason_str.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
@@ -39,3 +40,23 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
+
+# dropreason_str.c is a generated file; list it so that it is cleaned up
+# together with the other build products.
+clean-files := dropreason_str.c
+
+# Generate dropreason_str.c from the rule's first prerequisite ($<).
+# The awk script (fields split on ',') prints an #include of
+# <net/dropreason.h> and opens a "drop_reasons[]" string array, then, for
+# every line starting with "\tSKB_DROP_REASON_" that lies between the
+# "enum skb_drop" line and the closing "};", strips that prefix from the
+# first field and emits a designated initializer mapping the enumerator
+# to its name as a string. "$$" is the make escape for awk's "$".
+quiet_cmd_dropreason_str = GEN $@
+cmd_dropreason_str = awk -F ',' 'BEGIN{ print "\#include <net/dropreason.h>\n"; \
+ print "const char * const drop_reasons[] = {" }\
+ /^enum skb_drop/ { dr=1; }\
+ /^\};/ { dr=0; }\
+ /^\tSKB_DROP_REASON_/ {\
+ if (dr) {\
+ sub(/\tSKB_DROP_REASON_/, "", $$1);\
+ printf "\t[SKB_DROP_REASON_%s] = \"%s\",\n", $$1, $$1;\
+ }\
+ }\
+ END{ print "};" }' $< > $@
+
+# The generated source depends on the header that defines the enum.
+$(obj)/dropreason_str.c: $(srctree)/include/net/dropreason.h
+ $(call cmd,dropreason_str)
+
+# Make sure the source is generated before the object is built.
+$(obj)/dropreason_str.o: $(obj)/dropreason_str.c
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ecbc0f471089..f3988ef8e9af 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -320,7 +320,6 @@ EXPORT_SYMBOL(skb_recv_datagram);
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
consume_skb(skb);
- sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);
@@ -336,7 +335,6 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
slow = lock_sock_fast(sk);
sk_peek_offset_bwd(sk, len);
skb_orphan(skb);
- sk_mem_reclaim_partial(sk);
unlock_sock_fast(sk, slow);
/* skb is now orphaned, can be freed outside of locked section */
@@ -396,7 +394,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
NULL);
kfree_skb(skb);
- sk_mem_reclaim_partial(sk);
return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index 30a1603a7225..716df64fcfa5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3927,7 +3927,7 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->pkt_type = PACKET_LOOPBACK;
if (skb->ip_summed == CHECKSUM_NONE)
skb->ip_summed = CHECKSUM_UNNECESSARY;
- WARN_ON(!skb_dst(skb));
+ DEBUG_NET_WARN_ON_ONCE(!skb_dst(skb));
skb_dst_force(skb);
netif_rx(skb);
return 0;
@@ -4168,6 +4168,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
bool again = false;
skb_reset_mac_header(skb);
+ skb_assert_len(skb);
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
__skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
@@ -6357,6 +6358,23 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
}
EXPORT_SYMBOL(dev_set_threaded);
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ *
+ * A probe skb is requested from @napi and handed back through
+ * napi_free_frags(), both with bottom halves disabled. A one-time
+ * warning is emitted if the probe skb has head_frag set; a NULL skb
+ * from napi_get_frags() is not reported.
+ */
+static void napi_get_frags_check(struct napi_struct *napi)
+{
+ struct sk_buff *skb;
+
+ local_bh_disable();
+ skb = napi_get_frags(napi);
+ WARN_ON_ONCE(skb && skb->head_frag);
+ napi_free_frags(napi);
+ local_bh_enable();
+}
+
void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6384,6 +6402,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
napi_hash_add(napi);
+ napi_get_frags_check(napi);
/* Create kthread for this napi if dev->threaded is set.
* Clear dev->threaded if kthread creation failed so that
* threaded mode will not be enabled in napi_enable().
@@ -7469,7 +7488,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj->ref_nr = 1;
adj->private = private;
adj->ignore = false;
- dev_hold_track(adj_dev, &adj->dev_tracker, GFP_KERNEL);
+ netdev_hold(adj_dev, &adj->dev_tracker, GFP_KERNEL);
pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
@@ -7498,7 +7517,7 @@ remove_symlinks:
if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
free_adj:
- dev_put_track(adj_dev, &adj->dev_tracker);
+ netdev_put(adj_dev, &adj->dev_tracker);
kfree(adj);
return ret;
@@ -7540,7 +7559,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
list_del_rcu(&adj->list);
pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
adj_dev->name, dev->name, adj_dev->name);
- dev_put_track(adj_dev, &adj->dev_tracker);
+ netdev_put(adj_dev, &adj->dev_tracker);
kfree_rcu(adj, rcu);
}
@@ -10068,7 +10087,7 @@ int register_netdevice(struct net_device *dev)
dev_init_scheduler(dev);
- dev_hold_track(dev, &dev->dev_registered_tracker, GFP_KERNEL);
+ netdev_hold(dev, &dev->dev_registered_tracker, GFP_KERNEL);
list_netdevice(dev);
add_device_randomness(dev->dev_addr, dev->addr_len);
@@ -10467,23 +10486,23 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
int cpu;
for_each_possible_cpu(cpu) {
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
const struct pcpu_sw_netstats *stats;
- struct pcpu_sw_netstats tmp;
unsigned int start;
stats = per_cpu_ptr(netstats, cpu);
do {
start = u64_stats_fetch_begin_irq(&stats->syncp);
- tmp.rx_packets = stats->rx_packets;
- tmp.rx_bytes = stats->rx_bytes;
- tmp.tx_packets = stats->tx_packets;
- tmp.tx_bytes = stats->tx_bytes;
+ rx_packets = u64_stats_read(&stats->rx_packets);
+ rx_bytes = u64_stats_read(&stats->rx_bytes);
+ tx_packets = u64_stats_read(&stats->tx_packets);
+ tx_bytes = u64_stats_read(&stats->tx_bytes);
} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
- s->rx_packets += tmp.rx_packets;
- s->rx_bytes += tmp.rx_bytes;
- s->tx_packets += tmp.tx_packets;
- s->tx_bytes += tmp.tx_bytes;
+ s->rx_packets += rx_packets;
+ s->rx_bytes += rx_bytes;
+ s->tx_packets += tx_packets;
+ s->tx_bytes += tx_bytes;
}
}
EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats);
@@ -10877,7 +10896,7 @@ void unregister_netdevice_many(struct list_head *head)
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
- dev_put_track(dev, &dev->dev_registered_tracker);
+ netdev_put(dev, &dev->dev_registered_tracker);
net_set_todo(dev);
}
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 4f6be442ae7e..7674bb9f3076 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -384,10 +384,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data,
return -ENODEV;
if (!netif_is_bridge_master(dev))
return -EOPNOTSUPP;
- dev_hold_track(dev, &dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &dev_tracker, GFP_KERNEL);
rtnl_unlock();
err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
- dev_put_track(dev, &dev_tracker);
+ netdev_put(dev, &dev_tracker);
rtnl_lock();
return err;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 5cc88490f18f..5da5c7cca98a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -66,9 +66,11 @@ struct devlink {
* port, sb, dpipe, resource, params, region, traps and more.
*/
struct mutex lock;
+ struct lock_class_key lock_key;
u8 reload_failed:1;
refcount_t refcount;
struct completion comp;
+ struct rcu_head rcu;
char priv[] __aligned(NETDEV_ALIGN);
};
@@ -87,6 +89,7 @@ struct devlink_linecard {
const char *type;
struct devlink_linecard_type *types;
unsigned int types_count;
+ struct devlink *nested_devlink;
};
/**
@@ -198,8 +201,13 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
DEVLINK_PORT_FN_STATE_ACTIVE),
};
+/* Netlink policy for the selftest ID attributes, indexed by selftest ID:
+ * DEVLINK_ATTR_SELFTEST_ID_FLASH is carried as a flag (NLA_FLAG).
+ */
+static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
+ [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG },
+};
+
static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
#define DEVLINK_REGISTERED XA_MARK_1
+#define DEVLINK_UNREGISTERING XA_MARK_2
/* devlink instances are open to the access from the user space after
* devlink_register() call. Such logical barrier allows us to have certain
@@ -217,24 +225,27 @@ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
#define ASSERT_DEVLINK_NOT_REGISTERED(d) \
WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
-/* devlink_mutex
- *
- * An overall lock guarding every operation coming from userspace.
- * It also guards devlink devices list and it is taken when
- * driver registers/unregisters it.
- */
-static DEFINE_MUTEX(devlink_mutex);
-
struct net *devlink_net(const struct devlink *devlink)
{
return read_pnet(&devlink->_net);
}
EXPORT_SYMBOL_GPL(devlink_net);
+/* RCU callback: @head is the rcu_head embedded in a struct devlink;
+ * signal that devlink's completion.
+ */
+static void __devlink_put_rcu(struct rcu_head *head)
+{
+	complete(&container_of(head, struct devlink, rcu)->comp);
+}
+
void devlink_put(struct devlink *devlink)
{
if (refcount_dec_and_test(&devlink->refcount))
- complete(&devlink->comp);
+ /* Make sure unregister operation that may await the completion
+ * is unblocked only after all users are after the end of
+ * RCU grace period.
+ */
+ call_rcu(&devlink->rcu, __devlink_put_rcu);
}
struct devlink *__must_check devlink_try_get(struct devlink *devlink)
@@ -265,18 +276,82 @@ void devl_lock(struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devl_lock);
+/* Try to take the devlink instance lock without sleeping.
+ * Returns the result of mutex_trylock() on devlink->lock: nonzero if the
+ * lock was acquired, 0 if it is currently held by someone else.
+ */
+int devl_trylock(struct devlink *devlink)
+{
+ return mutex_trylock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_trylock);
+
void devl_unlock(struct devlink *devlink)
{
mutex_unlock(&devlink->lock);
}
EXPORT_SYMBOL_GPL(devl_unlock);
+/* Look up the next devlink instance that carries the @filter mark, is not
+ * being unregistered and belongs to @net, and take a reference on it.
+ *
+ * @net: network namespace the instance must belong to
+ * @indexp: in/out xarray index; updated by the xarray lookup
+ * @filter: xarray mark an entry must carry to be considered
+ * @xa_find_fn: xa_find() to consider *indexp itself, xa_find_after() to
+ *		start strictly after it
+ *
+ * Returns the referenced devlink, to be released with devlink_put(), or
+ * NULL when there is no further matching instance.
+ */
+static struct devlink *
+devlinks_xa_find_get(struct net *net, unsigned long *indexp, xa_mark_t filter,
+		     void * (*xa_find_fn)(struct xarray *, unsigned long *,
+					  unsigned long, xa_mark_t))
+{
+	struct devlink *devlink;
+
+	rcu_read_lock();
+retry:
+	/* Honour the caller's mark instead of a hard-coded one. */
+	devlink = xa_find_fn(&devlinks, indexp, ULONG_MAX, filter);
+	if (!devlink)
+		goto unlock;
+
+	/* Every retry has to move past the entry just examined, so switch
+	 * to xa_find_after() before the first "goto retry". Otherwise a
+	 * lookup started with xa_find() would keep returning the same
+	 * skipped entry and spin inside the RCU read-side critical section.
+	 */
+	xa_find_fn = xa_find_after;
+
+	/* In case devlink_unregister() was already called and "unregistering"
+	 * mark was set, do not allow to get a devlink reference here.
+	 * This prevents live-lock of devlink_unregister() wait for completion.
+	 */
+	if (xa_get_mark(&devlinks, *indexp, DEVLINK_UNREGISTERING))
+		goto retry;
+
+	if (!devlink_try_get(devlink))
+		goto retry;
+	if (!net_eq(devlink_net(devlink), net)) {
+		/* Wrong namespace: drop the reference and keep looking. */
+		devlink_put(devlink);
+		goto retry;
+	}
+unlock:
+	rcu_read_unlock();
+	return devlink;
+}
+
+/* Start an iteration: call devlinks_xa_find_get() with xa_find() as the
+ * lookup function, passing @net, @indexp and @filter through unchanged.
+ */
+static struct devlink *devlinks_xa_find_get_first(struct net *net,
+ unsigned long *indexp,
+ xa_mark_t filter)
+{
+ return devlinks_xa_find_get(net, indexp, filter, xa_find);
+}
+
+/* Continue an iteration: call devlinks_xa_find_get() with xa_find_after()
+ * as the lookup function, passing @net, @indexp and @filter through
+ * unchanged.
+ */
+static struct devlink *devlinks_xa_find_get_next(struct net *net,
+ unsigned long *indexp,
+ xa_mark_t filter)
+{
+ return devlinks_xa_find_get(net, indexp, filter, xa_find_after);
+}
+
+/* Iterate over the devlink instances on which a reference could be taken.
+ * @index is reset to 0 and serves as the iteration cursor. Every devlink
+ * handed to the loop body is referenced, so the body must call
+ * devlink_put() on it to release that reference, unless it intentionally
+ * keeps the reference (e.g. to return the devlink to its caller).
+ */
+#define devlinks_xa_for_each_get(net, index, devlink, filter) \
+ for (index = 0, \
+ devlink = devlinks_xa_find_get_first(net, &index, filter); \
+ devlink; devlink = devlinks_xa_find_get_next(net, &index, filter))
+
+#define devlinks_xa_for_each_registered_get(net, index, devlink) \
+ devlinks_xa_for_each_get(net, index, devlink, DEVLINK_REGISTERED)
+
static struct devlink *devlink_get_from_attrs(struct net *net,
struct nlattr **attrs)
{
struct devlink *devlink;
unsigned long index;
- bool found = false;
char *busname;
char *devname;
@@ -286,21 +361,14 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
- lockdep_assert_held(&devlink_mutex);
-
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ devlinks_xa_for_each_registered_get(net, index, devlink) {
if (strcmp(devlink->dev->bus->name, busname) == 0 &&
- strcmp(dev_name(devlink->dev), devname) == 0 &&
- net_eq(devlink_net(devlink), net)) {
- found = true;
- break;
- }
+ strcmp(dev_name(devlink->dev), devname) == 0)
+ return devlink;
+ devlink_put(devlink);
}
- if (!found || !devlink_try_get(devlink))
- devlink = ERR_PTR(-ENODEV);
-
- return devlink;
+ return ERR_PTR(-ENODEV);
}
static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
@@ -634,6 +702,10 @@ struct devlink_region {
const struct devlink_region_ops *ops;
const struct devlink_port_region_ops *port_ops;
};
+ struct mutex snapshot_lock; /* protects snapshot_list,
+ * max_snapshots and cur_snapshots
+ * consistency.
+ */
struct list_head snapshot_list;
u32 max_snapshots;
u32 cur_snapshots;
@@ -690,12 +762,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
-/* The per devlink instance lock is taken by default in the pre-doit
- * operation, yet several commands do not require this. The global
- * devlink lock is taken and protects from disruption by user-calls.
- */
-#define DEVLINK_NL_FLAG_NO_LOCK BIT(5)
-
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
@@ -704,14 +770,10 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct devlink *devlink;
int err;
- mutex_lock(&devlink_mutex);
devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs);
- if (IS_ERR(devlink)) {
- mutex_unlock(&devlink_mutex);
+ if (IS_ERR(devlink))
return PTR_ERR(devlink);
- }
- if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
info->user_ptr[0] = devlink;
if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
devlink_port = devlink_port_get_from_info(devlink, info);
@@ -753,10 +815,8 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
return 0;
unlock:
- if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
- mutex_unlock(&devlink_mutex);
return err;
}
@@ -771,10 +831,8 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
linecard = info->user_ptr[1];
devlink_linecard_put(linecard);
}
- if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
- mutex_unlock(&devlink_mutex);
}
static struct genl_family devlink_nl_family;
@@ -796,6 +854,24 @@ static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
return 0;
}
+/* Append a DEVLINK_ATTR_NESTED_DEVLINK nest containing the handle of
+ * @devlink to @msg. Returns 0 on success; on failure the nest is
+ * cancelled (if it was started) and -EMSGSIZE is returned.
+ */
+static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
+{
+	struct nlattr *nest = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK);
+
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink)) {
+		nla_nest_cancel(msg, nest);
+		return -EMSGSIZE;
+	}
+
+	nla_nest_end(msg, nest);
+	return 0;
+}
+
struct devlink_reload_combination {
enum devlink_reload_action action;
enum devlink_reload_limit limit;
@@ -1321,15 +1397,8 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
u32 id = NETLINK_CB(cb->skb).portid;
@@ -1342,18 +1411,16 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, NULL);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
if (err != -EMSGSIZE)
return err;
@@ -1424,16 +1491,7 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) {
- devlink_put(devlink);
- continue;
- }
-
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
if (idx < start) {
idx++;
devlink_put(devlink);
@@ -1449,8 +1507,6 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
idx++;
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -1487,15 +1543,8 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(devlink_port, &devlink->port_list, list) {
if (idx < start) {
idx++;
@@ -1507,19 +1556,16 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, cb->extack);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -1700,9 +1746,9 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
return devlink->ops->port_unsplit(devlink, devlink_port, info->extack);
}
-static int devlink_port_new_notifiy(struct devlink *devlink,
- unsigned int port_index,
- struct genl_info *info)
+static int devlink_port_new_notify(struct devlink *devlink,
+ unsigned int port_index,
+ struct genl_info *info)
{
struct devlink_port *devlink_port;
struct sk_buff *msg;
@@ -1712,7 +1758,7 @@ static int devlink_port_new_notifiy(struct devlink *devlink,
if (!msg)
return -ENOMEM;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
devlink_port = devlink_port_get_by_index(devlink, port_index);
if (!devlink_port) {
err = -ENODEV;
@@ -1724,12 +1770,9 @@ static int devlink_port_new_notifiy(struct devlink *devlink,
if (err)
goto out;
- err = genlmsg_reply(msg, info);
- mutex_unlock(&devlink->lock);
- return err;
+ return genlmsg_reply(msg, info);
out:
- mutex_unlock(&devlink->lock);
nlmsg_free(msg);
return err;
}
@@ -1777,7 +1820,7 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
if (err)
return err;
- err = devlink_port_new_notifiy(devlink, new_port_index, info);
+ err = devlink_port_new_notify(devlink, new_port_index, info);
if (err && err != -ENODEV) {
/* Fail to send the response; destroy newly created port. */
devlink->ops->port_del(devlink, new_port_index, extack);
@@ -2100,6 +2143,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg,
nla_nest_end(msg, attr);
}
+ if (linecard->nested_devlink &&
+ devlink_nl_put_nested_handle(msg, linecard->nested_devlink))
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
return 0;
@@ -2172,14 +2219,7 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
mutex_lock(&devlink->linecards_lock);
list_for_each_entry(linecard, &devlink->linecard_list, list) {
if (idx < start) {
@@ -2202,12 +2242,9 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
idx++;
}
mutex_unlock(&devlink->linecards_lock);
-retry:
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -2444,15 +2481,8 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
if (idx < start) {
idx++;
@@ -2464,19 +2494,16 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -2596,16 +2623,11 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
- !devlink->ops->sb_pool_get)
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ if (!devlink->ops->sb_pool_get)
goto retry;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
devlink_sb,
@@ -2614,18 +2636,16 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
}
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
retry:
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
if (err != -EMSGSIZE)
return err;
@@ -2817,16 +2837,11 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
- !devlink->ops->sb_port_pool_get)
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ if (!devlink->ops->sb_port_pool_get)
goto retry;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_port_pool_get_dumpit(msg, start, &idx,
devlink, devlink_sb,
@@ -2835,18 +2850,16 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
}
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
retry:
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
if (err != -EMSGSIZE)
return err;
@@ -3066,16 +3079,11 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
- !devlink->ops->sb_tc_pool_bind_get)
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ if (!devlink->ops->sb_tc_pool_bind_get)
goto retry;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx,
devlink,
@@ -3085,18 +3093,16 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
}
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
retry:
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
if (err != -EMSGSIZE)
return err;
@@ -4794,6 +4800,204 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
return ret;
}
+static int
+devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink,
+ u32 portid, u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *selftests;
+ void *hdr;
+ int err;
+ int i;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags,
+ DEVLINK_CMD_SELFTESTS_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto err_cancel_msg;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto err_cancel_msg;
+
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ if (devlink->ops->selftest_check(devlink, i, extack)) {
+ err = nla_put_flag(msg, i);
+ if (err)
+ goto err_cancel_msg;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+static int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ if (!devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid,
+ info->snd_seq, 0, info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_selftests_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink *devlink;
+ int start = cb->args[0];
+ unsigned long index;
+ int idx = 0;
+ int err = 0;
+
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ if (idx < start || !devlink->ops->selftest_check)
+ goto inc;
+
+ devl_lock(devlink);
+ err = devlink_nl_selftests_fill(msg, devlink,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+ devl_unlock(devlink);
+ if (err) {
+ devlink_put(devlink);
+ break;
+ }
+inc:
+ idx++;
+ devlink_put(devlink);
+ }
+
+ if (err != -EMSGSIZE)
+ return err;
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
+ enum devlink_selftest_status test_status)
+{
+ struct nlattr *result_attr;
+
+ result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT);
+ if (!result_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS,
+ test_status))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, result_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, result_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_selftests_run(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *attrs, *selftests;
+ struct sk_buff *msg;
+ void *hdr;
+ int err;
+ int i;
+
+ if (!devlink->ops->selftest_run || !devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[DEVLINK_ATTR_SELFTESTS]) {
+ NL_SET_ERR_MSG_MOD(info->extack, "selftest required");
+ return -EINVAL;
+ }
+
+ attrs = info->attrs[DEVLINK_ATTR_SELFTESTS];
+
+ err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs,
+ devlink_selftest_nl_policy, info->extack);
+ if (err < 0)
+ return err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = -EMSGSIZE;
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN);
+ if (!hdr)
+ goto free_msg;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto genlmsg_cancel;
+
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ enum devlink_selftest_status test_status;
+
+ if (nla_get_flag(tb[i])) {
+ if (!devlink->ops->selftest_check(devlink, i,
+ info->extack)) {
+ if (devlink_selftest_result_put(msg, i,
+ DEVLINK_SELFTEST_STATUS_SKIP))
+ goto selftests_nest_cancel;
+ continue;
+ }
+
+ test_status = devlink->ops->selftest_run(devlink, i,
+ info->extack);
+ if (devlink_selftest_result_put(msg, i, test_status))
+ goto selftests_nest_cancel;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return genlmsg_reply(msg, info);
+
+selftests_nest_cancel:
+ nla_nest_cancel(msg, selftests);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return err;
+}
+
static const struct devlink_param devlink_param_generic[] = {
{
.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
@@ -5153,15 +5357,8 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(param_item, &devlink->param_list, list) {
if (idx < start) {
idx++;
@@ -5175,19 +5372,16 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
if (err != -EMSGSIZE)
return err;
@@ -5388,15 +5582,8 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(devlink_port, &devlink->port_list, list) {
list_for_each_entry(param_item,
&devlink_port->param_list, list) {
@@ -5414,20 +5601,17 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
if (err != -EMSGSIZE)
return err;
@@ -5672,21 +5856,28 @@ static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id)
{
unsigned long count;
void *p;
+ int err;
- lockdep_assert_held(&devlink->lock);
-
+ xa_lock(&devlink->snapshot_ids);
p = xa_load(&devlink->snapshot_ids, id);
- if (WARN_ON(!p))
- return -EINVAL;
+ if (WARN_ON(!p)) {
+ err = -EINVAL;
+ goto unlock;
+ }
- if (WARN_ON(!xa_is_value(p)))
- return -EINVAL;
+ if (WARN_ON(!xa_is_value(p))) {
+ err = -EINVAL;
+ goto unlock;
+ }
count = xa_to_value(p);
count++;
- return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
- GFP_KERNEL));
+ err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
+ GFP_ATOMIC));
+unlock:
+ xa_unlock(&devlink->snapshot_ids);
+ return err;
}
/**
@@ -5709,25 +5900,26 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id)
unsigned long count;
void *p;
- lockdep_assert_held(&devlink->lock);
-
+ xa_lock(&devlink->snapshot_ids);
p = xa_load(&devlink->snapshot_ids, id);
if (WARN_ON(!p))
- return;
+ goto unlock;
if (WARN_ON(!xa_is_value(p)))
- return;
+ goto unlock;
count = xa_to_value(p);
if (count > 1) {
count--;
- xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
- GFP_KERNEL);
+ __xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
+ GFP_ATOMIC);
} else {
/* If this was the last user, we can erase this id */
- xa_erase(&devlink->snapshot_ids, id);
+ __xa_erase(&devlink->snapshot_ids, id);
}
+unlock:
+ xa_unlock(&devlink->snapshot_ids);
}
/**
@@ -5748,13 +5940,17 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id)
*/
static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id)
{
- lockdep_assert_held(&devlink->lock);
+ int err;
- if (xa_load(&devlink->snapshot_ids, id))
+ xa_lock(&devlink->snapshot_ids);
+ if (xa_load(&devlink->snapshot_ids, id)) {
+ xa_unlock(&devlink->snapshot_ids);
return -EEXIST;
-
- return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(0),
- GFP_KERNEL));
+ }
+ err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0),
+ GFP_ATOMIC));
+ xa_unlock(&devlink->snapshot_ids);
+ return err;
}
/**
@@ -5775,8 +5971,6 @@ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id)
*/
static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
{
- lockdep_assert_held(&devlink->lock);
-
return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1),
xa_limit_32b, GFP_KERNEL);
}
@@ -5789,7 +5983,7 @@ static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
* Multiple snapshots can be created on a region.
* The @snapshot_id should be obtained using the getter function.
*
- * Must be called only while holding the devlink instance lock.
+ * Must be called only while holding the region snapshot lock.
*
* @region: devlink region of the snapshot
* @data: snapshot data
@@ -5803,7 +5997,7 @@ __devlink_region_snapshot_create(struct devlink_region *region,
struct devlink_snapshot *snapshot;
int err;
- lockdep_assert_held(&devlink->lock);
+ lockdep_assert_held(&region->snapshot_lock);
/* check if region can hold one more snapshot */
if (region->cur_snapshots == region->max_snapshots)
@@ -5841,7 +6035,7 @@ static void devlink_region_snapshot_del(struct devlink_region *region,
{
struct devlink *devlink = region->devlink;
- lockdep_assert_held(&devlink->lock);
+ lockdep_assert_held(&region->snapshot_lock);
devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
region->cur_snapshots--;
@@ -5935,7 +6129,7 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg,
struct devlink_port *port;
int err = 0;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
list_for_each_entry(region, &devlink->region_list, list) {
if (*idx < start) {
(*idx)++;
@@ -5959,7 +6153,7 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg,
}
out:
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
return err;
}
@@ -5972,23 +6166,14 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink,
&idx, start);
-retry:
devlink_put(devlink);
if (err)
goto out;
}
out:
- mutex_unlock(&devlink_mutex);
cb->args[0] = idx;
return msg->len;
}
@@ -6027,11 +6212,15 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb,
if (!region)
return -EINVAL;
+ mutex_lock(&region->snapshot_lock);
snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
- if (!snapshot)
+ if (!snapshot) {
+ mutex_unlock(&region->snapshot_lock);
return -EINVAL;
+ }
devlink_region_snapshot_del(region, snapshot);
+ mutex_unlock(&region->snapshot_lock);
return 0;
}
@@ -6079,9 +6268,12 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
}
+ mutex_lock(&region->snapshot_lock);
+
if (region->cur_snapshots == region->max_snapshots) {
NL_SET_ERR_MSG_MOD(info->extack, "The region has reached the maximum number of stored snapshots");
- return -ENOSPC;
+ err = -ENOSPC;
+ goto unlock;
}
snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
@@ -6090,17 +6282,18 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use");
- return -EEXIST;
+ err = -EEXIST;
+ goto unlock;
}
err = __devlink_snapshot_id_insert(devlink, snapshot_id);
if (err)
- return err;
+ goto unlock;
} else {
err = __devlink_region_snapshot_id_get(devlink, &snapshot_id);
if (err) {
NL_SET_ERR_MSG_MOD(info->extack, "Failed to allocate a new snapshot id");
- return err;
+ goto unlock;
}
}
@@ -6122,8 +6315,10 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
snapshot = devlink_region_snapshot_get_by_id(region,
snapshot_id);
- if (WARN_ON(!snapshot))
- return -EINVAL;
+ if (WARN_ON(!snapshot)) {
+ err = -EINVAL;
+ goto unlock;
+ }
msg = devlink_nl_region_notify_build(region, snapshot,
DEVLINK_CMD_REGION_NEW,
@@ -6138,16 +6333,20 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
goto err_notify;
}
+ mutex_unlock(&region->snapshot_lock);
return 0;
err_snapshot_create:
region->ops->destructor(data);
err_snapshot_capture:
__devlink_snapshot_id_decrement(devlink, snapshot_id);
+ mutex_unlock(&region->snapshot_lock);
return err;
err_notify:
devlink_region_snapshot_del(region, snapshot);
+unlock:
+ mutex_unlock(&region->snapshot_lock);
return err;
}
@@ -6242,14 +6441,11 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
start_offset = *((u64 *)&cb->args[0]);
- mutex_lock(&devlink_mutex);
devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
- if (IS_ERR(devlink)) {
- err = PTR_ERR(devlink);
- goto out_dev;
- }
+ if (IS_ERR(devlink))
+ return PTR_ERR(devlink);
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
!attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) {
@@ -6345,19 +6541,15 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
nla_nest_end(skb, chunks_attr);
genlmsg_end(skb, hdr);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
- mutex_unlock(&devlink_mutex);
-
return skb->len;
nla_put_failure:
genlmsg_cancel(skb, hdr);
out_unlock:
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
-out_dev:
- mutex_unlock(&devlink_mutex);
return err;
}
@@ -6506,23 +6698,16 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
if (idx < start || !devlink->ops->info_get)
goto inc;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
cb->extack);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
if (err == -EOPNOTSUPP)
err = 0;
else if (err) {
@@ -6531,10 +6716,8 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
}
inc:
idx++;
-retry:
devlink_put(devlink);
}
- mutex_unlock(&devlink_mutex);
if (err != -EMSGSIZE)
return err;
@@ -7523,6 +7706,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
enum devlink_health_reporter_state prev_health_state;
struct devlink *devlink = reporter->devlink;
unsigned long recover_ts_threshold;
+ int ret;
/* write a log message of the current error */
WARN_ON(!msg);
@@ -7556,11 +7740,14 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
mutex_unlock(&reporter->dump_lock);
}
- if (reporter->auto_recover)
- return devlink_health_reporter_recover(reporter,
- priv_ctx, NULL);
+ if (!reporter->auto_recover)
+ return 0;
- return 0;
+ devl_lock(devlink);
+ ret = devlink_health_reporter_recover(reporter, priv_ctx, NULL);
+ devl_unlock(devlink);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(devlink_health_report);
@@ -7609,18 +7796,13 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
struct nlattr **attrs = info->attrs;
struct devlink *devlink;
- mutex_lock(&devlink_mutex);
devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
if (IS_ERR(devlink))
- goto unlock;
+ return NULL;
reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
devlink_put(devlink);
- mutex_unlock(&devlink_mutex);
return reporter;
-unlock:
- mutex_unlock(&devlink_mutex);
- return NULL;
}
void
@@ -7686,14 +7868,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry_rep;
-
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
mutex_lock(&devlink->reporters_lock);
list_for_each_entry(reporter, &devlink->reporter_list,
list) {
@@ -7713,18 +7888,11 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
idx++;
}
mutex_unlock(&devlink->reporters_lock);
-retry_rep:
devlink_put(devlink);
}
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry_port;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(port, &devlink->port_list, list) {
mutex_lock(&port->reporters_lock);
list_for_each_entry(reporter, &port->reporter_list, list) {
@@ -7739,7 +7907,7 @@ retry_rep:
cb->nlh->nlmsg_seq, NLM_F_MULTI);
if (err) {
mutex_unlock(&port->reporters_lock);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
@@ -7747,13 +7915,10 @@ retry_rep:
}
mutex_unlock(&port->reporters_lock);
}
- mutex_unlock(&devlink->lock);
-retry_port:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -7946,8 +8111,8 @@ static int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
}
struct devlink_stats {
- u64 rx_bytes;
- u64 rx_packets;
+ u64_stats_t rx_bytes;
+ u64_stats_t rx_packets;
struct u64_stats_sync syncp;
};
@@ -8104,12 +8269,12 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
cpu_stats = per_cpu_ptr(trap_stats, i);
do {
start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
- rx_packets = cpu_stats->rx_packets;
- rx_bytes = cpu_stats->rx_bytes;
+ rx_packets = u64_stats_read(&cpu_stats->rx_packets);
+ rx_bytes = u64_stats_read(&cpu_stats->rx_bytes);
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
- stats->rx_packets += rx_packets;
- stats->rx_bytes += rx_bytes;
+ u64_stats_add(&stats->rx_packets, rx_packets);
+ u64_stats_add(&stats->rx_bytes, rx_bytes);
}
}
@@ -8127,11 +8292,13 @@ devlink_trap_group_stats_put(struct sk_buff *msg,
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- stats.rx_packets, DEVLINK_ATTR_PAD))
+ u64_stats_read(&stats.rx_packets),
+ DEVLINK_ATTR_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- stats.rx_bytes, DEVLINK_ATTR_PAD))
+ u64_stats_read(&stats.rx_bytes),
+ DEVLINK_ATTR_PAD))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -8171,11 +8338,13 @@ static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
goto nla_put_failure;
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- stats.rx_packets, DEVLINK_ATTR_PAD))
+ u64_stats_read(&stats.rx_packets),
+ DEVLINK_ATTR_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- stats.rx_bytes, DEVLINK_ATTR_PAD))
+ u64_stats_read(&stats.rx_bytes),
+ DEVLINK_ATTR_PAD))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -8282,15 +8451,8 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(trap_item, &devlink->trap_list, list) {
if (idx < start) {
idx++;
@@ -8302,19 +8464,16 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -8509,15 +8668,8 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(group_item, &devlink->trap_group_list,
list) {
if (idx < start) {
@@ -8530,19 +8682,16 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -8823,15 +8972,8 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(policer_item, &devlink->trap_policer_list,
list) {
if (idx < start) {
@@ -8844,19 +8986,16 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -8996,6 +9135,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 },
[DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED },
};
static const struct genl_small_ops devlink_nl_ops[] = {
@@ -9063,13 +9203,11 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_PORT_NEW,
.doit = devlink_nl_cmd_port_new_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PORT_DEL,
.doit = devlink_nl_cmd_port_del_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_LINECARD_GET,
@@ -9199,7 +9337,6 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_reload,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PARAM_GET,
@@ -9267,8 +9404,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_get_doit,
.dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
/* can be retrieved by unprivileged users */
},
{
@@ -9276,24 +9412,21 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_recover_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_diagnose_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
@@ -9307,16 +9440,14 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_test_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
},
{
.cmd = DEVLINK_CMD_FLASH_UPDATE,
@@ -9357,6 +9488,17 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.doit = devlink_nl_cmd_trap_policer_set_doit,
.flags = GENL_ADMIN_PERM,
},
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_GET,
+ .doit = devlink_nl_cmd_selftests_get_doit,
+ .dumpit = devlink_nl_cmd_selftests_get_dumpit
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_RUN,
+ .doit = devlink_nl_cmd_selftests_run,
+ .flags = GENL_ADMIN_PERM,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -9365,6 +9507,7 @@ static struct genl_family devlink_nl_family __ro_after_init = {
.maxattr = DEVLINK_ATTR_MAX,
.policy = devlink_nl_policy,
.netnsok = true,
+ .parallel_ops = true,
.pre_doit = devlink_nl_pre_doit,
.post_doit = devlink_nl_post_doit,
.module = THIS_MODULE,
@@ -9473,7 +9616,9 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
INIT_LIST_HEAD(&devlink->trap_list);
INIT_LIST_HEAD(&devlink->trap_group_list);
INIT_LIST_HEAD(&devlink->trap_policer_list);
+ lockdep_register_key(&devlink->lock_key);
mutex_init(&devlink->lock);
+ lockdep_set_class(&devlink->lock, &devlink->lock_key);
mutex_init(&devlink->reporters_lock);
mutex_init(&devlink->linecards_lock);
refcount_set(&devlink->refcount, 1);
@@ -9581,10 +9726,8 @@ void devlink_register(struct devlink *devlink)
ASSERT_DEVLINK_NOT_REGISTERED(devlink);
/* Make sure that we are in .probe() routine */
- mutex_lock(&devlink_mutex);
xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
devlink_notify_register(devlink);
- mutex_unlock(&devlink_mutex);
}
EXPORT_SYMBOL_GPL(devlink_register);
@@ -9598,13 +9741,13 @@ void devlink_unregister(struct devlink *devlink)
ASSERT_DEVLINK_REGISTERED(devlink);
/* Make sure that we are in .remove() routine */
+ xa_set_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
devlink_put(devlink);
wait_for_completion(&devlink->comp);
- mutex_lock(&devlink_mutex);
devlink_notify_unregister(devlink);
xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
- mutex_unlock(&devlink_mutex);
+ xa_clear_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
}
EXPORT_SYMBOL_GPL(devlink_unregister);
@@ -9620,6 +9763,7 @@ void devlink_free(struct devlink *devlink)
mutex_destroy(&devlink->linecards_lock);
mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
+ lockdep_unregister_key(&devlink->lock_key);
WARN_ON(!list_empty(&devlink->trap_policer_list));
WARN_ON(!list_empty(&devlink->trap_group_list));
WARN_ON(!list_empty(&devlink->trap_list));
@@ -9673,11 +9817,24 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
cancel_delayed_work_sync(&devlink_port->type_warn_dw);
}
+/**
+ * devl_port_register() - Register devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ * @port_index: driver-specific numerical identifier of the port
+ *
+ * Register a devlink port with the provided port index. The caller may
+ * use any indexing scheme, even a hw-related one. The devlink_port
+ * structure can conveniently be embedded inside the driver's private
+ * structure. Note that the caller should take care of zeroing the
+ * devlink_port structure.
+ */
int devl_port_register(struct devlink *devlink,
struct devlink_port *devlink_port,
unsigned int port_index)
{
- lockdep_assert_held(&devlink->lock);
+ devl_assert_locked(devlink);
if (devlink_port_index_exists(devlink, port_index))
return -EEXIST;
@@ -9711,6 +9868,8 @@ EXPORT_SYMBOL_GPL(devl_port_register);
* is convenient to be embedded inside user driver private structure.
* Note that the caller should take care of zeroing the devlink_port
* structure.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
*/
int devlink_port_register(struct devlink *devlink,
struct devlink_port *devlink_port,
@@ -9718,13 +9877,18 @@ int devlink_port_register(struct devlink *devlink,
{
int err;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
err = devl_port_register(devlink, devlink_port, port_index);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
return err;
}
EXPORT_SYMBOL_GPL(devlink_port_register);
+/**
+ * devl_port_unregister() - Unregister devlink port
+ *
+ * @devlink_port: devlink port
+ */
void devl_port_unregister(struct devlink_port *devlink_port)
{
lockdep_assert_held(&devlink_port->devlink->lock);
@@ -9742,14 +9906,16 @@ EXPORT_SYMBOL_GPL(devl_port_unregister);
* devlink_port_unregister - Unregister devlink port
*
* @devlink_port: devlink port
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
*/
void devlink_port_unregister(struct devlink_port *devlink_port)
{
struct devlink *devlink = devlink_port->devlink;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
devl_port_unregister(devlink_port);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devlink_port_unregister);
@@ -10002,20 +10168,13 @@ int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
}
EXPORT_SYMBOL_GPL(devl_rate_leaf_create);
-int
-devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
-{
- struct devlink *devlink = devlink_port->devlink;
- int ret;
-
- mutex_lock(&devlink->lock);
- ret = devl_rate_leaf_create(devlink_port, priv);
- mutex_unlock(&devlink->lock);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(devlink_rate_leaf_create);
-
+/**
+ * devl_rate_leaf_destroy - destroy devlink rate leaf
+ *
+ * @devlink_port: devlink port linked to the rate object
+ *
+ * Destroy the devlink rate object of type leaf on provided @devlink_port.
+ */
void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
{
struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
@@ -10034,27 +10193,6 @@ void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
/**
- * devlink_rate_leaf_destroy - destroy devlink rate leaf
- *
- * @devlink_port: devlink port linked to the rate object
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
-{
- struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
- struct devlink *devlink = devlink_port->devlink;
-
- if (!devlink_rate)
- return;
-
- mutex_lock(&devlink->lock);
- devl_rate_leaf_destroy(devlink_port);
- mutex_unlock(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
-
-/**
* devl_rate_nodes_destroy - destroy all devlink rate nodes on device
* @devlink: devlink instance
*
@@ -10092,24 +10230,6 @@ void devl_rate_nodes_destroy(struct devlink *devlink)
EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
/**
- * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device
- *
- * @devlink: devlink instance
- *
- * Unset parent for all rate objects and destroy all rate nodes
- * on specified device.
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_rate_nodes_destroy(struct devlink *devlink)
-{
- mutex_lock(&devlink->lock);
- devl_rate_nodes_destroy(devlink);
- mutex_unlock(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy);
-
-/**
* devlink_port_linecard_set - Link port with a linecard
*
* @devlink_port: devlink port
@@ -10331,6 +10451,7 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
{
mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->nested_devlink);
linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
linecard->type = NULL;
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
@@ -10349,6 +10470,7 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
{
mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->nested_devlink);
linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
mutex_unlock(&linecard->state_lock);
@@ -10396,25 +10518,38 @@ void devlink_linecard_deactivate(struct devlink_linecard *linecard)
}
EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
-int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
- u32 size, u16 ingress_pools_count,
- u16 egress_pools_count, u16 ingress_tc_count,
- u16 egress_tc_count)
+/**
+ * devlink_linecard_nested_dl_set - Attach/detach nested devlink
+ * instance to linecard.
+ *
+ * @linecard: devlink linecard
+ * @nested_devlink: devlink instance to attach or NULL to detach
+ */
+void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+ struct devlink *nested_devlink)
+{
+ mutex_lock(&linecard->state_lock);
+ linecard->nested_devlink = nested_devlink;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
+
+int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
+ u32 size, u16 ingress_pools_count,
+ u16 egress_pools_count, u16 ingress_tc_count,
+ u16 egress_tc_count)
{
struct devlink_sb *devlink_sb;
- int err = 0;
- mutex_lock(&devlink->lock);
- if (devlink_sb_index_exists(devlink, sb_index)) {
- err = -EEXIST;
- goto unlock;
- }
+ lockdep_assert_held(&devlink->lock);
+
+ if (devlink_sb_index_exists(devlink, sb_index))
+ return -EEXIST;
devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL);
- if (!devlink_sb) {
- err = -ENOMEM;
- goto unlock;
- }
+ if (!devlink_sb)
+ return -ENOMEM;
devlink_sb->index = sb_index;
devlink_sb->size = size;
devlink_sb->ingress_pools_count = ingress_pools_count;
@@ -10422,57 +10557,78 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
devlink_sb->ingress_tc_count = ingress_tc_count;
devlink_sb->egress_tc_count = egress_tc_count;
list_add_tail(&devlink_sb->list, &devlink->sb_list);
-unlock:
- mutex_unlock(&devlink->lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_sb_register);
+
+int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
+ u32 size, u16 ingress_pools_count,
+ u16 egress_pools_count, u16 ingress_tc_count,
+ u16 egress_tc_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_sb_register(devlink, sb_index, size, ingress_pools_count,
+ egress_pools_count, ingress_tc_count,
+ egress_tc_count);
+ devl_unlock(devlink);
return err;
}
EXPORT_SYMBOL_GPL(devlink_sb_register);
-void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index)
{
struct devlink_sb *devlink_sb;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
WARN_ON(!devlink_sb);
list_del(&devlink_sb->list);
- mutex_unlock(&devlink->lock);
kfree(devlink_sb);
}
+EXPORT_SYMBOL_GPL(devl_sb_unregister);
+
+void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+{
+ devl_lock(devlink);
+ devl_sb_unregister(devlink, sb_index);
+ devl_unlock(devlink);
+}
EXPORT_SYMBOL_GPL(devlink_sb_unregister);
/**
- * devlink_dpipe_headers_register - register dpipe headers
+ * devl_dpipe_headers_register - register dpipe headers
*
- * @devlink: devlink
- * @dpipe_headers: dpipe header array
+ * @devlink: devlink
+ * @dpipe_headers: dpipe header array
*
- * Register the headers supported by hardware.
+ * Register the headers supported by hardware.
*/
-int devlink_dpipe_headers_register(struct devlink *devlink,
- struct devlink_dpipe_headers *dpipe_headers)
+void devl_dpipe_headers_register(struct devlink *devlink,
+ struct devlink_dpipe_headers *dpipe_headers)
{
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
devlink->dpipe_headers = dpipe_headers;
- mutex_unlock(&devlink->lock);
- return 0;
}
-EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
+EXPORT_SYMBOL_GPL(devl_dpipe_headers_register);
/**
- * devlink_dpipe_headers_unregister - unregister dpipe headers
+ * devl_dpipe_headers_unregister - unregister dpipe headers
*
- * @devlink: devlink
+ * @devlink: devlink
*
- * Unregister the headers supported by hardware.
+ * Unregister the headers supported by hardware.
*/
-void devlink_dpipe_headers_unregister(struct devlink *devlink)
+void devl_dpipe_headers_unregister(struct devlink *devlink)
{
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
devlink->dpipe_headers = NULL;
- mutex_unlock(&devlink->lock);
}
-EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister);
+EXPORT_SYMBOL_GPL(devl_dpipe_headers_unregister);
/**
* devlink_dpipe_table_counter_enabled - check if counter allocation
@@ -10506,38 +10662,33 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
/**
- * devlink_dpipe_table_register - register dpipe table
+ * devl_dpipe_table_register - register dpipe table
*
- * @devlink: devlink
- * @table_name: table name
- * @table_ops: table ops
- * @priv: priv
- * @counter_control_extern: external control for counters
+ * @devlink: devlink
+ * @table_name: table name
+ * @table_ops: table ops
+ * @priv: priv
+ * @counter_control_extern: external control for counters
*/
-int devlink_dpipe_table_register(struct devlink *devlink,
- const char *table_name,
- struct devlink_dpipe_table_ops *table_ops,
- void *priv, bool counter_control_extern)
+int devl_dpipe_table_register(struct devlink *devlink,
+ const char *table_name,
+ struct devlink_dpipe_table_ops *table_ops,
+ void *priv, bool counter_control_extern)
{
struct devlink_dpipe_table *table;
- int err = 0;
+
+ lockdep_assert_held(&devlink->lock);
if (WARN_ON(!table_ops->size_get))
return -EINVAL;
- mutex_lock(&devlink->lock);
-
if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
- devlink)) {
- err = -EEXIST;
- goto unlock;
- }
+ devlink))
+ return -EEXIST;
table = kzalloc(sizeof(*table), GFP_KERNEL);
- if (!table) {
- err = -ENOMEM;
- goto unlock;
- }
+ if (!table)
+ return -ENOMEM;
table->name = table_name;
table->table_ops = table_ops;
@@ -10545,77 +10696,69 @@ int devlink_dpipe_table_register(struct devlink *devlink,
table->counter_control_extern = counter_control_extern;
list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
-unlock:
- mutex_unlock(&devlink->lock);
- return err;
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
+EXPORT_SYMBOL_GPL(devl_dpipe_table_register);
/**
- * devlink_dpipe_table_unregister - unregister dpipe table
+ * devl_dpipe_table_unregister - unregister dpipe table
*
- * @devlink: devlink
- * @table_name: table name
+ * @devlink: devlink
+ * @table_name: table name
*/
-void devlink_dpipe_table_unregister(struct devlink *devlink,
- const char *table_name)
+void devl_dpipe_table_unregister(struct devlink *devlink,
+ const char *table_name)
{
struct devlink_dpipe_table *table;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
table_name, devlink);
if (!table)
- goto unlock;
+ return;
list_del_rcu(&table->list);
- mutex_unlock(&devlink->lock);
kfree_rcu(table, rcu);
- return;
-unlock:
- mutex_unlock(&devlink->lock);
}
-EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
+EXPORT_SYMBOL_GPL(devl_dpipe_table_unregister);
/**
- * devlink_resource_register - devlink resource register
+ * devl_resource_register - devlink resource register
*
- * @devlink: devlink
- * @resource_name: resource's name
- * @resource_size: resource's size
- * @resource_id: resource's id
- * @parent_resource_id: resource's parent id
- * @size_params: size parameters
+ * @devlink: devlink
+ * @resource_name: resource's name
+ * @resource_size: resource's size
+ * @resource_id: resource's id
+ * @parent_resource_id: resource's parent id
+ * @size_params: size parameters
*
- * Generic resources should reuse the same names across drivers.
- * Please see the generic resources list at:
- * Documentation/networking/devlink/devlink-resource.rst
+ * Generic resources should reuse the same names across drivers.
+ * Please see the generic resources list at:
+ * Documentation/networking/devlink/devlink-resource.rst
*/
-int devlink_resource_register(struct devlink *devlink,
- const char *resource_name,
- u64 resource_size,
- u64 resource_id,
- u64 parent_resource_id,
- const struct devlink_resource_size_params *size_params)
+int devl_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *size_params)
{
struct devlink_resource *resource;
struct list_head *resource_list;
bool top_hierarchy;
- int err = 0;
+
+ lockdep_assert_held(&devlink->lock);
top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
- mutex_lock(&devlink->lock);
resource = devlink_resource_find(devlink, NULL, resource_id);
- if (resource) {
- err = -EINVAL;
- goto out;
- }
+ if (resource)
+ return -EINVAL;
resource = kzalloc(sizeof(*resource), GFP_KERNEL);
- if (!resource) {
- err = -ENOMEM;
- goto out;
- }
+ if (!resource)
+ return -ENOMEM;
if (top_hierarchy) {
resource_list = &devlink->resource_list;
@@ -10629,8 +10772,7 @@ int devlink_resource_register(struct devlink *devlink,
resource->parent = parent_resource;
} else {
kfree(resource);
- err = -EINVAL;
- goto out;
+ return -EINVAL;
}
}
@@ -10643,8 +10785,40 @@ int devlink_resource_register(struct devlink *devlink,
sizeof(resource->size_params));
INIT_LIST_HEAD(&resource->resource_list);
list_add_tail(&resource->list, resource_list);
-out:
- mutex_unlock(&devlink->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_resource_register);
+
+/**
+ * devlink_resource_register - devlink resource register
+ *
+ * @devlink: devlink
+ * @resource_name: resource's name
+ * @resource_size: resource's size
+ * @resource_id: resource's id
+ * @parent_resource_id: resource's parent id
+ * @size_params: size parameters
+ *
+ * Generic resources should reuse the same names across drivers.
+ * Please see the generic resources list at:
+ * Documentation/networking/devlink/devlink-resource.rst
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+int devlink_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *size_params)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_resource_register(devlink, resource_name, resource_size,
+ resource_id, parent_resource_id, size_params);
+ devl_unlock(devlink);
return err;
}
EXPORT_SYMBOL_GPL(devlink_resource_register);
@@ -10663,15 +10837,15 @@ static void devlink_resource_unregister(struct devlink *devlink,
}
/**
- * devlink_resources_unregister - free all resources
+ * devl_resources_unregister - free all resources
*
- * @devlink: devlink
+ * @devlink: devlink
*/
-void devlink_resources_unregister(struct devlink *devlink)
+void devl_resources_unregister(struct devlink *devlink)
{
struct devlink_resource *tmp, *child_resource;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list,
list) {
@@ -10679,69 +10853,100 @@ void devlink_resources_unregister(struct devlink *devlink)
list_del(&child_resource->list);
kfree(child_resource);
}
+}
+EXPORT_SYMBOL_GPL(devl_resources_unregister);
- mutex_unlock(&devlink->lock);
+/**
+ * devlink_resources_unregister - free all resources
+ *
+ * @devlink: devlink
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_resources_unregister(struct devlink *devlink)
+{
+ devl_lock(devlink);
+ devl_resources_unregister(devlink);
+ devl_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devlink_resources_unregister);
/**
- * devlink_resource_size_get - get and update size
+ * devl_resource_size_get - get and update size
*
- * @devlink: devlink
- * @resource_id: the requested resource id
- * @p_resource_size: ptr to update
+ * @devlink: devlink
+ * @resource_id: the requested resource id
+ * @p_resource_size: ptr to update
*/
-int devlink_resource_size_get(struct devlink *devlink,
- u64 resource_id,
- u64 *p_resource_size)
+int devl_resource_size_get(struct devlink *devlink,
+ u64 resource_id,
+ u64 *p_resource_size)
{
struct devlink_resource *resource;
- int err = 0;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
resource = devlink_resource_find(devlink, NULL, resource_id);
- if (!resource) {
- err = -EINVAL;
- goto out;
- }
+ if (!resource)
+ return -EINVAL;
*p_resource_size = resource->size_new;
resource->size = resource->size_new;
-out:
- mutex_unlock(&devlink->lock);
- return err;
+ return 0;
}
-EXPORT_SYMBOL_GPL(devlink_resource_size_get);
+EXPORT_SYMBOL_GPL(devl_resource_size_get);
/**
- * devlink_dpipe_table_resource_set - set the resource id
+ * devl_dpipe_table_resource_set - set the resource id
*
- * @devlink: devlink
- * @table_name: table name
- * @resource_id: resource id
- * @resource_units: number of resource's units consumed per table's entry
+ * @devlink: devlink
+ * @table_name: table name
+ * @resource_id: resource id
+ * @resource_units: number of resource's units consumed per table's entry
*/
-int devlink_dpipe_table_resource_set(struct devlink *devlink,
- const char *table_name, u64 resource_id,
- u64 resource_units)
+int devl_dpipe_table_resource_set(struct devlink *devlink,
+ const char *table_name, u64 resource_id,
+ u64 resource_units)
{
struct devlink_dpipe_table *table;
- int err = 0;
- mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
table_name, devlink);
- if (!table) {
- err = -EINVAL;
- goto out;
- }
+ if (!table)
+ return -EINVAL;
+
table->resource_id = resource_id;
table->resource_units = resource_units;
table->resource_valid = true;
-out:
- mutex_unlock(&devlink->lock);
- return err;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_table_resource_set);
+
+/**
+ * devl_resource_occ_get_register - register occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ * @occ_get: occupancy getter callback
+ * @occ_get_priv: occupancy getter callback priv
+ */
+void devl_resource_occ_get_register(struct devlink *devlink,
+ u64 resource_id,
+ devlink_resource_occ_get_t *occ_get,
+ void *occ_get_priv)
+{
+ struct devlink_resource *resource;
+
+ lockdep_assert_held(&devlink->lock);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (WARN_ON(!resource))
+ return;
+ WARN_ON(resource->occ_get);
+
+ resource->occ_get = occ_get;
+ resource->occ_get_priv = occ_get_priv;
}
-EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set);
+EXPORT_SYMBOL_GPL(devl_resource_occ_get_register);
/**
* devlink_resource_occ_get_register - register occupancy getter
@@ -10750,48 +10955,58 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set);
* @resource_id: resource id
* @occ_get: occupancy getter callback
* @occ_get_priv: occupancy getter callback priv
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
*/
void devlink_resource_occ_get_register(struct devlink *devlink,
u64 resource_id,
devlink_resource_occ_get_t *occ_get,
void *occ_get_priv)
{
+ devl_lock(devlink);
+ devl_resource_occ_get_register(devlink, resource_id,
+ occ_get, occ_get_priv);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
+
+/**
+ * devl_resource_occ_get_unregister - unregister occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ */
+void devl_resource_occ_get_unregister(struct devlink *devlink,
+ u64 resource_id)
+{
struct devlink_resource *resource;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
resource = devlink_resource_find(devlink, NULL, resource_id);
if (WARN_ON(!resource))
- goto out;
- WARN_ON(resource->occ_get);
+ return;
+ WARN_ON(!resource->occ_get);
- resource->occ_get = occ_get;
- resource->occ_get_priv = occ_get_priv;
-out:
- mutex_unlock(&devlink->lock);
+ resource->occ_get = NULL;
+ resource->occ_get_priv = NULL;
}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
+EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister);
/**
* devlink_resource_occ_get_unregister - unregister occupancy getter
*
* @devlink: devlink
* @resource_id: resource id
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
*/
void devlink_resource_occ_get_unregister(struct devlink *devlink,
u64 resource_id)
{
- struct devlink_resource *resource;
-
- mutex_lock(&devlink->lock);
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (WARN_ON(!resource))
- goto out;
- WARN_ON(!resource->occ_get);
-
- resource->occ_get = NULL;
- resource->occ_get_priv = NULL;
-out:
- mutex_unlock(&devlink->lock);
+ devl_lock(devlink);
+ devl_resource_occ_get_unregister(devlink, resource_id);
+ devl_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
@@ -11012,51 +11227,67 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
EXPORT_SYMBOL_GPL(devlink_param_value_changed);
/**
- * devlink_region_create - create a new address region
+ * devl_region_create - create a new address region
*
- * @devlink: devlink
- * @ops: region operations and name
- * @region_max_snapshots: Maximum supported number of snapshots for region
- * @region_size: size of region
+ * @devlink: devlink
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
*/
-struct devlink_region *
-devlink_region_create(struct devlink *devlink,
- const struct devlink_region_ops *ops,
- u32 region_max_snapshots, u64 region_size)
+struct devlink_region *devl_region_create(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots,
+ u64 region_size)
{
struct devlink_region *region;
- int err = 0;
+
+ devl_assert_locked(devlink);
if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
return ERR_PTR(-EINVAL);
- mutex_lock(&devlink->lock);
-
- if (devlink_region_get_by_name(devlink, ops->name)) {
- err = -EEXIST;
- goto unlock;
- }
+ if (devlink_region_get_by_name(devlink, ops->name))
+ return ERR_PTR(-EEXIST);
region = kzalloc(sizeof(*region), GFP_KERNEL);
- if (!region) {
- err = -ENOMEM;
- goto unlock;
- }
+ if (!region)
+ return ERR_PTR(-ENOMEM);
region->devlink = devlink;
region->max_snapshots = region_max_snapshots;
region->ops = ops;
region->size = region_size;
INIT_LIST_HEAD(&region->snapshot_list);
+ mutex_init(&region->snapshot_lock);
list_add_tail(&region->list, &devlink->region_list);
devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
- mutex_unlock(&devlink->lock);
return region;
+}
+EXPORT_SYMBOL_GPL(devl_region_create);
-unlock:
- mutex_unlock(&devlink->lock);
- return ERR_PTR(err);
+/**
+ * devlink_region_create - create a new address region
+ *
+ * @devlink: devlink
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+struct devlink_region *
+devlink_region_create(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size)
+{
+ struct devlink_region *region;
+
+ devl_lock(devlink);
+ region = devl_region_create(devlink, ops, region_max_snapshots,
+ region_size);
+ devl_unlock(devlink);
+ return region;
}
EXPORT_SYMBOL_GPL(devlink_region_create);
@@ -11067,6 +11298,8 @@ EXPORT_SYMBOL_GPL(devlink_region_create);
* @ops: region operations and name
* @region_max_snapshots: Maximum supported number of snapshots for region
* @region_size: size of region
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
*/
struct devlink_region *
devlink_port_region_create(struct devlink_port *port,
@@ -11080,7 +11313,7 @@ devlink_port_region_create(struct devlink_port *port,
if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
return ERR_PTR(-EINVAL);
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
if (devlink_port_region_get_by_name(port, ops->name)) {
err = -EEXIST;
@@ -11099,40 +11332,58 @@ devlink_port_region_create(struct devlink_port *port,
region->port_ops = ops;
region->size = region_size;
INIT_LIST_HEAD(&region->snapshot_list);
+ mutex_init(&region->snapshot_lock);
list_add_tail(&region->list, &port->region_list);
devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
return region;
unlock:
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(devlink_port_region_create);
/**
- * devlink_region_destroy - destroy address region
+ * devl_region_destroy - destroy address region
*
- * @region: devlink region to destroy
+ * @region: devlink region to destroy
*/
-void devlink_region_destroy(struct devlink_region *region)
+void devl_region_destroy(struct devlink_region *region)
{
struct devlink *devlink = region->devlink;
struct devlink_snapshot *snapshot, *ts;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
/* Free all snapshots of region */
list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
devlink_region_snapshot_del(region, snapshot);
list_del(&region->list);
+ mutex_destroy(&region->snapshot_lock);
devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
- mutex_unlock(&devlink->lock);
kfree(region);
}
+EXPORT_SYMBOL_GPL(devl_region_destroy);
+
+/**
+ * devlink_region_destroy - destroy address region
+ *
+ * @region: devlink region to destroy
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_region_destroy(struct devlink_region *region)
+{
+ struct devlink *devlink = region->devlink;
+
+ devl_lock(devlink);
+ devl_region_destroy(region);
+ devl_unlock(devlink);
+}
EXPORT_SYMBOL_GPL(devlink_region_destroy);
/**
@@ -11152,13 +11403,7 @@ EXPORT_SYMBOL_GPL(devlink_region_destroy);
*/
int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
{
- int err;
-
- mutex_lock(&devlink->lock);
- err = __devlink_region_snapshot_id_get(devlink, id);
- mutex_unlock(&devlink->lock);
-
- return err;
+ return __devlink_region_snapshot_id_get(devlink, id);
}
EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
@@ -11174,9 +11419,7 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
*/
void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id)
{
- mutex_lock(&devlink->lock);
__devlink_snapshot_id_decrement(devlink, id);
- mutex_unlock(&devlink->lock);
}
EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put);
@@ -11195,13 +11438,11 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put);
int devlink_region_snapshot_create(struct devlink_region *region,
u8 *data, u32 snapshot_id)
{
- struct devlink *devlink = region->devlink;
int err;
- mutex_lock(&devlink->lock);
+ mutex_lock(&region->snapshot_lock);
err = __devlink_region_snapshot_create(region, data, snapshot_id);
- mutex_unlock(&devlink->lock);
-
+ mutex_unlock(&region->snapshot_lock);
return err;
}
EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
@@ -11566,7 +11807,7 @@ static void devlink_trap_disable(struct devlink *devlink,
}
/**
- * devlink_traps_register - Register packet traps with devlink.
+ * devl_traps_register - Register packet traps with devlink.
* @devlink: devlink.
* @traps: Packet traps.
* @traps_count: Count of provided packet traps.
@@ -11574,16 +11815,16 @@ static void devlink_trap_disable(struct devlink *devlink,
*
* Return: Non-zero value on failure.
*/
-int devlink_traps_register(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count, void *priv)
+int devl_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv)
{
int i, err;
if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
return -EINVAL;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
for (i = 0; i < traps_count; i++) {
const struct devlink_trap *trap = &traps[i];
@@ -11595,7 +11836,6 @@ int devlink_traps_register(struct devlink *devlink,
if (err)
goto err_trap_register;
}
- mutex_unlock(&devlink->lock);
return 0;
@@ -11603,24 +11843,47 @@ err_trap_register:
err_trap_verify:
for (i--; i >= 0; i--)
devlink_trap_unregister(devlink, &traps[i]);
- mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_traps_register);
+
+/**
+ * devlink_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_traps_register(devlink, traps, traps_count, priv);
+ devl_unlock(devlink);
return err;
}
EXPORT_SYMBOL_GPL(devlink_traps_register);
/**
- * devlink_traps_unregister - Unregister packet traps from devlink.
+ * devl_traps_unregister - Unregister packet traps from devlink.
* @devlink: devlink.
* @traps: Packet traps.
* @traps_count: Count of provided packet traps.
*/
-void devlink_traps_unregister(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count)
+void devl_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count)
{
int i;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
/* Make sure we do not have any packets in-flight while unregistering
* traps by disabling all of them and waiting for a grace period.
*/
@@ -11629,7 +11892,24 @@ void devlink_traps_unregister(struct devlink *devlink,
synchronize_rcu();
for (i = traps_count - 1; i >= 0; i--)
devlink_trap_unregister(devlink, &traps[i]);
- mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_traps_unregister);
+
+/**
+ * devlink_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count)
+{
+ devl_lock(devlink);
+ devl_traps_unregister(devlink, traps, traps_count);
+ devl_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devlink_traps_unregister);
@@ -11641,8 +11921,8 @@ devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
stats = this_cpu_ptr(trap_stats);
u64_stats_update_begin(&stats->syncp);
- stats->rx_bytes += skb_len;
- stats->rx_packets++;
+ u64_stats_add(&stats->rx_bytes, skb_len);
+ u64_stats_inc(&stats->rx_packets);
u64_stats_update_end(&stats->syncp);
}
@@ -11788,20 +12068,20 @@ devlink_trap_group_unregister(struct devlink *devlink,
}
/**
- * devlink_trap_groups_register - Register packet trap groups with devlink.
+ * devl_trap_groups_register - Register packet trap groups with devlink.
* @devlink: devlink.
* @groups: Packet trap groups.
* @groups_count: Count of provided packet trap groups.
*
* Return: Non-zero value on failure.
*/
-int devlink_trap_groups_register(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
+int devl_trap_groups_register(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
{
int i, err;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
for (i = 0; i < groups_count; i++) {
const struct devlink_trap_group *group = &groups[i];
@@ -11813,7 +12093,6 @@ int devlink_trap_groups_register(struct devlink *devlink,
if (err)
goto err_trap_group_register;
}
- mutex_unlock(&devlink->lock);
return 0;
@@ -11821,27 +12100,66 @@ err_trap_group_register:
err_trap_group_verify:
for (i--; i >= 0; i--)
devlink_trap_group_unregister(devlink, &groups[i]);
- mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_trap_groups_register);
+
+/**
+ * devlink_trap_groups_register - Register packet trap groups with devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_trap_groups_register(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_trap_groups_register(devlink, groups, groups_count);
+ devl_unlock(devlink);
return err;
}
EXPORT_SYMBOL_GPL(devlink_trap_groups_register);
/**
- * devlink_trap_groups_unregister - Unregister packet trap groups from devlink.
+ * devl_trap_groups_unregister - Unregister packet trap groups from devlink.
* @devlink: devlink.
* @groups: Packet trap groups.
* @groups_count: Count of provided packet trap groups.
*/
-void devlink_trap_groups_unregister(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
+void devl_trap_groups_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
{
int i;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
for (i = groups_count - 1; i >= 0; i--)
devlink_trap_group_unregister(devlink, &groups[i]);
- mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_trap_groups_unregister);
+
+/**
+ * devlink_trap_groups_unregister - Unregister packet trap groups from devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_trap_groups_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ devl_lock(devlink);
+ devl_trap_groups_unregister(devlink, groups, groups_count);
+ devl_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister);
@@ -11927,7 +12245,7 @@ devlink_trap_policer_unregister(struct devlink *devlink,
}
/**
- * devlink_trap_policers_register - Register packet trap policers with devlink.
+ * devl_trap_policers_register - Register packet trap policers with devlink.
* @devlink: devlink.
* @policers: Packet trap policers.
* @policers_count: Count of provided packet trap policers.
@@ -11935,13 +12253,13 @@ devlink_trap_policer_unregister(struct devlink *devlink,
* Return: Non-zero value on failure.
*/
int
-devlink_trap_policers_register(struct devlink *devlink,
- const struct devlink_trap_policer *policers,
- size_t policers_count)
+devl_trap_policers_register(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count)
{
int i, err;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
for (i = 0; i < policers_count; i++) {
const struct devlink_trap_policer *policer = &policers[i];
@@ -11956,38 +12274,34 @@ devlink_trap_policers_register(struct devlink *devlink,
if (err)
goto err_trap_policer_register;
}
- mutex_unlock(&devlink->lock);
-
return 0;
err_trap_policer_register:
err_trap_policer_verify:
for (i--; i >= 0; i--)
devlink_trap_policer_unregister(devlink, &policers[i]);
- mutex_unlock(&devlink->lock);
return err;
}
-EXPORT_SYMBOL_GPL(devlink_trap_policers_register);
+EXPORT_SYMBOL_GPL(devl_trap_policers_register);
/**
- * devlink_trap_policers_unregister - Unregister packet trap policers from devlink.
+ * devl_trap_policers_unregister - Unregister packet trap policers from devlink.
* @devlink: devlink.
* @policers: Packet trap policers.
* @policers_count: Count of provided packet trap policers.
*/
void
-devlink_trap_policers_unregister(struct devlink *devlink,
- const struct devlink_trap_policer *policers,
- size_t policers_count)
+devl_trap_policers_unregister(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count)
{
int i;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
for (i = policers_count - 1; i >= 0; i--)
devlink_trap_policer_unregister(devlink, &policers[i]);
- mutex_unlock(&devlink->lock);
}
-EXPORT_SYMBOL_GPL(devlink_trap_policers_unregister);
+EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
static void __devlink_compat_running_version(struct devlink *devlink,
char *buf, size_t len)
@@ -12039,9 +12353,9 @@ void devlink_compat_running_version(struct devlink *devlink,
if (!devlink->ops->info_get)
return;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
__devlink_compat_running_version(devlink, buf, len);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
}
int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
@@ -12056,11 +12370,11 @@ int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
if (ret)
return ret;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
devlink_flash_update_begin_notify(devlink);
ret = devlink->ops->flash_update(devlink, &params, NULL);
devlink_flash_update_end_notify(devlink);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
release_firmware(params.fw);
@@ -12113,25 +12427,18 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
/* In case network namespace is getting destroyed, reload
* all devlink instances from this namespace into init_net.
*/
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), net))
- goto retry;
-
+ devlinks_xa_for_each_registered_get(net, index, devlink) {
WARN_ON(!(devlink->features & DEVLINK_F_RELOAD));
+ mutex_lock(&devlink->lock);
err = devlink_reload(devlink, &init_net,
DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
DEVLINK_RELOAD_LIMIT_UNSPEC,
&actions_performed, NULL);
+ mutex_unlock(&devlink->lock);
if (err && err != -EOPNOTSUPP)
pr_warn("Failed to reload devlink instance into init_net\n");
-retry:
devlink_put(devlink);
}
- mutex_unlock(&devlink_mutex);
}
static struct pernet_operations devlink_pernet_ops __net_initdata = {
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 41cac0e4834e..75501e1bdd25 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -48,19 +48,6 @@
static int trace_state = TRACE_OFF;
static bool monitor_hw;
-#undef EM
-#undef EMe
-
-#define EM(a, b) [a] = #b,
-#define EMe(a, b) [a] = #b
-
-/* drop_reasons is used to translate 'enum skb_drop_reason' to string,
- * which is reported to user space.
- */
-static const char * const drop_reasons[] = {
- TRACE_SKB_DROP_REASON
-};
-
/* net_dm_mutex
*
* An overall lock guarding every operation coming from userspace.
@@ -68,7 +55,7 @@ static const char * const drop_reasons[] = {
static DEFINE_MUTEX(net_dm_mutex);
struct net_dm_stats {
- u64 dropped;
+ u64_stats_t dropped;
struct u64_stats_sync syncp;
};
@@ -543,7 +530,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
unlock_free:
spin_unlock_irqrestore(&data->drop_queue.lock, flags);
u64_stats_update_begin(&data->stats.syncp);
- data->stats.dropped++;
+ u64_stats_inc(&data->stats.dropped);
u64_stats_update_end(&data->stats.syncp);
consume_skb(nskb);
}
@@ -877,7 +864,8 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata)
}
hw_metadata->input_dev = metadata->input_dev;
- dev_hold_track(hw_metadata->input_dev, &hw_metadata->dev_tracker, GFP_ATOMIC);
+ netdev_hold(hw_metadata->input_dev, &hw_metadata->dev_tracker,
+ GFP_ATOMIC);
return hw_metadata;
@@ -893,7 +881,7 @@ free_hw_metadata:
static void
net_dm_hw_metadata_free(struct devlink_trap_metadata *hw_metadata)
{
- dev_put_track(hw_metadata->input_dev, &hw_metadata->dev_tracker);
+ netdev_put(hw_metadata->input_dev, &hw_metadata->dev_tracker);
kfree(hw_metadata->fa_cookie);
kfree(hw_metadata->trap_name);
kfree(hw_metadata->trap_group_name);
@@ -998,7 +986,7 @@ net_dm_hw_trap_packet_probe(void *ignore, const struct devlink *devlink,
unlock_free:
spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
u64_stats_update_begin(&hw_data->stats.syncp);
- hw_data->stats.dropped++;
+ u64_stats_inc(&hw_data->stats.dropped);
u64_stats_update_end(&hw_data->stats.syncp);
net_dm_hw_metadata_free(n_hw_metadata);
free:
@@ -1445,10 +1433,10 @@ static void net_dm_stats_read(struct net_dm_stats *stats)
do {
start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
- dropped = cpu_stats->dropped;
+ dropped = u64_stats_read(&cpu_stats->dropped);
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
- stats->dropped += dropped;
+ u64_stats_add(&stats->dropped, dropped);
}
}
@@ -1464,7 +1452,7 @@ static int net_dm_stats_put(struct sk_buff *msg)
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
- stats.dropped, NET_DM_ATTR_PAD))
+ u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -1489,10 +1477,10 @@ static void net_dm_hw_stats_read(struct net_dm_stats *stats)
do {
start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
- dropped = cpu_stats->dropped;
+ dropped = u64_stats_read(&cpu_stats->dropped);
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
- stats->dropped += dropped;
+ u64_stats_add(&stats->dropped, dropped);
}
}
@@ -1508,7 +1496,7 @@ static int net_dm_hw_stats_put(struct sk_buff *msg)
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
- stats.dropped, NET_DM_ATTR_PAD))
+ u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD))
goto nla_put_failure;
nla_nest_end(msg, attr);
diff --git a/net/core/dst.c b/net/core/dst.c
index d16c2c9bfebd..bc9c9be4e080 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -49,7 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
unsigned short flags)
{
dst->dev = dev;
- dev_hold_track(dev, &dst->dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &dst->dev_tracker, GFP_ATOMIC);
dst->ops = ops;
dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
@@ -117,7 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
if (dst->ops->destroy)
dst->ops->destroy(dst);
- dev_put_track(dst->dev, &dst->dev_tracker);
+ netdev_put(dst->dev, &dst->dev_tracker);
lwtstate_put(dst->lwtstate);
@@ -159,8 +159,8 @@ void dst_dev_put(struct dst_entry *dst)
dst->input = dst_discard;
dst->output = dst_discard_out;
dst->dev = blackhole_netdev;
- dev_replace_track(dev, blackhole_netdev, &dst->dev_tracker,
- GFP_ATOMIC);
+ netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker,
+ GFP_ATOMIC);
}
EXPORT_SYMBOL(dst_dev_put);
diff --git a/net/core/failover.c b/net/core/failover.c
index dcaa92a85ea2..864d2d83eff4 100644
--- a/net/core/failover.c
+++ b/net/core/failover.c
@@ -252,7 +252,7 @@ struct failover *failover_register(struct net_device *dev,
return ERR_PTR(-ENOMEM);
rcu_assign_pointer(failover->ops, ops);
- dev_hold_track(dev, &failover->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &failover->dev_tracker, GFP_KERNEL);
dev->priv_flags |= IFF_FAILOVER;
rcu_assign_pointer(failover->failover_dev, dev);
@@ -285,7 +285,7 @@ void failover_unregister(struct failover *failover)
failover_dev->name);
failover_dev->priv_flags &= ~IFF_FAILOVER;
- dev_put_track(failover_dev, &failover->dev_tracker);
+ netdev_put(failover_dev, &failover->dev_tracker);
spin_lock(&failover_lock);
list_del(&failover->list);
diff --git a/net/core/filter.c b/net/core/filter.c
index 7950f7520765..5669248aff25 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -237,7 +237,7 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
- u16 tmp, *ptr;
+ __be16 tmp, *ptr;
const int len = sizeof(tmp);
if (offset >= 0) {
@@ -264,7 +264,7 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
- u32 tmp, *ptr;
+ __be32 tmp, *ptr;
const int len = sizeof(tmp);
if (likely(offset >= 0)) {
@@ -3918,7 +3918,7 @@ static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
offset -= frag_size;
}
out:
- return offset + len < size ? addr + offset : NULL;
+ return offset + len <= size ? addr + offset : NULL;
}
BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
@@ -4653,6 +4653,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
} else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4);
+ info->key.flow_flags = FLOWI_FLAG_ANYSRC;
}
return 0;
@@ -5012,8 +5013,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-static int _bpf_setsockopt(struct sock *sk, int level, int optname,
- char *optval, int optlen)
+static int __bpf_setsockopt(struct sock *sk, int level, int optname,
+ char *optval, int optlen)
{
char devname[IFNAMSIZ];
int val, valbool;
@@ -5024,8 +5025,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
if (!sk_fullsock(sk))
return -EINVAL;
- sock_owned_by_me(sk);
-
if (level == SOL_SOCKET) {
if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
return -EINVAL;
@@ -5258,14 +5257,20 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
return ret;
}
-static int _bpf_getsockopt(struct sock *sk, int level, int optname,
+static int _bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen)
{
+ if (sk_fullsock(sk))
+ sock_owned_by_me(sk);
+ return __bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+static int __bpf_getsockopt(struct sock *sk, int level, int optname,
+ char *optval, int optlen)
+{
if (!sk_fullsock(sk))
goto err_clear;
- sock_owned_by_me(sk);
-
if (level == SOL_SOCKET) {
if (optlen != sizeof(int))
goto err_clear;
@@ -5360,6 +5365,14 @@ err_clear:
return -EINVAL;
}
+static int _bpf_getsockopt(struct sock *sk, int level, int optname,
+ char *optval, int optlen)
+{
+ if (sk_fullsock(sk))
+ sock_owned_by_me(sk);
+ return __bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
int, optname, char *, optval, int, optlen)
{
@@ -5400,6 +5413,40 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ return __bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = {
+ .func = bpf_unlocked_sk_setsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ return __bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = {
+ .func = bpf_unlocked_sk_getsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
int, level, int, optname, char *, optval, int, optlen)
{
@@ -6462,8 +6509,6 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
/* bpf_skc_lookup performs the core lookup for different types of sockets,
* taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
- * Returns the socket as an 'unsigned long' to simplify the casting in the
- * callers to satisfy BPF_CALL declarations.
*/
static struct sock *
__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
@@ -6471,8 +6516,8 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
u64 flags)
{
struct sock *sk = NULL;
- u8 family = AF_UNSPEC;
struct net *net;
+ u8 family;
int sdif;
if (len == sizeof(tuple->ipv4))
@@ -6482,8 +6527,7 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
else
return NULL;
- if (unlikely(family == AF_UNSPEC || flags ||
- !((s32)netns_id < 0 || netns_id <= S32_MAX)))
+ if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX)))
goto out;
if (family == AF_INET)
@@ -7465,6 +7509,114 @@ static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
.arg3_type = ARG_ANYTHING,
};
+#ifdef CONFIG_SYN_COOKIES
+BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph,
+ struct tcphdr *, th, u32, th_len)
+{
+ u32 cookie;
+ u16 mss;
+
+ if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+ return -EINVAL;
+
+ mss = tcp_parse_mss_option(th, 0) ?: TCP_MSS_DEFAULT;
+ cookie = __cookie_v4_init_sequence(iph, th, &mss);
+
+ return cookie | ((u64)mss << 32);
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
+ .func = bpf_tcp_raw_gen_syncookie_ipv4,
+ .gpl_only = true, /* __cookie_v4_init_sequence() is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct iphdr),
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
+ struct tcphdr *, th, u32, th_len)
+{
+#if IS_BUILTIN(CONFIG_IPV6)
+ const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
+ sizeof(struct ipv6hdr);
+ u32 cookie;
+ u16 mss;
+
+ if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+ return -EINVAL;
+
+ mss = tcp_parse_mss_option(th, 0) ?: mss_clamp;
+ cookie = __cookie_v6_init_sequence(iph, th, &mss);
+
+ return cookie | ((u64)mss << 32);
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
+ .func = bpf_tcp_raw_gen_syncookie_ipv6,
+ .gpl_only = true, /* __cookie_v6_init_sequence() is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct ipv6hdr),
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
+ struct tcphdr *, th)
+{
+ u32 cookie = ntohl(th->ack_seq) - 1;
+
+ if (__cookie_v4_check(iph, th, cookie) > 0)
+ return 0;
+
+ return -EACCES;
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
+ .func = bpf_tcp_raw_check_syncookie_ipv4,
+ .gpl_only = true, /* __cookie_v4_check is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct iphdr),
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg2_size = sizeof(struct tcphdr),
+};
+
+BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
+ struct tcphdr *, th)
+{
+#if IS_BUILTIN(CONFIG_IPV6)
+ u32 cookie = ntohl(th->ack_seq) - 1;
+
+ if (__cookie_v6_check(iph, th, cookie) > 0)
+ return 0;
+
+ return -EACCES;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
+ .func = bpf_tcp_raw_check_syncookie_ipv6,
+ .gpl_only = true, /* __cookie_v6_check is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct ipv6hdr),
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg2_size = sizeof(struct tcphdr),
+};
+#endif /* CONFIG_SYN_COOKIES */
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -7828,6 +7980,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_assign_proto;
case BPF_FUNC_skb_set_tstamp:
return &bpf_skb_set_tstamp_proto;
+#ifdef CONFIG_SYN_COOKIES
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
+ return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
+ return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
+ return &bpf_tcp_raw_check_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
+ return &bpf_tcp_raw_check_syncookie_ipv6_proto;
+#endif
#endif
default:
return bpf_sk_base_func_proto(func_id);
@@ -7877,6 +8039,16 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_check_syncookie_proto;
case BPF_FUNC_tcp_gen_syncookie:
return &bpf_tcp_gen_syncookie_proto;
+#ifdef CONFIG_SYN_COOKIES
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
+ return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
+ return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
+ return &bpf_tcp_raw_check_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
+ return &bpf_tcp_raw_check_syncookie_ipv6_proto;
+#endif
#endif
default:
return bpf_sk_base_func_proto(func_id);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 6aee04f75e3e..764c4cb3fe8f 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -895,6 +895,11 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
return result == BPF_OK;
}
+static bool is_pppoe_ses_hdr_valid(const struct pppoe_hdr *hdr)
+{
+ return hdr->ver == 1 && hdr->type == 1 && hdr->code == 0;
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @net: associated network namespace, derived from @skb if NULL
@@ -1214,26 +1219,60 @@ proto_again:
struct pppoe_hdr hdr;
__be16 proto;
} *hdr, _hdr;
+ u16 ppp_proto;
+
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr) {
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
}
- nhoff += PPPOE_SES_HLEN;
- switch (hdr->proto) {
- case htons(PPP_IP):
+ if (!is_pppoe_ses_hdr_valid(&hdr->hdr)) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
+ /* least significant bit of the most significant octet
+ * indicates if protocol field was compressed
+ */
+ ppp_proto = ntohs(hdr->proto);
+ if (ppp_proto & 0x0100) {
+ ppp_proto = ppp_proto >> 8;
+ nhoff += PPPOE_SES_HLEN - 1;
+ } else {
+ nhoff += PPPOE_SES_HLEN;
+ }
+
+ if (ppp_proto == PPP_IP) {
proto = htons(ETH_P_IP);
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
- break;
- case htons(PPP_IPV6):
+ } else if (ppp_proto == PPP_IPV6) {
proto = htons(ETH_P_IPV6);
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
- break;
- default:
+ } else if (ppp_proto == PPP_MPLS_UC) {
+ proto = htons(ETH_P_MPLS_UC);
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ } else if (ppp_proto == PPP_MPLS_MC) {
+ proto = htons(ETH_P_MPLS_MC);
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ } else if (ppp_proto_is_valid(ppp_proto)) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ } else {
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
}
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PPPOE)) {
+ struct flow_dissector_key_pppoe *key_pppoe;
+
+ key_pppoe = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PPPOE,
+ target_container);
+ key_pppoe->session_id = hdr->hdr.sid;
+ key_pppoe->ppp_proto = htons(ppp_proto);
+ key_pppoe->type = htons(ETH_P_PPP_SES);
+ }
break;
}
case htons(ETH_P_TIPC): {
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 929f6379a279..8cfb63528d18 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -125,6 +125,13 @@ void flow_rule_match_ports(const struct flow_rule *rule,
}
EXPORT_SYMBOL(flow_rule_match_ports);
+void flow_rule_match_ports_range(const struct flow_rule *rule,
+ struct flow_match_ports_range *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PORTS_RANGE, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ports_range);
+
void flow_rule_match_tcp(const struct flow_rule *rule,
struct flow_match_tcp *out)
{
@@ -223,6 +230,13 @@ void flow_rule_match_ct(const struct flow_rule *rule,
}
EXPORT_SYMBOL(flow_rule_match_ct);
+void flow_rule_match_pppoe(const struct flow_rule *rule,
+ struct flow_match_pppoe *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PPPOE, out);
+}
+EXPORT_SYMBOL(flow_rule_match_pppoe);
+
struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv))
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index a244d3bade7d..aa6cb1f90966 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -110,7 +110,7 @@ static void linkwatch_add_event(struct net_device *dev)
spin_lock_irqsave(&lweventlist_lock, flags);
if (list_empty(&dev->link_watch_list)) {
list_add_tail(&dev->link_watch_list, &lweventlist);
- dev_hold_track(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC);
}
spin_unlock_irqrestore(&lweventlist_lock, flags);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 54625287ee5b..6a8c2596ebab 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -624,7 +624,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
memcpy(n->primary_key, pkey, key_len);
n->dev = dev;
- dev_hold_track(dev, &n->dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
/* Protocol specific setup. */
if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
@@ -770,10 +770,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
write_pnet(&n->net, net);
memcpy(n->key, pkey, key_len);
n->dev = dev;
- dev_hold_track(dev, &n->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);
if (tbl->pconstructor && tbl->pconstructor(n)) {
- dev_put_track(dev, &n->dev_tracker);
+ netdev_put(dev, &n->dev_tracker);
kfree(n);
n = NULL;
goto out;
@@ -805,7 +805,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
write_unlock_bh(&tbl->lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
- dev_put_track(n->dev, &n->dev_tracker);
+ netdev_put(n->dev, &n->dev_tracker);
kfree(n);
return 0;
}
@@ -838,7 +838,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
n->next = NULL;
if (tbl->pdestructor)
tbl->pdestructor(n);
- dev_put_track(n->dev, &n->dev_tracker);
+ netdev_put(n->dev, &n->dev_tracker);
kfree(n);
}
return -ENOENT;
@@ -879,7 +879,7 @@ void neigh_destroy(struct neighbour *neigh)
if (dev->netdev_ops->ndo_neigh_destroy)
dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
- dev_put_track(dev, &neigh->dev_tracker);
+ netdev_put(dev, &neigh->dev_tracker);
neigh_parms_put(neigh->parms);
neigh_dbg(2, "neigh %p is destroyed\n", neigh);
@@ -1579,7 +1579,7 @@ static void neigh_managed_work(struct work_struct *work)
list_for_each_entry(neigh, &tbl->managed_list, managed_list)
neigh_event_send_probe(neigh, NULL, false);
queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
- max(NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME), HZ));
+ NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
write_unlock_bh(&tbl->lock);
}
@@ -1671,13 +1671,13 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
refcount_set(&p->refcnt, 1);
p->reachable_time =
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
- dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
p->dev = dev;
write_pnet(&p->net, net);
p->sysctl_table = NULL;
if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
- dev_put_track(dev, &p->dev_tracker);
+ netdev_put(dev, &p->dev_tracker);
kfree(p);
return NULL;
}
@@ -1708,7 +1708,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
list_del(&parms->list);
parms->dead = 1;
write_unlock_bh(&tbl->lock);
- dev_put_track(parms->dev, &parms->dev_tracker);
+ netdev_put(parms->dev, &parms->dev_tracker);
call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
@@ -2100,7 +2100,9 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
nla_put_msecs(skb, NDTPA_PROXY_DELAY,
NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_LOCKTIME,
- NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
+ NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
+ nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
+ NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
goto nla_put_failure;
return nla_nest_end(skb, nest);
@@ -2255,6 +2257,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
[NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
[NDTPA_LOCKTIME] = { .type = NLA_U64 },
+ [NDTPA_INTERVAL_PROBE_TIME_MS] = { .type = NLA_U64, .min = 1 },
};
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2373,6 +2376,10 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_get_msecs(tbp[i]));
call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
break;
+ case NDTPA_INTERVAL_PROBE_TIME_MS:
+ NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
+ nla_get_msecs(tbp[i]));
+ break;
case NDTPA_RETRANS_TIME:
NEIGH_VAR_SET(p, RETRANS_TIME,
nla_get_msecs(tbp[i]));
@@ -3562,6 +3569,22 @@ static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
return ret;
}
+static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table tmp = *ctl;
+ int ret;
+
+ int min = msecs_to_jiffies(1);
+
+ tmp.extra1 = &min;
+ tmp.extra2 = NULL;
+
+ ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
+ neigh_proc_update(ctl, write);
+ return ret;
+}
+
int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -3658,6 +3681,9 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
+#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
+ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
+
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
@@ -3676,6 +3702,8 @@ static struct neigh_sysctl_table {
NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
+ NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
+ "interval_probe_time_ms"),
NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a3642569fe53..d61afd21aab5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1017,7 +1017,7 @@ static void rx_queue_release(struct kobject *kobj)
#endif
memset(kobj, 0, sizeof(*kobj));
- dev_put_track(queue->dev, &queue->dev_tracker);
+ netdev_put(queue->dev, &queue->dev_tracker);
}
static const void *rx_queue_namespace(struct kobject *kobj)
@@ -1057,7 +1057,7 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
/* Kobject_put later will trigger rx_queue_release call which
* decreases dev refcount: Take that reference here
*/
- dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL);
+ netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);
kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
@@ -1620,7 +1620,7 @@ static void netdev_queue_release(struct kobject *kobj)
struct netdev_queue *queue = to_netdev_queue(kobj);
memset(kobj, 0, sizeof(*kobj));
- dev_put_track(queue->dev, &queue->dev_tracker);
+ netdev_put(queue->dev, &queue->dev_tracker);
}
static const void *netdev_queue_namespace(struct kobject *kobj)
@@ -1660,7 +1660,7 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
/* Kobject_put later will trigger netdev_queue_release call
* which decreases dev refcount: Take that reference here
*/
- dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL);
+ netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);
kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index db724463e7cd..5d27067b72d5 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -853,7 +853,7 @@ void netpoll_cleanup(struct netpoll *np)
if (!np->dev)
goto out;
__netpoll_cleanup(np);
- dev_put_track(np->dev, &np->dev_tracker);
+ netdev_put(np->dev, &np->dev_tracker);
np->dev = NULL;
out:
rtnl_unlock();
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index f18e6e771993..b74905fcc3a1 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -389,7 +389,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);
- nr_pages = alloc_pages_bulk_array(gfp, bulk, pool->alloc.cache);
+ nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk,
+ pool->alloc.cache);
if (unlikely(!nr_pages))
return NULL;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 84b62cd7bc57..88906ba6d9a7 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2100,7 +2100,7 @@ static int pktgen_setup_dev(const struct pktgen_net *pn,
/* Clean old setups */
if (pkt_dev->odev) {
- dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker);
+ netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker);
pkt_dev->odev = NULL;
}
@@ -3807,7 +3807,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
return add_dev_to_thread(t, pkt_dev);
out2:
- dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker);
+ netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker);
out1:
#ifdef CONFIG_XFRM
free_SAs(pkt_dev);
@@ -3901,7 +3901,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
/* Dis-associate from the interface */
if (pkt_dev->odev) {
- dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker);
+ netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker);
pkt_dev->odev = NULL;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cf4107d80bc4..974bbbbe7138 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -91,6 +91,9 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
+/* The array 'drop_reasons' is auto-generated in dropreason_str.c */
+EXPORT_SYMBOL(drop_reasons);
+
/**
* skb_panic - private function for out-of-line support
* @skb: buffer
@@ -172,13 +175,14 @@ static struct sk_buff *napi_skb_cache_get(void)
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
struct sk_buff *skb;
- if (unlikely(!nc->skb_count))
+ if (unlikely(!nc->skb_count)) {
nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache,
GFP_ATOMIC,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
- if (unlikely(!nc->skb_count))
- return NULL;
+ if (unlikely(!nc->skb_count))
+ return NULL;
+ }
skb = nc->skb_cache[--nc->skb_count];
kasan_unpoison_object_data(skbuff_head_cache, skb);
@@ -450,8 +454,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
skb->fclone = SKB_FCLONE_ORIG;
refcount_set(&fclones->fclone_ref, 1);
-
- fclones->skb2.fclone = SKB_FCLONE_CLONE;
}
return skb;
@@ -557,6 +559,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
struct sk_buff *skb;
void *data;
+ DEBUG_NET_WARN_ON_ONCE(!in_softirq());
len += NET_SKB_PAD + NET_IP_ALIGN;
/* If requested length is either too small or too big,
@@ -732,7 +735,7 @@ void skb_release_head_state(struct sk_buff *skb)
{
skb_dst_drop(skb);
if (skb->destructor) {
- WARN_ON(in_hardirq());
+ DEBUG_NET_WARN_ON_ONCE(in_hardirq());
skb->destructor(skb);
}
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
@@ -988,7 +991,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
return;
}
- lockdep_assert_in_softirq();
+ DEBUG_NET_WARN_ON_ONCE(!in_softirq());
if (!skb_unref(skb))
return;
@@ -1532,6 +1535,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
refcount_read(&fclones->fclone_ref) == 1) {
n = &fclones->skb2;
refcount_set(&fclones->fclone_ref, 2);
+ n->fclone = SKB_FCLONE_CLONE;
} else {
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
@@ -3216,9 +3220,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
}
}
- to->truesize += len + plen;
- to->len += len + plen;
- to->data_len += len + plen;
+ skb_len_add(to, len + plen);
if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
skb_tx_error(from);
@@ -3657,13 +3659,8 @@ onlymerged:
tgt->ip_summed = CHECKSUM_PARTIAL;
skb->ip_summed = CHECKSUM_PARTIAL;
- /* Yak, is it really working this way? Some helper please? */
- skb->len -= shiftlen;
- skb->data_len -= shiftlen;
- skb->truesize -= shiftlen;
- tgt->len += shiftlen;
- tgt->data_len += shiftlen;
- tgt->truesize += shiftlen;
+ skb_len_add(skb, -shiftlen);
+ skb_len_add(tgt, shiftlen);
return shiftlen;
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index b0fcd0200e84..81627892bdd4 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -462,7 +462,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
if (copied == len)
break;
- } while (i != msg_rx->sg.end);
+ } while (!sg_is_last(sge));
if (unlikely(peek)) {
msg_rx = sk_psock_next_msg(psock, msg_rx);
@@ -472,7 +472,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
}
msg_rx->sg.start = i;
- if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
+ if (!sge->length && sg_is_last(sge)) {
msg_rx = sk_psock_dequeue_msg(psock);
kfree_sk_msg(msg_rx);
}
@@ -497,23 +497,27 @@ bool sk_msg_is_readable(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_msg_is_readable);
-static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
- struct sk_buff *skb)
+static struct sk_msg *alloc_sk_msg(void)
{
struct sk_msg *msg;
- if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+ msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
+ if (unlikely(!msg))
return NULL;
+ sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
+ return msg;
+}
- if (!sk_rmem_schedule(sk, skb, skb->truesize))
+static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
return NULL;
- msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
- if (unlikely(!msg))
+ if (!sk_rmem_schedule(sk, skb, skb->truesize))
return NULL;
- sk_msg_init(msg);
- return msg;
+ return alloc_sk_msg();
}
static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
@@ -590,13 +594,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
{
- struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+ struct sk_msg *msg = alloc_sk_msg();
struct sock *sk = psock->sk;
int err;
if (unlikely(!msg))
return -EAGAIN;
- sk_msg_init(msg);
skb_set_owner_r(skb, sk);
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0)
@@ -720,6 +723,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
psock->eval = __SK_NONE;
psock->sk_proto = prot;
psock->saved_unhash = prot->unhash;
+ psock->saved_destroy = prot->destroy;
psock->saved_close = prot->close;
psock->saved_write_space = sk->sk_write_space;
@@ -1164,21 +1168,14 @@ static void sk_psock_done_strp(struct sk_psock *psock)
}
#endif /* CONFIG_BPF_STREAM_PARSER */
-static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
- unsigned int offset, size_t orig_len)
+static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
{
- struct sock *sk = (struct sock *)desc->arg.data;
struct sk_psock *psock;
struct bpf_prog *prog;
int ret = __SK_DROP;
- int len = orig_len;
+ int len = skb->len;
- /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
- skb = skb_clone(skb, GFP_ATOMIC);
- if (!skb) {
- desc->error = -ENOMEM;
- return 0;
- }
+ skb_get(skb);
rcu_read_lock();
psock = sk_psock(sk);
@@ -1191,12 +1188,10 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
if (!prog)
prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
- skb->sk = sk;
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
- skb->sk = NULL;
}
if (sk_psock_verdict_apply(psock, skb, ret) < 0)
len = 0;
@@ -1208,16 +1203,10 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
- read_descriptor_t desc;
- if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+ if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
return;
-
- desc.arg.data = sk;
- desc.error = 0;
- desc.count = 1;
-
- sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
+ sock->ops->read_skb(sk, sk_psock_verdict_recv);
}
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
diff --git a/net/core/sock.c b/net/core/sock.c
index 2ff40dd0a7a6..4cb957d934a2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -991,7 +991,7 @@ EXPORT_SYMBOL(sock_set_mark);
static void sock_release_reserved_memory(struct sock *sk, int bytes)
{
/* Round down bytes to multiple of pages */
- bytes &= ~(SK_MEM_QUANTUM - 1);
+ bytes = round_down(bytes, PAGE_SIZE);
WARN_ON(bytes > sk->sk_reserved_mem);
sk->sk_reserved_mem -= bytes;
@@ -1019,7 +1019,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
return -ENOMEM;
/* pre-charge to forward_alloc */
- allocated = sk_memory_allocated_add(sk, pages);
+ sk_memory_allocated_add(sk, pages);
+ allocated = sk_memory_allocated(sk);
/* If the system goes into memory pressure with this
* precharge, give up and return error.
*/
@@ -1028,9 +1029,9 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
return -ENOMEM;
}
- sk->sk_forward_alloc += pages << SK_MEM_QUANTUM_SHIFT;
+ sk->sk_forward_alloc += pages << PAGE_SHIFT;
- sk->sk_reserved_mem += pages << SK_MEM_QUANTUM_SHIFT;
+ sk->sk_reserved_mem += pages << PAGE_SHIFT;
return 0;
}
@@ -2844,7 +2845,7 @@ void __release_sock(struct sock *sk)
do {
next = skb->next;
prefetch(next);
- WARN_ON_ONCE(skb_dst_is_noref(skb));
+ DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb));
skb_mark_not_on_list(skb);
sk_backlog_rcv(sk, skb);
@@ -2869,6 +2870,7 @@ void __sk_flush_backlog(struct sock *sk)
__release_sock(sk);
spin_unlock_bh(&sk->sk_lock.slock);
}
+EXPORT_SYMBOL_GPL(__sk_flush_backlog);
/**
* sk_wait_data - wait for data to arrive at sk_receive_queue
@@ -2906,11 +2908,13 @@ EXPORT_SYMBOL(sk_wait_data);
*/
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
- struct proto *prot = sk->sk_prot;
- long allocated = sk_memory_allocated_add(sk, amt);
bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
+ struct proto *prot = sk->sk_prot;
bool charged = true;
+ long allocated;
+ sk_memory_allocated_add(sk, amt);
+ allocated = sk_memory_allocated(sk);
if (memcg_charge &&
!(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
gfp_memcg_charge())))
@@ -2987,7 +2991,6 @@ suppress_allocation:
return 0;
}
-EXPORT_SYMBOL(__sk_mem_raise_allocated);
/**
* __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
@@ -3003,10 +3006,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
int ret, amt = sk_mem_pages(size);
- sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
+ sk->sk_forward_alloc += amt << PAGE_SHIFT;
ret = __sk_mem_raise_allocated(sk, size, amt, kind);
if (!ret)
- sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
+ sk->sk_forward_alloc -= amt << PAGE_SHIFT;
return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -3029,17 +3032,16 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
sk_leave_memory_pressure(sk);
}
-EXPORT_SYMBOL(__sk_mem_reduce_allocated);
/**
* __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
* @sk: socket
- * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
+ * @amount: number of bytes (rounded down to a PAGE_SIZE multiple)
*/
void __sk_mem_reclaim(struct sock *sk, int amount)
{
- amount >>= SK_MEM_QUANTUM_SHIFT;
- sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+ amount >>= PAGE_SHIFT;
+ sk->sk_forward_alloc -= amount << PAGE_SHIFT;
__sk_mem_reduce_allocated(sk, amount);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -3798,6 +3800,10 @@ int proto_register(struct proto *prot, int alloc_slab)
pr_err("%s: missing sysctl_mem\n", prot->name);
return -EINVAL;
}
+ if (prot->memory_allocated && !prot->per_cpu_fw_alloc) {
+ pr_err("%s: missing per_cpu_fw_alloc\n", prot->name);
+ return -EINVAL;
+ }
if (alloc_slab) {
prot->slab = kmem_cache_create_usercopy(prot->name,
prot->obj_size, 0,
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 81d4b4756a02..028813dfecb0 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1561,6 +1561,29 @@ void sock_map_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sock_map_unhash);
+void sock_map_destroy(struct sock *sk)
+{
+ void (*saved_destroy)(struct sock *sk);
+ struct sk_psock *psock;
+
+ rcu_read_lock();
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ if (sk->sk_prot->destroy)
+ sk->sk_prot->destroy(sk);
+ return;
+ }
+
+ saved_destroy = psock->saved_destroy;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock, false);
+ sk_psock_put(sk, psock);
+ saved_destroy(sk);
+}
+EXPORT_SYMBOL_GPL(sock_map_destroy);
+
void sock_map_close(struct sock *sk, long timeout)
{
void (*saved_close)(struct sock *sk, long timeout);
diff --git a/net/core/stream.c b/net/core/stream.c
index 06b36c730ce8..ccc083cdef23 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -196,13 +196,13 @@ void sk_stream_kill_queues(struct sock *sk)
__skb_queue_purge(&sk->sk_receive_queue);
/* Next, the write queue. */
- WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
+ WARN_ON_ONCE(!skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
sk_mem_reclaim_final(sk);
- WARN_ON(sk->sk_wmem_queued);
- WARN_ON(sk->sk_forward_alloc);
+ WARN_ON_ONCE(sk->sk_wmem_queued);
+ WARN_ON_ONCE(sk->sk_forward_alloc);
/* It is _impossible_ for the backlog to contain anything
* when we get here. All user references to this socket