author     2022-10-01 19:16:16 -0700
committer  2022-10-01 19:16:16 -0700
commit     305a72efa791c826fe84768ca55e31adc4113ea8 (patch)
tree       35c72646ea4b81528d73d3d79650004abed3285b /net/core
parent     nvdimm/namespace: drop unneeded temporary variable in size_store() (diff)
parent     ACPI: HMAT: Release platform device in case of platform_device_add_data() fails (diff)
Merge branch 'for-6.1/nvdimm' into libnvdimm-for-next
Add v6.1 content on top of some straggling updates that missed v6.0.
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/Makefile           |    3
-rw-r--r--  net/core/bpf_sk_storage.c   |   17
-rw-r--r--  net/core/datagram.c         |   22
-rw-r--r--  net/core/dev.c              |   77
-rw-r--r--  net/core/dev_ioctl.c        |    4
-rw-r--r--  net/core/devlink.c          | 1655
-rw-r--r--  net/core/drop_monitor.c     |   36
-rw-r--r--  net/core/dst.c              |    8
-rw-r--r--  net/core/failover.c         |    4
-rw-r--r--  net/core/filter.c           |  225
-rw-r--r--  net/core/flow_dissector.c   |   53
-rw-r--r--  net/core/flow_offload.c     |   14
-rw-r--r--  net/core/gen_stats.c        |    2
-rw-r--r--  net/core/gro_cells.c        |    2
-rw-r--r--  net/core/link_watch.c       |    2
-rw-r--r--  net/core/neighbour.c        |   98
-rw-r--r--  net/core/net-sysfs.c        |    8
-rw-r--r--  net/core/net_namespace.c    |    7
-rw-r--r--  net/core/netpoll.c          |    2
-rw-r--r--  net/core/page_pool.c        |    5
-rw-r--r--  net/core/pktgen.c           |    6
-rw-r--r--  net/core/rtnetlink.c        |    1
-rw-r--r--  net/core/secure_seq.c       |    4
-rw-r--r--  net/core/skbuff.c           |   79
-rw-r--r--  net/core/skmsg.c            |   65
-rw-r--r--  net/core/sock.c             |   52
-rw-r--r--  net/core/sock_map.c         |   43
-rw-r--r--  net/core/sock_reuseport.c   |    4
-rw-r--r--  net/core/stream.c           |    6
-rw-r--r--  net/core/sysctl_net_core.c  |   15
30 files changed, 1610 insertions(+), 909 deletions(-)
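A pattern that recurs throughout the dev.c and bpf_sk_storage.c hunks below is wrapping reads of writable tunables (netdev_max_backlog, netdev_tstamp_prequeue, dev_rx_weight, sysctl_optmem_max, ...) in READ_ONCE(): the compiler may not reload or tear the value, and a function that checks the same tunable twice sees one consistent snapshot even while a sysctl writer races with it. Below is a minimal user-space sketch of the same idea, using C11 atomics in place of the kernel's READ_ONCE(); the names here are illustrative, not kernel APIs:

```c
#include <stdatomic.h>
#include <stdbool.h>

/* Tunable that a sysctl-writer thread may update at any time. */
static _Atomic int optmem_max = 65536;

/* Reading the limit twice could observe two different values and pass a
 * charge that neither value alone would permit.  Snapshotting it once,
 * as bpf_sk_storage_charge() does below with READ_ONCE(), keeps both
 * comparisons consistent. */
static bool charge_ok(int already_charged, int size)
{
	int limit = atomic_load_explicit(&optmem_max, memory_order_relaxed);

	return size <= limit && already_charged + size < limit;
}
```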
diff --git a/net/core/Makefile b/net/core/Makefile index a8e4f737692b..5857cec87b83 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -4,7 +4,8 @@ # obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o \ + flow_dissector.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index a25ec93729b9..94374d529ea4 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -310,11 +310,12 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap, void *owner, u32 size) { + int optmem_max = READ_ONCE(sysctl_optmem_max); struct sock *sk = (struct sock *)owner; /* same check as in sock_kmalloc() */ - if (size <= sysctl_optmem_max && - atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { + if (size <= optmem_max && + atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { atomic_add(size, &sk->sk_omem_alloc); return 0; } @@ -875,10 +876,18 @@ static int bpf_iter_init_sk_storage_map(void *priv_data, { struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; + bpf_map_inc_with_uref(aux->map); seq_info->map = aux->map; return 0; } +static void bpf_iter_fini_sk_storage_map(void *priv_data) +{ + struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; + + bpf_map_put_with_uref(seq_info->map); +} + static int bpf_iter_attach_map(struct bpf_prog *prog, union bpf_iter_link_info *linfo, struct bpf_iter_aux_info *aux) @@ -896,7 +905,7 @@ static int bpf_iter_attach_map(struct bpf_prog *prog, if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) goto put_map; - if (prog->aux->max_rdonly_access > map->value_size) { + if (prog->aux->max_rdwr_access > map->value_size) { err = -EACCES; goto put_map; } @@ -924,7 +933,7 @@ static const struct seq_operations bpf_sk_storage_map_seq_ops = { static const struct bpf_iter_seq_info iter_seq_info = { .seq_ops = &bpf_sk_storage_map_seq_ops, .init_seq_private = bpf_iter_init_sk_storage_map, - .fini_seq_private = NULL, + .fini_seq_private = bpf_iter_fini_sk_storage_map, .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info), }; diff --git a/net/core/datagram.c b/net/core/datagram.c index 50f4faeea76c..e4ff2db40c98 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -320,7 +320,6 @@ EXPORT_SYMBOL(skb_recv_datagram); void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { consume_skb(skb); - sk_mem_reclaim_partial(sk); } EXPORT_SYMBOL(skb_free_datagram); @@ -336,7 +335,6 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) slow = lock_sock_fast(sk); sk_peek_offset_bwd(sk, len); skb_orphan(skb); - sk_mem_reclaim_partial(sk); unlock_sock_fast(sk, slow); /* skb is now orphaned, can be freed outside of locked section */ @@ -396,7 +394,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) NULL); kfree_skb(skb); - sk_mem_reclaim_partial(sk); return err; } EXPORT_SYMBOL(skb_kill_datagram); @@ -613,10 +610,16 @@ fault: } EXPORT_SYMBOL(skb_copy_datagram_from_iter); -int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, - struct iov_iter *from, size_t length) +int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, struct iov_iter *from, + size_t length) { - int frag = skb_shinfo(skb)->nr_frags; + int frag; + + if (msg && msg->msg_ubuf && 
msg->sg_from_iter) + return msg->sg_from_iter(sk, skb, from, length); + + frag = skb_shinfo(skb)->nr_frags; while (length && iov_iter_count(from)) { struct page *pages[MAX_SKB_FRAGS]; @@ -629,12 +632,11 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, if (frag == MAX_SKB_FRAGS) return -EMSGSIZE; - copied = iov_iter_get_pages(from, pages, length, + copied = iov_iter_get_pages2(from, pages, length, MAX_SKB_FRAGS - frag, &start); if (copied < 0) return -EFAULT; - iov_iter_advance(from, copied); length -= copied; truesize = PAGE_ALIGN(copied + start); @@ -675,7 +677,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, page_ref_sub(last_head, refs); refs = 0; } - skb_fill_page_desc(skb, frag++, head, start, size); + skb_fill_page_desc_noacc(skb, frag++, head, start, size); } if (refs) page_ref_sub(last_head, refs); @@ -702,7 +704,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) if (skb_copy_datagram_from_iter(skb, 0, from, copy)) return -EFAULT; - return __zerocopy_sg_from_iter(NULL, skb, from, ~0U); + return __zerocopy_sg_from_iter(NULL, NULL, skb, from, ~0U); } EXPORT_SYMBOL(zerocopy_sg_from_iter); diff --git a/net/core/dev.c b/net/core/dev.c index 8e6f22961206..56c8b0921c9f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3927,7 +3927,7 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) skb->pkt_type = PACKET_LOOPBACK; if (skb->ip_summed == CHECKSUM_NONE) skb->ip_summed = CHECKSUM_UNNECESSARY; - WARN_ON(!skb_dst(skb)); + DEBUG_NET_WARN_ON_ONCE(!skb_dst(skb)); skb_dst_force(skb); netif_rx(skb); return 0; @@ -4168,6 +4168,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) bool again = false; skb_reset_mac_header(skb); + skb_assert_len(skb); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); @@ -4623,7 +4624,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) struct softnet_data *sd; unsigned int old_flow, new_flow; - if (qlen < (netdev_max_backlog >> 1)) + if (qlen < (READ_ONCE(netdev_max_backlog) >> 1)) return false; sd = this_cpu_ptr(&softnet_data); @@ -4671,7 +4672,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, if (!netif_running(skb->dev)) goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); - if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { + if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) { if (qlen) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); @@ -4863,7 +4864,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, } /* When doing generic XDP we have to bypass the qdisc layer and the - * network taps in order to match in-driver-XDP behavior. + * network taps in order to match in-driver-XDP behavior. This also means + * that XDP packets are able to starve other packets going through a qdisc, + * and DDOS attacks will be more effective. In-driver-XDP use dedicated TX + * queues, so they do not have this starvation issue. 
*/ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) { @@ -4875,7 +4879,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) txq = netdev_core_pick_tx(dev, skb, NULL); cpu = smp_processor_id(); HARD_TX_LOCK(dev, txq, cpu); - if (!netif_xmit_stopped(txq)) { + if (!netif_xmit_frozen_or_drv_stopped(txq)) { rc = netdev_start_xmit(skb, dev, txq, 0); if (dev_xmit_complete(rc)) free_skb = false; @@ -4883,6 +4887,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) HARD_TX_UNLOCK(dev, txq); if (free_skb) { trace_xdp_exception(dev, xdp_prog, XDP_TX); + dev_core_stats_tx_dropped_inc(dev); kfree_skb(skb); } } @@ -4923,7 +4928,7 @@ static int netif_rx_internal(struct sk_buff *skb) { int ret; - net_timestamp_check(netdev_tstamp_prequeue, skb); + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); trace_netif_rx(skb); @@ -5276,7 +5281,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, int ret = NET_RX_DROP; __be16 type; - net_timestamp_check(!netdev_tstamp_prequeue, skb); + net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb); trace_netif_receive_skb(skb); @@ -5659,7 +5664,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb) { int ret; - net_timestamp_check(netdev_tstamp_prequeue, skb); + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; @@ -5689,7 +5694,7 @@ void netif_receive_skb_list_internal(struct list_head *head) INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { - net_timestamp_check(netdev_tstamp_prequeue, skb); + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); skb_list_del_init(skb); if (!skb_defer_rx_timestamp(skb)) list_add_tail(&skb->list, &sublist); @@ -5913,7 +5918,7 @@ static int process_backlog(struct napi_struct *napi, int quota) net_rps_action_and_irq_enable(sd); } - napi->weight = dev_rx_weight; + napi->weight = READ_ONCE(dev_rx_weight); while (again) { struct sk_buff *skb; @@ -6353,6 +6358,23 @@ int dev_set_threaded(struct net_device *dev, bool threaded) } EXPORT_SYMBOL(dev_set_threaded); +/* Double check that napi_get_frags() allocates skbs with + * skb->head being backed by slab, not a page fragment. + * This is to make sure bug fixed in 3226b158e67c + * ("net: avoid 32 x truesize under-estimation for tiny skbs") + * does not accidentally come back. + */ +static void napi_get_frags_check(struct napi_struct *napi) +{ + struct sk_buff *skb; + + local_bh_disable(); + skb = napi_get_frags(napi); + WARN_ON_ONCE(skb && skb->head_frag); + napi_free_frags(napi); + local_bh_enable(); +} + void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6380,6 +6402,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); napi_hash_add(napi); + napi_get_frags_check(napi); /* Create kthread for this napi if dev->threaded is set. * Clear dev->threaded if kthread creation failed so that * threaded mode will not be enabled in napi_enable(). 
@@ -6642,8 +6665,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + - usecs_to_jiffies(netdev_budget_usecs); - int budget = netdev_budget; + usecs_to_jiffies(READ_ONCE(netdev_budget_usecs)); + int budget = READ_ONCE(netdev_budget); LIST_HEAD(list); LIST_HEAD(repoll); @@ -7465,7 +7488,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->ref_nr = 1; adj->private = private; adj->ignore = false; - dev_hold_track(adj_dev, &adj->dev_tracker, GFP_KERNEL); + netdev_hold(adj_dev, &adj->dev_tracker, GFP_KERNEL); pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", dev->name, adj_dev->name, adj->ref_nr, adj_dev->name); @@ -7494,7 +7517,7 @@ remove_symlinks: if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: - dev_put_track(adj_dev, &adj->dev_tracker); + netdev_put(adj_dev, &adj->dev_tracker); kfree(adj); return ret; @@ -7536,7 +7559,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, list_del_rcu(&adj->list); pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); - dev_put_track(adj_dev, &adj->dev_tracker); + netdev_put(adj_dev, &adj->dev_tracker); kfree_rcu(adj, rcu); } @@ -10064,7 +10087,7 @@ int register_netdevice(struct net_device *dev) dev_init_scheduler(dev); - dev_hold_track(dev, &dev->dev_registered_tracker, GFP_KERNEL); + netdev_hold(dev, &dev->dev_registered_tracker, GFP_KERNEL); list_netdevice(dev); add_device_randomness(dev->dev_addr, dev->addr_len); @@ -10261,7 +10284,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list) return dev; if (time_after(jiffies, warning_time + - netdev_unregister_timeout_secs * HZ)) { + READ_ONCE(netdev_unregister_timeout_secs) * HZ)) { list_for_each_entry(dev, list, todo_list) { pr_emerg("unregister_netdevice: waiting for %s to become free. 
Usage count = %d\n", dev->name, netdev_refcnt_read(dev)); @@ -10463,23 +10486,23 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, int cpu; for_each_possible_cpu(cpu) { + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; const struct pcpu_sw_netstats *stats; - struct pcpu_sw_netstats tmp; unsigned int start; stats = per_cpu_ptr(netstats, cpu); do { start = u64_stats_fetch_begin_irq(&stats->syncp); - tmp.rx_packets = stats->rx_packets; - tmp.rx_bytes = stats->rx_bytes; - tmp.tx_packets = stats->tx_packets; - tmp.tx_bytes = stats->tx_bytes; + rx_packets = u64_stats_read(&stats->rx_packets); + rx_bytes = u64_stats_read(&stats->rx_bytes); + tx_packets = u64_stats_read(&stats->tx_packets); + tx_bytes = u64_stats_read(&stats->tx_bytes); } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); - s->rx_packets += tmp.rx_packets; - s->rx_bytes += tmp.rx_bytes; - s->tx_packets += tmp.tx_packets; - s->tx_bytes += tmp.tx_bytes; + s->rx_packets += rx_packets; + s->rx_bytes += rx_bytes; + s->tx_packets += tx_packets; + s->tx_bytes += tx_bytes; } } EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats); @@ -10873,7 +10896,7 @@ void unregister_netdevice_many(struct list_head *head) synchronize_net(); list_for_each_entry(dev, head, unreg_list) { - dev_put_track(dev, &dev->dev_registered_tracker); + netdev_put(dev, &dev->dev_registered_tracker); net_set_todo(dev); } diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 4f6be442ae7e..7674bb9f3076 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -384,10 +384,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -ENODEV; if (!netif_is_bridge_master(dev)) return -EOPNOTSUPP; - dev_hold_track(dev, &dev_tracker, GFP_KERNEL); + netdev_hold(dev, &dev_tracker, GFP_KERNEL); rtnl_unlock(); err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); - dev_put_track(dev, &dev_tracker); + netdev_put(dev, &dev_tracker); rtnl_lock(); return err; diff --git a/net/core/devlink.c b/net/core/devlink.c index 5cc88490f18f..b50bcc18b8d9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -66,9 +66,11 @@ struct devlink { * port, sb, dpipe, resource, params, region, traps and more. */ struct mutex lock; + struct lock_class_key lock_key; u8 reload_failed:1; refcount_t refcount; struct completion comp; + struct rcu_head rcu; char priv[] __aligned(NETDEV_ALIGN); }; @@ -87,6 +89,7 @@ struct devlink_linecard { const char *type; struct devlink_linecard_type *types; unsigned int types_count; + struct devlink *nested_devlink; }; /** @@ -198,8 +201,13 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ DEVLINK_PORT_FN_STATE_ACTIVE), }; +static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = { + [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG }, +}; + static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); #define DEVLINK_REGISTERED XA_MARK_1 +#define DEVLINK_UNREGISTERING XA_MARK_2 /* devlink instances are open to the access from the user space after * devlink_register() call. Such logical barrier allows us to have certain @@ -217,24 +225,27 @@ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); #define ASSERT_DEVLINK_NOT_REGISTERED(d) \ WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) -/* devlink_mutex - * - * An overall lock guarding every operation coming from userspace. - * It also guards devlink devices list and it is taken when - * driver registers/unregisters it. 
- */ -static DEFINE_MUTEX(devlink_mutex); - struct net *devlink_net(const struct devlink *devlink) { return read_pnet(&devlink->_net); } EXPORT_SYMBOL_GPL(devlink_net); +static void __devlink_put_rcu(struct rcu_head *head) +{ + struct devlink *devlink = container_of(head, struct devlink, rcu); + + complete(&devlink->comp); +} + void devlink_put(struct devlink *devlink) { if (refcount_dec_and_test(&devlink->refcount)) - complete(&devlink->comp); + /* Make sure unregister operation that may await the completion + * is unblocked only after all users are after the end of + * RCU grace period. + */ + call_rcu(&devlink->rcu, __devlink_put_rcu); } struct devlink *__must_check devlink_try_get(struct devlink *devlink) @@ -265,18 +276,82 @@ void devl_lock(struct devlink *devlink) } EXPORT_SYMBOL_GPL(devl_lock); +int devl_trylock(struct devlink *devlink) +{ + return mutex_trylock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_trylock); + void devl_unlock(struct devlink *devlink) { mutex_unlock(&devlink->lock); } EXPORT_SYMBOL_GPL(devl_unlock); +static struct devlink * +devlinks_xa_find_get(struct net *net, unsigned long *indexp, xa_mark_t filter, + void * (*xa_find_fn)(struct xarray *, unsigned long *, + unsigned long, xa_mark_t)) +{ + struct devlink *devlink; + + rcu_read_lock(); +retry: + devlink = xa_find_fn(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED); + if (!devlink) + goto unlock; + + /* In case devlink_unregister() was already called and "unregistering" + * mark was set, do not allow to get a devlink reference here. + * This prevents live-lock of devlink_unregister() wait for completion. + */ + if (xa_get_mark(&devlinks, *indexp, DEVLINK_UNREGISTERING)) + goto retry; + + /* For a possible retry, the xa_find_after() should be always used */ + xa_find_fn = xa_find_after; + if (!devlink_try_get(devlink)) + goto retry; + if (!net_eq(devlink_net(devlink), net)) { + devlink_put(devlink); + goto retry; + } +unlock: + rcu_read_unlock(); + return devlink; +} + +static struct devlink *devlinks_xa_find_get_first(struct net *net, + unsigned long *indexp, + xa_mark_t filter) +{ + return devlinks_xa_find_get(net, indexp, filter, xa_find); +} + +static struct devlink *devlinks_xa_find_get_next(struct net *net, + unsigned long *indexp, + xa_mark_t filter) +{ + return devlinks_xa_find_get(net, indexp, filter, xa_find_after); +} + +/* Iterate over devlink pointers which were possible to get reference to. + * devlink_put() needs to be called for each iterated devlink pointer + * in loop body in order to release the reference. 
+ */ +#define devlinks_xa_for_each_get(net, index, devlink, filter) \ + for (index = 0, \ + devlink = devlinks_xa_find_get_first(net, &index, filter); \ + devlink; devlink = devlinks_xa_find_get_next(net, &index, filter)) + +#define devlinks_xa_for_each_registered_get(net, index, devlink) \ + devlinks_xa_for_each_get(net, index, devlink, DEVLINK_REGISTERED) + static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { struct devlink *devlink; unsigned long index; - bool found = false; char *busname; char *devname; @@ -286,21 +361,14 @@ static struct devlink *devlink_get_from_attrs(struct net *net, busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); - lockdep_assert_held(&devlink_mutex); - - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + devlinks_xa_for_each_registered_get(net, index, devlink) { if (strcmp(devlink->dev->bus->name, busname) == 0 && - strcmp(dev_name(devlink->dev), devname) == 0 && - net_eq(devlink_net(devlink), net)) { - found = true; - break; - } + strcmp(dev_name(devlink->dev), devname) == 0) + return devlink; + devlink_put(devlink); } - if (!found || !devlink_try_get(devlink)) - devlink = ERR_PTR(-ENODEV); - - return devlink; + return ERR_PTR(-ENODEV); } static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, @@ -634,6 +702,10 @@ struct devlink_region { const struct devlink_region_ops *ops; const struct devlink_port_region_ops *port_ops; }; + struct mutex snapshot_lock; /* protects snapshot_list, + * max_snapshots and cur_snapshots + * consistency. + */ struct list_head snapshot_list; u32 max_snapshots; u32 cur_snapshots; @@ -690,12 +762,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) #define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3) #define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4) -/* The per devlink instance lock is taken by default in the pre-doit - * operation, yet several commands do not require this. The global - * devlink lock is taken and protects from disruption by user-calls. 
- */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(5) - static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { @@ -704,14 +770,10 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, struct devlink *devlink; int err; - mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs); - if (IS_ERR(devlink)) { - mutex_unlock(&devlink_mutex); + if (IS_ERR(devlink)) return PTR_ERR(devlink); - } - if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) - mutex_lock(&devlink->lock); + devl_lock(devlink); info->user_ptr[0] = devlink; if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { devlink_port = devlink_port_get_from_info(devlink, info); @@ -753,10 +815,8 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, return 0; unlock: - if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); - mutex_unlock(&devlink_mutex); return err; } @@ -771,10 +831,8 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, linecard = info->user_ptr[1]; devlink_linecard_put(linecard); } - if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); - mutex_unlock(&devlink_mutex); } static struct genl_family devlink_nl_family; @@ -796,6 +854,24 @@ static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) return 0; } +static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink) +{ + struct nlattr *nested_attr; + + nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK); + if (!nested_attr) + return -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + nla_nest_end(msg, nested_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(msg, nested_attr); + return -EMSGSIZE; +} + struct devlink_reload_combination { enum devlink_reload_action action; enum devlink_reload_limit limit; @@ -1321,15 +1397,8 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - - mutex_lock(&devlink->lock); + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + devl_lock(devlink); list_for_each_entry(devlink_rate, &devlink->rate_list, list) { enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; u32 id = NETLINK_CB(cb->skb).portid; @@ -1342,18 +1411,16 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI, NULL); if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); -retry: + devl_unlock(devlink); devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); if (err != -EMSGSIZE) return err; @@ -1424,16 +1491,7 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) { - devlink_put(devlink); - continue; - } - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (idx < start) { idx++; devlink_put(devlink); @@ -1449,8 +1507,6 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, 
idx++; } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -1487,15 +1543,8 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - - mutex_lock(&devlink->lock); + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + devl_lock(devlink); list_for_each_entry(devlink_port, &devlink->port_list, list) { if (idx < start) { idx++; @@ -1507,19 +1556,16 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->extack); if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); -retry: + devl_unlock(devlink); devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -1700,9 +1746,9 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, return devlink->ops->port_unsplit(devlink, devlink_port, info->extack); } -static int devlink_port_new_notifiy(struct devlink *devlink, - unsigned int port_index, - struct genl_info *info) +static int devlink_port_new_notify(struct devlink *devlink, + unsigned int port_index, + struct genl_info *info) { struct devlink_port *devlink_port; struct sk_buff *msg; @@ -1712,7 +1758,7 @@ static int devlink_port_new_notifiy(struct devlink *devlink, if (!msg) return -ENOMEM; - mutex_lock(&devlink->lock); + lockdep_assert_held(&devlink->lock); devlink_port = devlink_port_get_by_index(devlink, port_index); if (!devlink_port) { err = -ENODEV; @@ -1724,12 +1770,9 @@ static int devlink_port_new_notifiy(struct devlink *devlink, if (err) goto out; - err = genlmsg_reply(msg, info); - mutex_unlock(&devlink->lock); - return err; + return genlmsg_reply(msg, info); out: - mutex_unlock(&devlink->lock); nlmsg_free(msg); return err; } @@ -1777,7 +1820,7 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, if (err) return err; - err = devlink_port_new_notifiy(devlink, new_port_index, info); + err = devlink_port_new_notify(devlink, new_port_index, info); if (err && err != -ENODEV) { /* Fail to send the response; destroy newly created port. 
*/ devlink->ops->port_del(devlink, new_port_index, extack); @@ -2100,6 +2143,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, nla_nest_end(msg, attr); } + if (linecard->nested_devlink && + devlink_nl_put_nested_handle(msg, linecard->nested_devlink)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -2172,14 +2219,7 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { mutex_lock(&devlink->linecards_lock); list_for_each_entry(linecard, &devlink->linecard_list, list) { if (idx < start) { @@ -2202,12 +2242,9 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, idx++; } mutex_unlock(&devlink->linecards_lock); -retry: devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -2444,15 +2481,8 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - - mutex_lock(&devlink->lock); + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + devl_lock(devlink); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { if (idx < start) { idx++; @@ -2464,19 +2494,16 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); -retry: + devl_unlock(devlink); devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -2596,16 +2623,11 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops->sb_pool_get) + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + if (!devlink->ops->sb_pool_get) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_pool_get_dumpit(msg, start, &idx, devlink, devlink_sb, @@ -2614,18 +2636,16 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, if (err == -EOPNOTSUPP) { err = 0; } else if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - if (err != -EMSGSIZE) return err; @@ -2817,16 +2837,11 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops->sb_port_pool_get) + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + if (!devlink->ops->sb_port_pool_get) goto 
retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_port_pool_get_dumpit(msg, start, &idx, devlink, devlink_sb, @@ -2835,18 +2850,16 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, if (err == -EOPNOTSUPP) { err = 0; } else if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - if (err != -EMSGSIZE) return err; @@ -3066,16 +3079,11 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops->sb_tc_pool_bind_get) + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + if (!devlink->ops->sb_tc_pool_bind_get) goto retry; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx, devlink, @@ -3085,18 +3093,16 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, if (err == -EOPNOTSUPP) { err = 0; } else if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } } - mutex_unlock(&devlink->lock); + devl_unlock(devlink); retry: devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - if (err != -EMSGSIZE) return err; @@ -4794,6 +4800,204 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb, return ret; } +static int +devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink, + u32 portid, u32 seq, int flags, + struct netlink_ext_ack *extack) +{ + struct nlattr *selftests; + void *hdr; + int err; + int i; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, + DEVLINK_CMD_SELFTESTS_GET); + if (!hdr) + return -EMSGSIZE; + + err = -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto err_cancel_msg; + + selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); + if (!selftests) + goto err_cancel_msg; + + for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; + i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { + if (devlink->ops->selftest_check(devlink, i, extack)) { + err = nla_put_flag(msg, i); + if (err) + goto err_cancel_msg; + } + } + + nla_nest_end(msg, selftests); + genlmsg_end(msg, hdr); + return 0; + +err_cancel_msg: + genlmsg_cancel(msg, hdr); + return err; +} + +static int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct sk_buff *msg; + int err; + + if (!devlink->ops->selftest_check) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid, + info->snd_seq, 0, info->extack); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_selftests_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + int start = cb->args[0]; + unsigned long index; + int idx = 0; + int err = 0; + + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + if (idx < start || !devlink->ops->selftest_check) + goto inc; + + devl_lock(devlink); + err = 
devlink_nl_selftests_fill(msg, devlink, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->extack); + devl_unlock(devlink); + if (err) { + devlink_put(devlink); + break; + } +inc: + idx++; + devlink_put(devlink); + } + + if (err != -EMSGSIZE) + return err; + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id, + enum devlink_selftest_status test_status) +{ + struct nlattr *result_attr; + + result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT); + if (!result_attr) + return -EMSGSIZE; + + if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) || + nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS, + test_status)) + goto nla_put_failure; + + nla_nest_end(skb, result_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, result_attr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_selftests_run(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1]; + struct devlink *devlink = info->user_ptr[0]; + struct nlattr *attrs, *selftests; + struct sk_buff *msg; + void *hdr; + int err; + int i; + + if (!devlink->ops->selftest_run || !devlink->ops->selftest_check) + return -EOPNOTSUPP; + + if (!info->attrs[DEVLINK_ATTR_SELFTESTS]) { + NL_SET_ERR_MSG_MOD(info->extack, "selftest required"); + return -EINVAL; + } + + attrs = info->attrs[DEVLINK_ATTR_SELFTESTS]; + + err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs, + devlink_selftest_nl_policy, info->extack); + if (err < 0) + return err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = -EMSGSIZE; + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN); + if (!hdr) + goto free_msg; + + if (devlink_nl_put_handle(msg, devlink)) + goto genlmsg_cancel; + + selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS); + if (!selftests) + goto genlmsg_cancel; + + for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1; + i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) { + enum devlink_selftest_status test_status; + + if (nla_get_flag(tb[i])) { + if (!devlink->ops->selftest_check(devlink, i, + info->extack)) { + if (devlink_selftest_result_put(msg, i, + DEVLINK_SELFTEST_STATUS_SKIP)) + goto selftests_nest_cancel; + continue; + } + + test_status = devlink->ops->selftest_run(devlink, i, + info->extack); + if (devlink_selftest_result_put(msg, i, test_status)) + goto selftests_nest_cancel; + } + } + + nla_nest_end(msg, selftests); + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + +selftests_nest_cancel: + nla_nest_cancel(msg, selftests); +genlmsg_cancel: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return err; +} + static const struct devlink_param devlink_param_generic[] = { { .id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, @@ -4943,7 +5147,7 @@ static int devlink_param_get(struct devlink *devlink, const struct devlink_param *param, struct devlink_param_gset_ctx *ctx) { - if (!param->get) + if (!param->get || devlink->reload_failed) return -EOPNOTSUPP; return param->get(devlink, param->id, ctx); } @@ -4952,7 +5156,7 @@ static int devlink_param_set(struct devlink *devlink, const struct devlink_param *param, struct devlink_param_gset_ctx *ctx) { - if (!param->set) + if (!param->set || devlink->reload_failed) return -EOPNOTSUPP; return param->set(devlink, param->id, ctx); } @@ -5153,15 +5357,8 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - 
mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - - mutex_lock(&devlink->lock); + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + devl_lock(devlink); list_for_each_entry(param_item, &devlink->param_list, list) { if (idx < start) { idx++; @@ -5175,19 +5372,16 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, if (err == -EOPNOTSUPP) { err = 0; } else if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); -retry: + devl_unlock(devlink); devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - if (err != -EMSGSIZE) return err; @@ -5388,15 +5582,8 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - - mutex_lock(&devlink->lock); + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + devl_lock(devlink); list_for_each_entry(devlink_port, &devlink->port_list, list) { list_for_each_entry(param_item, &devlink_port->param_list, list) { @@ -5414,20 +5601,17 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, if (err == -EOPNOTSUPP) { err = 0; } else if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } } - mutex_unlock(&devlink->lock); -retry: + devl_unlock(devlink); devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - if (err != -EMSGSIZE) return err; @@ -5672,21 +5856,28 @@ static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id) { unsigned long count; void *p; + int err; - lockdep_assert_held(&devlink->lock); - + xa_lock(&devlink->snapshot_ids); p = xa_load(&devlink->snapshot_ids, id); - if (WARN_ON(!p)) - return -EINVAL; + if (WARN_ON(!p)) { + err = -EINVAL; + goto unlock; + } - if (WARN_ON(!xa_is_value(p))) - return -EINVAL; + if (WARN_ON(!xa_is_value(p))) { + err = -EINVAL; + goto unlock; + } count = xa_to_value(p); count++; - return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), - GFP_KERNEL)); + err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), + GFP_ATOMIC)); +unlock: + xa_unlock(&devlink->snapshot_ids); + return err; } /** @@ -5709,25 +5900,26 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id) unsigned long count; void *p; - lockdep_assert_held(&devlink->lock); - + xa_lock(&devlink->snapshot_ids); p = xa_load(&devlink->snapshot_ids, id); if (WARN_ON(!p)) - return; + goto unlock; if (WARN_ON(!xa_is_value(p))) - return; + goto unlock; count = xa_to_value(p); if (count > 1) { count--; - xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), - GFP_KERNEL); + __xa_store(&devlink->snapshot_ids, id, xa_mk_value(count), + GFP_ATOMIC); } else { /* If this was the last user, we can erase this id */ - xa_erase(&devlink->snapshot_ids, id); + __xa_erase(&devlink->snapshot_ids, id); } +unlock: + xa_unlock(&devlink->snapshot_ids); } /** @@ -5748,13 +5940,17 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id) */ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id) { - lockdep_assert_held(&devlink->lock); + int 
err; - if (xa_load(&devlink->snapshot_ids, id)) + xa_lock(&devlink->snapshot_ids); + if (xa_load(&devlink->snapshot_ids, id)) { + xa_unlock(&devlink->snapshot_ids); return -EEXIST; - - return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(0), - GFP_KERNEL)); + } + err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0), + GFP_ATOMIC)); + xa_unlock(&devlink->snapshot_ids); + return err; } /** @@ -5775,8 +5971,6 @@ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id) */ static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) { - lockdep_assert_held(&devlink->lock); - return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1), xa_limit_32b, GFP_KERNEL); } @@ -5789,7 +5983,7 @@ static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) * Multiple snapshots can be created on a region. * The @snapshot_id should be obtained using the getter function. * - * Must be called only while holding the devlink instance lock. + * Must be called only while holding the region snapshot lock. * * @region: devlink region of the snapshot * @data: snapshot data @@ -5803,7 +5997,7 @@ __devlink_region_snapshot_create(struct devlink_region *region, struct devlink_snapshot *snapshot; int err; - lockdep_assert_held(&devlink->lock); + lockdep_assert_held(®ion->snapshot_lock); /* check if region can hold one more snapshot */ if (region->cur_snapshots == region->max_snapshots) @@ -5841,7 +6035,7 @@ static void devlink_region_snapshot_del(struct devlink_region *region, { struct devlink *devlink = region->devlink; - lockdep_assert_held(&devlink->lock); + lockdep_assert_held(®ion->snapshot_lock); devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL); region->cur_snapshots--; @@ -5935,7 +6129,7 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg, struct devlink_port *port; int err = 0; - mutex_lock(&devlink->lock); + devl_lock(devlink); list_for_each_entry(region, &devlink->region_list, list) { if (*idx < start) { (*idx)++; @@ -5959,7 +6153,7 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg, } out: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } @@ -5972,23 +6166,14 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink, &idx, start); -retry: devlink_put(devlink); if (err) goto out; } out: - mutex_unlock(&devlink_mutex); cb->args[0] = idx; return msg->len; } @@ -6027,11 +6212,15 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb, if (!region) return -EINVAL; + mutex_lock(®ion->snapshot_lock); snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); - if (!snapshot) + if (!snapshot) { + mutex_unlock(®ion->snapshot_lock); return -EINVAL; + } devlink_region_snapshot_del(region, snapshot); + mutex_unlock(®ion->snapshot_lock); return 0; } @@ -6079,9 +6268,12 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } + mutex_lock(®ion->snapshot_lock); + if (region->cur_snapshots == region->max_snapshots) { NL_SET_ERR_MSG_MOD(info->extack, "The region has reached the maximum number of stored snapshots"); - return -ENOSPC; + 
err = -ENOSPC; + goto unlock; } snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]; @@ -6090,17 +6282,18 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) if (devlink_region_snapshot_get_by_id(region, snapshot_id)) { NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use"); - return -EEXIST; + err = -EEXIST; + goto unlock; } err = __devlink_snapshot_id_insert(devlink, snapshot_id); if (err) - return err; + goto unlock; } else { err = __devlink_region_snapshot_id_get(devlink, &snapshot_id); if (err) { NL_SET_ERR_MSG_MOD(info->extack, "Failed to allocate a new snapshot id"); - return err; + goto unlock; } } @@ -6122,8 +6315,10 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); - if (WARN_ON(!snapshot)) - return -EINVAL; + if (WARN_ON(!snapshot)) { + err = -EINVAL; + goto unlock; + } msg = devlink_nl_region_notify_build(region, snapshot, DEVLINK_CMD_REGION_NEW, @@ -6138,16 +6333,20 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) goto err_notify; } + mutex_unlock(®ion->snapshot_lock); return 0; err_snapshot_create: region->ops->destructor(data); err_snapshot_capture: __devlink_snapshot_id_decrement(devlink, snapshot_id); + mutex_unlock(®ion->snapshot_lock); return err; err_notify: devlink_region_snapshot_del(region, snapshot); +unlock: + mutex_unlock(®ion->snapshot_lock); return err; } @@ -6242,14 +6441,11 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, start_offset = *((u64 *)&cb->args[0]); - mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); - if (IS_ERR(devlink)) { - err = PTR_ERR(devlink); - goto out_dev; - } + if (IS_ERR(devlink)) + return PTR_ERR(devlink); - mutex_lock(&devlink->lock); + devl_lock(devlink); if (!attrs[DEVLINK_ATTR_REGION_NAME] || !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) { @@ -6345,19 +6541,15 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, nla_nest_end(skb, chunks_attr); genlmsg_end(skb, hdr); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); - mutex_unlock(&devlink_mutex); - return skb->len; nla_put_failure: genlmsg_cancel(skb, hdr); out_unlock: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); -out_dev: - mutex_unlock(&devlink_mutex); return err; } @@ -6506,23 +6698,16 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, int idx = 0; int err = 0; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { if (idx < start || !devlink->ops->info_get) goto inc; - mutex_lock(&devlink->lock); + devl_lock(devlink); err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->extack); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); if (err == -EOPNOTSUPP) err = 0; else if (err) { @@ -6531,10 +6716,8 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, } inc: idx++; -retry: devlink_put(devlink); } - mutex_unlock(&devlink_mutex); if (err != -EMSGSIZE) return err; @@ -7523,6 +7706,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state prev_health_state; struct devlink *devlink = 
reporter->devlink; unsigned long recover_ts_threshold; + int ret; /* write a log message of the current error */ WARN_ON(!msg); @@ -7556,11 +7740,14 @@ int devlink_health_report(struct devlink_health_reporter *reporter, mutex_unlock(&reporter->dump_lock); } - if (reporter->auto_recover) - return devlink_health_reporter_recover(reporter, - priv_ctx, NULL); + if (!reporter->auto_recover) + return 0; - return 0; + devl_lock(devlink); + ret = devlink_health_reporter_recover(reporter, priv_ctx, NULL); + devl_unlock(devlink); + + return ret; } EXPORT_SYMBOL_GPL(devlink_health_report); @@ -7609,18 +7796,13 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb) struct nlattr **attrs = info->attrs; struct devlink *devlink; - mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); if (IS_ERR(devlink)) - goto unlock; + return NULL; reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); devlink_put(devlink); - mutex_unlock(&devlink_mutex); return reporter; -unlock: - mutex_unlock(&devlink_mutex); - return NULL; } void @@ -7686,14 +7868,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry_rep; - + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { mutex_lock(&devlink->reporters_lock); list_for_each_entry(reporter, &devlink->reporter_list, list) { @@ -7713,18 +7888,11 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, idx++; } mutex_unlock(&devlink->reporters_lock); -retry_rep: devlink_put(devlink); } - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry_port; - - mutex_lock(&devlink->lock); + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + devl_lock(devlink); list_for_each_entry(port, &devlink->port_list, list) { mutex_lock(&port->reporters_lock); list_for_each_entry(reporter, &port->reporter_list, list) { @@ -7739,7 +7907,7 @@ retry_rep: cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { mutex_unlock(&port->reporters_lock); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } @@ -7747,13 +7915,10 @@ retry_rep: } mutex_unlock(&port->reporters_lock); } - mutex_unlock(&devlink->lock); -retry_port: + devl_unlock(devlink); devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -7946,8 +8111,8 @@ static int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb, } struct devlink_stats { - u64 rx_bytes; - u64 rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_packets; struct u64_stats_sync syncp; }; @@ -8104,12 +8269,12 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats, cpu_stats = per_cpu_ptr(trap_stats, i); do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - rx_packets = cpu_stats->rx_packets; - rx_bytes = cpu_stats->rx_bytes; + rx_packets = u64_stats_read(&cpu_stats->rx_packets); + rx_bytes = u64_stats_read(&cpu_stats->rx_bytes); } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); - stats->rx_packets += rx_packets; - stats->rx_bytes += rx_bytes; + u64_stats_add(&stats->rx_packets, rx_packets); + u64_stats_add(&stats->rx_bytes, rx_bytes); } } @@ -8127,11 +8292,13 @@ 
devlink_trap_group_stats_put(struct sk_buff *msg, return -EMSGSIZE; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, - stats.rx_packets, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_packets), + DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, - stats.rx_bytes, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_bytes), + DEVLINK_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); @@ -8171,11 +8338,13 @@ static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, - stats.rx_packets, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_packets), + DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, - stats.rx_bytes, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_bytes), + DEVLINK_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); @@ -8282,15 +8451,8 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - - mutex_lock(&devlink->lock); + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + devl_lock(devlink); list_for_each_entry(trap_item, &devlink->trap_list, list) { if (idx < start) { idx++; @@ -8302,19 +8464,16 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); -retry: + devl_unlock(devlink); devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -8509,15 +8668,8 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - - mutex_lock(&devlink->lock); + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + devl_lock(devlink); list_for_each_entry(group_item, &devlink->trap_group_list, list) { if (idx < start) { @@ -8530,19 +8682,16 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { - mutex_unlock(&devlink->lock); + devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); -retry: + devl_unlock(devlink); devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -8823,15 +8972,8 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, int idx = 0; int err; - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - - mutex_lock(&devlink->lock); + devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + devl_lock(devlink); list_for_each_entry(policer_item, &devlink->trap_policer_list, list) { if (idx < start) { @@ -8844,19 +8986,16 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { - mutex_unlock(&devlink->lock); + 
devl_unlock(devlink); devlink_put(devlink); goto out; } idx++; } - mutex_unlock(&devlink->lock); -retry: + devl_unlock(devlink); devlink_put(devlink); } out: - mutex_unlock(&devlink_mutex); - cb->args[0] = idx; return msg->len; } @@ -8996,6 +9135,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -9063,13 +9203,11 @@ static const struct genl_small_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_PORT_NEW, .doit = devlink_nl_cmd_port_new_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_PORT_DEL, .doit = devlink_nl_cmd_port_del_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_LINECARD_GET, @@ -9199,7 +9337,6 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_reload, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_PARAM_GET, @@ -9267,8 +9404,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_get_doit, .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, /* can be retrieved by unprivileged users */ }, { @@ -9276,24 +9412,21 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_recover_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_diagnose_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, @@ -9307,16 +9440,14 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_test_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = 
DEVLINK_CMD_FLASH_UPDATE, @@ -9357,6 +9488,17 @@ static const struct genl_small_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_trap_policer_set_doit, .flags = GENL_ADMIN_PERM, }, + { + .cmd = DEVLINK_CMD_SELFTESTS_GET, + .doit = devlink_nl_cmd_selftests_get_doit, + .dumpit = devlink_nl_cmd_selftests_get_dumpit + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_SELFTESTS_RUN, + .doit = devlink_nl_cmd_selftests_run, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -9365,6 +9507,7 @@ static struct genl_family devlink_nl_family __ro_after_init = { .maxattr = DEVLINK_ATTR_MAX, .policy = devlink_nl_policy, .netnsok = true, + .parallel_ops = true, .pre_doit = devlink_nl_pre_doit, .post_doit = devlink_nl_post_doit, .module = THIS_MODULE, @@ -9473,7 +9616,9 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, INIT_LIST_HEAD(&devlink->trap_list); INIT_LIST_HEAD(&devlink->trap_group_list); INIT_LIST_HEAD(&devlink->trap_policer_list); + lockdep_register_key(&devlink->lock_key); mutex_init(&devlink->lock); + lockdep_set_class(&devlink->lock, &devlink->lock_key); mutex_init(&devlink->reporters_lock); mutex_init(&devlink->linecards_lock); refcount_set(&devlink->refcount, 1); @@ -9581,10 +9726,8 @@ void devlink_register(struct devlink *devlink) ASSERT_DEVLINK_NOT_REGISTERED(devlink); /* Make sure that we are in .probe() routine */ - mutex_lock(&devlink_mutex); xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); devlink_notify_register(devlink); - mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_register); @@ -9598,13 +9741,13 @@ void devlink_unregister(struct devlink *devlink) ASSERT_DEVLINK_REGISTERED(devlink); /* Make sure that we are in .remove() routine */ + xa_set_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING); devlink_put(devlink); wait_for_completion(&devlink->comp); - mutex_lock(&devlink_mutex); devlink_notify_unregister(devlink); xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); - mutex_unlock(&devlink_mutex); + xa_clear_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING); } EXPORT_SYMBOL_GPL(devlink_unregister); @@ -9620,6 +9763,7 @@ void devlink_free(struct devlink *devlink) mutex_destroy(&devlink->linecards_lock); mutex_destroy(&devlink->reporters_lock); mutex_destroy(&devlink->lock); + lockdep_unregister_key(&devlink->lock_key); WARN_ON(!list_empty(&devlink->trap_policer_list)); WARN_ON(!list_empty(&devlink->trap_group_list)); WARN_ON(!list_empty(&devlink->trap_list)); @@ -9673,11 +9817,24 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port) cancel_delayed_work_sync(&devlink_port->type_warn_dw); } +/** + * devl_port_register() - Register devlink port + * + * @devlink: devlink + * @devlink_port: devlink port + * @port_index: driver-specific numerical identifier of the port + * + * Register devlink port with provided port index. User can use + * any indexing, even hw-related one. devlink_port structure + * is convenient to be embedded inside user driver private structure. + * Note that the caller should take care of zeroing the devlink_port + * structure. + */ int devl_port_register(struct devlink *devlink, struct devlink_port *devlink_port, unsigned int port_index) { - lockdep_assert_held(&devlink->lock); + devl_assert_locked(devlink); if (devlink_port_index_exists(devlink, port_index)) return -EEXIST; @@ -9711,6 +9868,8 @@ EXPORT_SYMBOL_GPL(devl_port_register); * is convenient to be embedded inside user driver private structure. 
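
With DEVLINK_NL_FLAG_NO_LOCK gone from the ops above and .parallel_ops set on the family, generic netlink no longer serializes devlink commands globally; each instance relies on its own devlink->lock, which devlink_alloc_ns() now gives a per-instance lockdep key. The resulting driver-facing convention, sketched below with hypothetical drv_* callbacks: devl_* entry points assert that the instance lock is already held, while the devlink_* wrappers take and release it around the same call.

#include <net/devlink.h>

/* Sketch only: drv_* names are hypothetical. */
static int drv_port_add_locked(struct devlink *devlink,
                               struct devlink_port *port,
                               unsigned int index)
{
        /* caller already holds devlink->lock, e.g. inside a reload path */
        devl_assert_locked(devlink);
        return devl_port_register(devlink, port, index);
}

static int drv_port_add_unlocked(struct devlink *devlink,
                                 struct devlink_port *port,
                                 unsigned int index)
{
        /* wrapper variant takes and releases devlink->lock itself */
        return devlink_port_register(devlink, port, index);
}
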
* Note that the caller should take care of zeroing the devlink_port * structure. + * + * Context: Takes and release devlink->lock <mutex>. */ int devlink_port_register(struct devlink *devlink, struct devlink_port *devlink_port, @@ -9718,13 +9877,18 @@ int devlink_port_register(struct devlink *devlink, { int err; - mutex_lock(&devlink->lock); + devl_lock(devlink); err = devl_port_register(devlink, devlink_port, port_index); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_port_register); +/** + * devl_port_unregister() - Unregister devlink port + * + * @devlink_port: devlink port + */ void devl_port_unregister(struct devlink_port *devlink_port) { lockdep_assert_held(&devlink_port->devlink->lock); @@ -9742,14 +9906,16 @@ EXPORT_SYMBOL_GPL(devl_port_unregister); * devlink_port_unregister - Unregister devlink port * * @devlink_port: devlink port + * + * Context: Takes and release devlink->lock <mutex>. */ void devlink_port_unregister(struct devlink_port *devlink_port) { struct devlink *devlink = devlink_port->devlink; - mutex_lock(&devlink->lock); + devl_lock(devlink); devl_port_unregister(devlink_port); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_port_unregister); @@ -10002,20 +10168,13 @@ int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv) } EXPORT_SYMBOL_GPL(devl_rate_leaf_create); -int -devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv) -{ - struct devlink *devlink = devlink_port->devlink; - int ret; - - mutex_lock(&devlink->lock); - ret = devl_rate_leaf_create(devlink_port, priv); - mutex_unlock(&devlink->lock); - - return ret; -} -EXPORT_SYMBOL_GPL(devlink_rate_leaf_create); - +/** + * devl_rate_leaf_destroy - destroy devlink rate leaf + * + * @devlink_port: devlink port linked to the rate object + * + * Destroy the devlink rate object of type leaf on provided @devlink_port. + */ void devl_rate_leaf_destroy(struct devlink_port *devlink_port) { struct devlink_rate *devlink_rate = devlink_port->devlink_rate; @@ -10034,27 +10193,6 @@ void devl_rate_leaf_destroy(struct devlink_port *devlink_port) EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy); /** - * devlink_rate_leaf_destroy - destroy devlink rate leaf - * - * @devlink_port: devlink port linked to the rate object - * - * Context: Takes and release devlink->lock <mutex>. - */ -void devlink_rate_leaf_destroy(struct devlink_port *devlink_port) -{ - struct devlink_rate *devlink_rate = devlink_port->devlink_rate; - struct devlink *devlink = devlink_port->devlink; - - if (!devlink_rate) - return; - - mutex_lock(&devlink->lock); - devl_rate_leaf_destroy(devlink_port); - mutex_unlock(&devlink->lock); -} -EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy); - -/** * devl_rate_nodes_destroy - destroy all devlink rate nodes on device * @devlink: devlink instance * @@ -10092,24 +10230,6 @@ void devl_rate_nodes_destroy(struct devlink *devlink) EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy); /** - * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device - * - * @devlink: devlink instance - * - * Unset parent for all rate objects and destroy all rate nodes - * on specified device. - * - * Context: Takes and release devlink->lock <mutex>. 
- */ -void devlink_rate_nodes_destroy(struct devlink *devlink) -{ - mutex_lock(&devlink->lock); - devl_rate_nodes_destroy(devlink); - mutex_unlock(&devlink->lock); -} -EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy); - -/** * devlink_port_linecard_set - Link port with a linecard * * @devlink_port: devlink port @@ -10331,6 +10451,7 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set); void devlink_linecard_provision_clear(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); + WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; linecard->type = NULL; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); @@ -10349,6 +10470,7 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear); void devlink_linecard_provision_fail(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); + WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); mutex_unlock(&linecard->state_lock); @@ -10396,25 +10518,38 @@ void devlink_linecard_deactivate(struct devlink_linecard *linecard) } EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); -int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, - u32 size, u16 ingress_pools_count, - u16 egress_pools_count, u16 ingress_tc_count, - u16 egress_tc_count) +/** + * devlink_linecard_nested_dl_set - Attach/detach nested devlink + * instance to linecard. + * + * @linecard: devlink linecard + * @nested_devlink: devlink instance to attach or NULL to detach + */ +void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink) +{ + mutex_lock(&linecard->state_lock); + linecard->nested_devlink = nested_devlink; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set); + +int devl_sb_register(struct devlink *devlink, unsigned int sb_index, + u32 size, u16 ingress_pools_count, + u16 egress_pools_count, u16 ingress_tc_count, + u16 egress_tc_count) { struct devlink_sb *devlink_sb; - int err = 0; - mutex_lock(&devlink->lock); - if (devlink_sb_index_exists(devlink, sb_index)) { - err = -EEXIST; - goto unlock; - } + lockdep_assert_held(&devlink->lock); + + if (devlink_sb_index_exists(devlink, sb_index)) + return -EEXIST; devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL); - if (!devlink_sb) { - err = -ENOMEM; - goto unlock; - } + if (!devlink_sb) + return -ENOMEM; devlink_sb->index = sb_index; devlink_sb->size = size; devlink_sb->ingress_pools_count = ingress_pools_count; @@ -10422,57 +10557,78 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, devlink_sb->ingress_tc_count = ingress_tc_count; devlink_sb->egress_tc_count = egress_tc_count; list_add_tail(&devlink_sb->list, &devlink->sb_list); -unlock: - mutex_unlock(&devlink->lock); + return 0; +} +EXPORT_SYMBOL_GPL(devl_sb_register); + +int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, + u32 size, u16 ingress_pools_count, + u16 egress_pools_count, u16 ingress_tc_count, + u16 egress_tc_count) +{ + int err; + + devl_lock(devlink); + err = devl_sb_register(devlink, sb_index, size, ingress_pools_count, + egress_pools_count, ingress_tc_count, + egress_tc_count); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_sb_register); -void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) +void devl_sb_unregister(struct devlink *devlink, unsigned 
int sb_index) { struct devlink_sb *devlink_sb; - mutex_lock(&devlink->lock); + lockdep_assert_held(&devlink->lock); + devlink_sb = devlink_sb_get_by_index(devlink, sb_index); WARN_ON(!devlink_sb); list_del(&devlink_sb->list); - mutex_unlock(&devlink->lock); kfree(devlink_sb); } +EXPORT_SYMBOL_GPL(devl_sb_unregister); + +void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) +{ + devl_lock(devlink); + devl_sb_unregister(devlink, sb_index); + devl_unlock(devlink); +} EXPORT_SYMBOL_GPL(devlink_sb_unregister); /** - * devlink_dpipe_headers_register - register dpipe headers + * devl_dpipe_headers_register - register dpipe headers * - * @devlink: devlink - * @dpipe_headers: dpipe header array + * @devlink: devlink + * @dpipe_headers: dpipe header array * - * Register the headers supported by hardware. + * Register the headers supported by hardware. */ -int devlink_dpipe_headers_register(struct devlink *devlink, - struct devlink_dpipe_headers *dpipe_headers) +void devl_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers) { - mutex_lock(&devlink->lock); + lockdep_assert_held(&devlink->lock); + devlink->dpipe_headers = dpipe_headers; - mutex_unlock(&devlink->lock); - return 0; } -EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); +EXPORT_SYMBOL_GPL(devl_dpipe_headers_register); /** - * devlink_dpipe_headers_unregister - unregister dpipe headers + * devl_dpipe_headers_unregister - unregister dpipe headers * - * @devlink: devlink + * @devlink: devlink * - * Unregister the headers supported by hardware. + * Unregister the headers supported by hardware. */ -void devlink_dpipe_headers_unregister(struct devlink *devlink) +void devl_dpipe_headers_unregister(struct devlink *devlink) { - mutex_lock(&devlink->lock); + lockdep_assert_held(&devlink->lock); + devlink->dpipe_headers = NULL; - mutex_unlock(&devlink->lock); } -EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister); +EXPORT_SYMBOL_GPL(devl_dpipe_headers_unregister); /** * devlink_dpipe_table_counter_enabled - check if counter allocation @@ -10506,38 +10662,33 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled); /** - * devlink_dpipe_table_register - register dpipe table + * devl_dpipe_table_register - register dpipe table * - * @devlink: devlink - * @table_name: table name - * @table_ops: table ops - * @priv: priv - * @counter_control_extern: external control for counters + * @devlink: devlink + * @table_name: table name + * @table_ops: table ops + * @priv: priv + * @counter_control_extern: external control for counters */ -int devlink_dpipe_table_register(struct devlink *devlink, - const char *table_name, - struct devlink_dpipe_table_ops *table_ops, - void *priv, bool counter_control_extern) +int devl_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, bool counter_control_extern) { struct devlink_dpipe_table *table; - int err = 0; + + lockdep_assert_held(&devlink->lock); if (WARN_ON(!table_ops->size_get)) return -EINVAL; - mutex_lock(&devlink->lock); - if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name, - devlink)) { - err = -EEXIST; - goto unlock; - } + devlink)) + return -EEXIST; table = kzalloc(sizeof(*table), GFP_KERNEL); - if (!table) { - err = -ENOMEM; - goto unlock; - } + if (!table) + return -ENOMEM; table->name = table_name; table->table_ops = table_ops; @@ -10545,77 +10696,69 @@ int 
devlink_dpipe_table_register(struct devlink *devlink, table->counter_control_extern = counter_control_extern; list_add_tail_rcu(&table->list, &devlink->dpipe_table_list); -unlock: - mutex_unlock(&devlink->lock); - return err; + + return 0; } -EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); +EXPORT_SYMBOL_GPL(devl_dpipe_table_register); /** - * devlink_dpipe_table_unregister - unregister dpipe table + * devl_dpipe_table_unregister - unregister dpipe table * - * @devlink: devlink - * @table_name: table name + * @devlink: devlink + * @table_name: table name */ -void devlink_dpipe_table_unregister(struct devlink *devlink, - const char *table_name) +void devl_dpipe_table_unregister(struct devlink *devlink, + const char *table_name) { struct devlink_dpipe_table *table; - mutex_lock(&devlink->lock); + lockdep_assert_held(&devlink->lock); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name, devlink); if (!table) - goto unlock; + return; list_del_rcu(&table->list); - mutex_unlock(&devlink->lock); kfree_rcu(table, rcu); - return; -unlock: - mutex_unlock(&devlink->lock); } -EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); +EXPORT_SYMBOL_GPL(devl_dpipe_table_unregister); /** - * devlink_resource_register - devlink resource register + * devl_resource_register - devlink resource register * - * @devlink: devlink - * @resource_name: resource's name - * @resource_size: resource's size - * @resource_id: resource's id - * @parent_resource_id: resource's parent id - * @size_params: size parameters + * @devlink: devlink + * @resource_name: resource's name + * @resource_size: resource's size + * @resource_id: resource's id + * @parent_resource_id: resource's parent id + * @size_params: size parameters * - * Generic resources should reuse the same names across drivers. - * Please see the generic resources list at: - * Documentation/networking/devlink/devlink-resource.rst + * Generic resources should reuse the same names across drivers. 
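
devl_dpipe_headers_register() and devl_dpipe_table_register() now assert the instance lock instead of taking it, so a caller that is not already inside a locked section brackets the calls itself. A minimal sketch, assuming hypothetical header and table ops:

#include <net/devlink.h>

/* Sketch only: the headers, table name and ops are hypothetical. */
static struct devlink_dpipe_headers drv_dpipe_headers;

static u64 drv_table_size_get(void *priv)
{
        return 0;       /* hypothetical capacity readout */
}

static struct devlink_dpipe_table_ops drv_table_ops = {
        .size_get = drv_table_size_get,
};

static int drv_dpipe_init(struct devlink *devlink, void *priv)
{
        int err;

        devl_lock(devlink);
        devl_dpipe_headers_register(devlink, &drv_dpipe_headers);
        err = devl_dpipe_table_register(devlink, "drv_example_table",
                                        &drv_table_ops, priv, false);
        if (err)
                devl_dpipe_headers_unregister(devlink);
        devl_unlock(devlink);
        return err;
}
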
+ * Please see the generic resources list at: + * Documentation/networking/devlink/devlink-resource.rst */ -int devlink_resource_register(struct devlink *devlink, - const char *resource_name, - u64 resource_size, - u64 resource_id, - u64 parent_resource_id, - const struct devlink_resource_size_params *size_params) +int devl_resource_register(struct devlink *devlink, + const char *resource_name, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *size_params) { struct devlink_resource *resource; struct list_head *resource_list; bool top_hierarchy; - int err = 0; + + lockdep_assert_held(&devlink->lock); top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP; - mutex_lock(&devlink->lock); resource = devlink_resource_find(devlink, NULL, resource_id); - if (resource) { - err = -EINVAL; - goto out; - } + if (resource) + return -EINVAL; resource = kzalloc(sizeof(*resource), GFP_KERNEL); - if (!resource) { - err = -ENOMEM; - goto out; - } + if (!resource) + return -ENOMEM; if (top_hierarchy) { resource_list = &devlink->resource_list; @@ -10629,8 +10772,7 @@ int devlink_resource_register(struct devlink *devlink, resource->parent = parent_resource; } else { kfree(resource); - err = -EINVAL; - goto out; + return -EINVAL; } } @@ -10643,8 +10785,40 @@ int devlink_resource_register(struct devlink *devlink, sizeof(resource->size_params)); INIT_LIST_HEAD(&resource->resource_list); list_add_tail(&resource->list, resource_list); -out: - mutex_unlock(&devlink->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(devl_resource_register); + +/** + * devlink_resource_register - devlink resource register + * + * @devlink: devlink + * @resource_name: resource's name + * @resource_size: resource's size + * @resource_id: resource's id + * @parent_resource_id: resource's parent id + * @size_params: size parameters + * + * Generic resources should reuse the same names across drivers. + * Please see the generic resources list at: + * Documentation/networking/devlink/devlink-resource.rst + * + * Context: Takes and release devlink->lock <mutex>. + */ +int devlink_resource_register(struct devlink *devlink, + const char *resource_name, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *size_params) +{ + int err; + + devl_lock(devlink); + err = devl_resource_register(devlink, resource_name, resource_size, + resource_id, parent_resource_id, size_params); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_resource_register); @@ -10663,15 +10837,15 @@ static void devlink_resource_unregister(struct devlink *devlink, } /** - * devlink_resources_unregister - free all resources + * devl_resources_unregister - free all resources * - * @devlink: devlink + * @devlink: devlink */ -void devlink_resources_unregister(struct devlink *devlink) +void devl_resources_unregister(struct devlink *devlink) { struct devlink_resource *tmp, *child_resource; - mutex_lock(&devlink->lock); + lockdep_assert_held(&devlink->lock); list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list, list) { @@ -10679,69 +10853,100 @@ void devlink_resources_unregister(struct devlink *devlink) list_del(&child_resource->list); kfree(child_resource); } +} +EXPORT_SYMBOL_GPL(devl_resources_unregister); - mutex_unlock(&devlink->lock); +/** + * devlink_resources_unregister - free all resources + * + * @devlink: devlink + * + * Context: Takes and release devlink->lock <mutex>. 
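
The resource API follows suit: devl_resource_register() and devl_resource_occ_get_register() assert devlink->lock, so one lock span can cover both. A sketch with a hypothetical resource id and occupancy getter; devlink_resource_size_params_init() is the standard helper for filling the size parameters:

#include <net/devlink.h>

/* Sketch only: id, size and occupancy readout are hypothetical. */
#define DRV_RES_ID_KVD  1

static u64 drv_kvd_occ_get(void *priv)
{
        return 0;       /* hypothetical occupancy readout */
}

static int drv_resources_init(struct devlink *devlink)
{
        struct devlink_resource_size_params params;
        int err;

        devlink_resource_size_params_init(&params, 0, 1 << 20, 1,
                                          DEVLINK_RESOURCE_UNIT_ENTRY);
        devl_lock(devlink);
        err = devl_resource_register(devlink, "kvd", 1 << 20,
                                     DRV_RES_ID_KVD,
                                     DEVLINK_RESOURCE_ID_PARENT_TOP,
                                     &params);
        if (!err)
                devl_resource_occ_get_register(devlink, DRV_RES_ID_KVD,
                                               drv_kvd_occ_get, NULL);
        devl_unlock(devlink);
        return err;
}
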
+ */ +void devlink_resources_unregister(struct devlink *devlink) +{ + devl_lock(devlink); + devl_resources_unregister(devlink); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_resources_unregister); /** - * devlink_resource_size_get - get and update size + * devl_resource_size_get - get and update size * - * @devlink: devlink - * @resource_id: the requested resource id - * @p_resource_size: ptr to update + * @devlink: devlink + * @resource_id: the requested resource id + * @p_resource_size: ptr to update */ -int devlink_resource_size_get(struct devlink *devlink, - u64 resource_id, - u64 *p_resource_size) +int devl_resource_size_get(struct devlink *devlink, + u64 resource_id, + u64 *p_resource_size) { struct devlink_resource *resource; - int err = 0; - mutex_lock(&devlink->lock); + lockdep_assert_held(&devlink->lock); + resource = devlink_resource_find(devlink, NULL, resource_id); - if (!resource) { - err = -EINVAL; - goto out; - } + if (!resource) + return -EINVAL; *p_resource_size = resource->size_new; resource->size = resource->size_new; -out: - mutex_unlock(&devlink->lock); - return err; + return 0; } -EXPORT_SYMBOL_GPL(devlink_resource_size_get); +EXPORT_SYMBOL_GPL(devl_resource_size_get); /** - * devlink_dpipe_table_resource_set - set the resource id + * devl_dpipe_table_resource_set - set the resource id * - * @devlink: devlink - * @table_name: table name - * @resource_id: resource id - * @resource_units: number of resource's units consumed per table's entry + * @devlink: devlink + * @table_name: table name + * @resource_id: resource id + * @resource_units: number of resource's units consumed per table's entry */ -int devlink_dpipe_table_resource_set(struct devlink *devlink, - const char *table_name, u64 resource_id, - u64 resource_units) +int devl_dpipe_table_resource_set(struct devlink *devlink, + const char *table_name, u64 resource_id, + u64 resource_units) { struct devlink_dpipe_table *table; - int err = 0; - mutex_lock(&devlink->lock); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name, devlink); - if (!table) { - err = -EINVAL; - goto out; - } + if (!table) + return -EINVAL; + table->resource_id = resource_id; table->resource_units = resource_units; table->resource_valid = true; -out: - mutex_unlock(&devlink->lock); - return err; + return 0; +} +EXPORT_SYMBOL_GPL(devl_dpipe_table_resource_set); + +/** + * devl_resource_occ_get_register - register occupancy getter + * + * @devlink: devlink + * @resource_id: resource id + * @occ_get: occupancy getter callback + * @occ_get_priv: occupancy getter callback priv + */ +void devl_resource_occ_get_register(struct devlink *devlink, + u64 resource_id, + devlink_resource_occ_get_t *occ_get, + void *occ_get_priv) +{ + struct devlink_resource *resource; + + lockdep_assert_held(&devlink->lock); + + resource = devlink_resource_find(devlink, NULL, resource_id); + if (WARN_ON(!resource)) + return; + WARN_ON(resource->occ_get); + + resource->occ_get = occ_get; + resource->occ_get_priv = occ_get_priv; } -EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set); +EXPORT_SYMBOL_GPL(devl_resource_occ_get_register); /** * devlink_resource_occ_get_register - register occupancy getter @@ -10750,48 +10955,58 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set); * @resource_id: resource id * @occ_get: occupancy getter callback * @occ_get_priv: occupancy getter callback priv + * + * Context: Takes and release devlink->lock <mutex>. 
*/ void devlink_resource_occ_get_register(struct devlink *devlink, u64 resource_id, devlink_resource_occ_get_t *occ_get, void *occ_get_priv) { + devl_lock(devlink); + devl_resource_occ_get_register(devlink, resource_id, + occ_get, occ_get_priv); + devl_unlock(devlink); +} +EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register); + +/** + * devl_resource_occ_get_unregister - unregister occupancy getter + * + * @devlink: devlink + * @resource_id: resource id + */ +void devl_resource_occ_get_unregister(struct devlink *devlink, + u64 resource_id) +{ struct devlink_resource *resource; - mutex_lock(&devlink->lock); + lockdep_assert_held(&devlink->lock); + resource = devlink_resource_find(devlink, NULL, resource_id); if (WARN_ON(!resource)) - goto out; - WARN_ON(resource->occ_get); + return; + WARN_ON(!resource->occ_get); - resource->occ_get = occ_get; - resource->occ_get_priv = occ_get_priv; -out: - mutex_unlock(&devlink->lock); + resource->occ_get = NULL; + resource->occ_get_priv = NULL; } -EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register); +EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister); /** * devlink_resource_occ_get_unregister - unregister occupancy getter * * @devlink: devlink * @resource_id: resource id + * + * Context: Takes and release devlink->lock <mutex>. */ void devlink_resource_occ_get_unregister(struct devlink *devlink, u64 resource_id) { - struct devlink_resource *resource; - - mutex_lock(&devlink->lock); - resource = devlink_resource_find(devlink, NULL, resource_id); - if (WARN_ON(!resource)) - goto out; - WARN_ON(!resource->occ_get); - - resource->occ_get = NULL; - resource->occ_get_priv = NULL; -out: - mutex_unlock(&devlink->lock); + devl_lock(devlink); + devl_resource_occ_get_unregister(devlink, resource_id); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister); @@ -11012,51 +11227,67 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id) EXPORT_SYMBOL_GPL(devlink_param_value_changed); /** - * devlink_region_create - create a new address region + * devl_region_create - create a new address region * - * @devlink: devlink - * @ops: region operations and name - * @region_max_snapshots: Maximum supported number of snapshots for region - * @region_size: size of region + * @devlink: devlink + * @ops: region operations and name + * @region_max_snapshots: Maximum supported number of snapshots for region + * @region_size: size of region */ -struct devlink_region * -devlink_region_create(struct devlink *devlink, - const struct devlink_region_ops *ops, - u32 region_max_snapshots, u64 region_size) +struct devlink_region *devl_region_create(struct devlink *devlink, + const struct devlink_region_ops *ops, + u32 region_max_snapshots, + u64 region_size) { struct devlink_region *region; - int err = 0; + + devl_assert_locked(devlink); if (WARN_ON(!ops) || WARN_ON(!ops->destructor)) return ERR_PTR(-EINVAL); - mutex_lock(&devlink->lock); - - if (devlink_region_get_by_name(devlink, ops->name)) { - err = -EEXIST; - goto unlock; - } + if (devlink_region_get_by_name(devlink, ops->name)) + return ERR_PTR(-EEXIST); region = kzalloc(sizeof(*region), GFP_KERNEL); - if (!region) { - err = -ENOMEM; - goto unlock; - } + if (!region) + return ERR_PTR(-ENOMEM); region->devlink = devlink; region->max_snapshots = region_max_snapshots; region->ops = ops; region->size = region_size; INIT_LIST_HEAD(®ion->snapshot_list); + mutex_init(®ion->snapshot_lock); list_add_tail(®ion->list, &devlink->region_list); devlink_nl_region_notify(region, NULL, 
DEVLINK_CMD_REGION_NEW); - mutex_unlock(&devlink->lock); return region; +} +EXPORT_SYMBOL_GPL(devl_region_create); -unlock: - mutex_unlock(&devlink->lock); - return ERR_PTR(err); +/** + * devlink_region_create - create a new address region + * + * @devlink: devlink + * @ops: region operations and name + * @region_max_snapshots: Maximum supported number of snapshots for region + * @region_size: size of region + * + * Context: Takes and release devlink->lock <mutex>. + */ +struct devlink_region * +devlink_region_create(struct devlink *devlink, + const struct devlink_region_ops *ops, + u32 region_max_snapshots, u64 region_size) +{ + struct devlink_region *region; + + devl_lock(devlink); + region = devl_region_create(devlink, ops, region_max_snapshots, + region_size); + devl_unlock(devlink); + return region; } EXPORT_SYMBOL_GPL(devlink_region_create); @@ -11067,6 +11298,8 @@ EXPORT_SYMBOL_GPL(devlink_region_create); * @ops: region operations and name * @region_max_snapshots: Maximum supported number of snapshots for region * @region_size: size of region + * + * Context: Takes and release devlink->lock <mutex>. */ struct devlink_region * devlink_port_region_create(struct devlink_port *port, @@ -11080,7 +11313,7 @@ devlink_port_region_create(struct devlink_port *port, if (WARN_ON(!ops) || WARN_ON(!ops->destructor)) return ERR_PTR(-EINVAL); - mutex_lock(&devlink->lock); + devl_lock(devlink); if (devlink_port_region_get_by_name(port, ops->name)) { err = -EEXIST; @@ -11099,40 +11332,58 @@ devlink_port_region_create(struct devlink_port *port, region->port_ops = ops; region->size = region_size; INIT_LIST_HEAD(®ion->snapshot_list); + mutex_init(®ion->snapshot_lock); list_add_tail(®ion->list, &port->region_list); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return region; unlock: - mutex_unlock(&devlink->lock); + devl_unlock(devlink); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(devlink_port_region_create); /** - * devlink_region_destroy - destroy address region + * devl_region_destroy - destroy address region * - * @region: devlink region to destroy + * @region: devlink region to destroy */ -void devlink_region_destroy(struct devlink_region *region) +void devl_region_destroy(struct devlink_region *region) { struct devlink *devlink = region->devlink; struct devlink_snapshot *snapshot, *ts; - mutex_lock(&devlink->lock); + devl_assert_locked(devlink); /* Free all snapshots of region */ list_for_each_entry_safe(snapshot, ts, ®ion->snapshot_list, list) devlink_region_snapshot_del(region, snapshot); list_del(®ion->list); + mutex_destroy(®ion->snapshot_lock); devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL); - mutex_unlock(&devlink->lock); kfree(region); } +EXPORT_SYMBOL_GPL(devl_region_destroy); + +/** + * devlink_region_destroy - destroy address region + * + * @region: devlink region to destroy + * + * Context: Takes and release devlink->lock <mutex>. 
+ */ +void devlink_region_destroy(struct devlink_region *region) +{ + struct devlink *devlink = region->devlink; + + devl_lock(devlink); + devl_region_destroy(region); + devl_unlock(devlink); +} EXPORT_SYMBOL_GPL(devlink_region_destroy); /** @@ -11152,13 +11403,7 @@ EXPORT_SYMBOL_GPL(devlink_region_destroy); */ int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id) { - int err; - - mutex_lock(&devlink->lock); - err = __devlink_region_snapshot_id_get(devlink, id); - mutex_unlock(&devlink->lock); - - return err; + return __devlink_region_snapshot_id_get(devlink, id); } EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get); @@ -11174,9 +11419,7 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get); */ void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id) { - mutex_lock(&devlink->lock); __devlink_snapshot_id_decrement(devlink, id); - mutex_unlock(&devlink->lock); } EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put); @@ -11195,13 +11438,11 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put); int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id) { - struct devlink *devlink = region->devlink; int err; - mutex_lock(&devlink->lock); + mutex_lock(®ion->snapshot_lock); err = __devlink_region_snapshot_create(region, data, snapshot_id); - mutex_unlock(&devlink->lock); - + mutex_unlock(®ion->snapshot_lock); return err; } EXPORT_SYMBOL_GPL(devlink_region_snapshot_create); @@ -11566,7 +11807,7 @@ static void devlink_trap_disable(struct devlink *devlink, } /** - * devlink_traps_register - Register packet traps with devlink. + * devl_traps_register - Register packet traps with devlink. * @devlink: devlink. * @traps: Packet traps. * @traps_count: Count of provided packet traps. @@ -11574,16 +11815,16 @@ static void devlink_trap_disable(struct devlink *devlink, * * Return: Non-zero value on failure. */ -int devlink_traps_register(struct devlink *devlink, - const struct devlink_trap *traps, - size_t traps_count, void *priv) +int devl_traps_register(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count, void *priv) { int i, err; if (!devlink->ops->trap_init || !devlink->ops->trap_action_set) return -EINVAL; - mutex_lock(&devlink->lock); + devl_assert_locked(devlink); for (i = 0; i < traps_count; i++) { const struct devlink_trap *trap = &traps[i]; @@ -11595,7 +11836,6 @@ int devlink_traps_register(struct devlink *devlink, if (err) goto err_trap_register; } - mutex_unlock(&devlink->lock); return 0; @@ -11603,24 +11843,47 @@ err_trap_register: err_trap_verify: for (i--; i >= 0; i--) devlink_trap_unregister(devlink, &traps[i]); - mutex_unlock(&devlink->lock); + return err; +} +EXPORT_SYMBOL_GPL(devl_traps_register); + +/** + * devlink_traps_register - Register packet traps with devlink. + * @devlink: devlink. + * @traps: Packet traps. + * @traps_count: Count of provided packet traps. + * @priv: Driver private information. + * + * Context: Takes and release devlink->lock <mutex>. + * + * Return: Non-zero value on failure. + */ +int devlink_traps_register(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count, void *priv) +{ + int err; + + devl_lock(devlink); + err = devl_traps_register(devlink, traps, traps_count, priv); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_traps_register); /** - * devlink_traps_unregister - Unregister packet traps from devlink. + * devl_traps_unregister - Unregister packet traps from devlink. * @devlink: devlink. * @traps: Packet traps. 
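
The trap conversion repeats the pattern: devl_traps_register(), devl_trap_groups_register() and devl_trap_policers_register() all assert the instance lock, which lets a driver register policers, groups and traps in one lock span with explicit unwind. A sketch assuming hypothetical drv_* arrays:

#include <linux/kernel.h>
#include <net/devlink.h>

/* Sketch only: the policer, group and trap arrays are hypothetical. */
static const struct devlink_trap_policer drv_policers[] = { /* ... */ };
static const struct devlink_trap_group drv_groups[] = { /* ... */ };
static const struct devlink_trap drv_traps[] = { /* ... */ };

static int drv_traps_init(struct devlink *devlink, void *priv)
{
        int err;

        devl_lock(devlink);
        err = devl_trap_policers_register(devlink, drv_policers,
                                          ARRAY_SIZE(drv_policers));
        if (err)
                goto err_unlock;
        err = devl_trap_groups_register(devlink, drv_groups,
                                        ARRAY_SIZE(drv_groups));
        if (err)
                goto err_policers;
        err = devl_traps_register(devlink, drv_traps,
                                  ARRAY_SIZE(drv_traps), priv);
        if (err)
                goto err_groups;
        devl_unlock(devlink);
        return 0;

err_groups:
        devl_trap_groups_unregister(devlink, drv_groups,
                                    ARRAY_SIZE(drv_groups));
err_policers:
        devl_trap_policers_unregister(devlink, drv_policers,
                                      ARRAY_SIZE(drv_policers));
err_unlock:
        devl_unlock(devlink);
        return err;
}
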
* @traps_count: Count of provided packet traps. */ -void devlink_traps_unregister(struct devlink *devlink, - const struct devlink_trap *traps, - size_t traps_count) +void devl_traps_unregister(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count) { int i; - mutex_lock(&devlink->lock); + devl_assert_locked(devlink); /* Make sure we do not have any packets in-flight while unregistering * traps by disabling all of them and waiting for a grace period. */ @@ -11629,7 +11892,24 @@ void devlink_traps_unregister(struct devlink *devlink, synchronize_rcu(); for (i = traps_count - 1; i >= 0; i--) devlink_trap_unregister(devlink, &traps[i]); - mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_traps_unregister); + +/** + * devlink_traps_unregister - Unregister packet traps from devlink. + * @devlink: devlink. + * @traps: Packet traps. + * @traps_count: Count of provided packet traps. + * + * Context: Takes and release devlink->lock <mutex>. + */ +void devlink_traps_unregister(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count) +{ + devl_lock(devlink); + devl_traps_unregister(devlink, traps, traps_count); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_traps_unregister); @@ -11641,8 +11921,8 @@ devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats, stats = this_cpu_ptr(trap_stats); u64_stats_update_begin(&stats->syncp); - stats->rx_bytes += skb_len; - stats->rx_packets++; + u64_stats_add(&stats->rx_bytes, skb_len); + u64_stats_inc(&stats->rx_packets); u64_stats_update_end(&stats->syncp); } @@ -11788,20 +12068,20 @@ devlink_trap_group_unregister(struct devlink *devlink, } /** - * devlink_trap_groups_register - Register packet trap groups with devlink. + * devl_trap_groups_register - Register packet trap groups with devlink. * @devlink: devlink. * @groups: Packet trap groups. * @groups_count: Count of provided packet trap groups. * * Return: Non-zero value on failure. */ -int devlink_trap_groups_register(struct devlink *devlink, - const struct devlink_trap_group *groups, - size_t groups_count) +int devl_trap_groups_register(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count) { int i, err; - mutex_lock(&devlink->lock); + devl_assert_locked(devlink); for (i = 0; i < groups_count; i++) { const struct devlink_trap_group *group = &groups[i]; @@ -11813,7 +12093,6 @@ int devlink_trap_groups_register(struct devlink *devlink, if (err) goto err_trap_group_register; } - mutex_unlock(&devlink->lock); return 0; @@ -11821,27 +12100,66 @@ err_trap_group_register: err_trap_group_verify: for (i--; i >= 0; i--) devlink_trap_group_unregister(devlink, &groups[i]); - mutex_unlock(&devlink->lock); + return err; +} +EXPORT_SYMBOL_GPL(devl_trap_groups_register); + +/** + * devlink_trap_groups_register - Register packet trap groups with devlink. + * @devlink: devlink. + * @groups: Packet trap groups. + * @groups_count: Count of provided packet trap groups. + * + * Context: Takes and release devlink->lock <mutex>. + * + * Return: Non-zero value on failure. + */ +int devlink_trap_groups_register(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count) +{ + int err; + + devl_lock(devlink); + err = devl_trap_groups_register(devlink, groups, groups_count); + devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_trap_groups_register); /** - * devlink_trap_groups_unregister - Unregister packet trap groups from devlink. 
+ * devl_trap_groups_unregister - Unregister packet trap groups from devlink. * @devlink: devlink. * @groups: Packet trap groups. * @groups_count: Count of provided packet trap groups. */ -void devlink_trap_groups_unregister(struct devlink *devlink, - const struct devlink_trap_group *groups, - size_t groups_count) +void devl_trap_groups_unregister(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count) { int i; - mutex_lock(&devlink->lock); + devl_assert_locked(devlink); for (i = groups_count - 1; i >= 0; i--) devlink_trap_group_unregister(devlink, &groups[i]); - mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_trap_groups_unregister); + +/** + * devlink_trap_groups_unregister - Unregister packet trap groups from devlink. + * @devlink: devlink. + * @groups: Packet trap groups. + * @groups_count: Count of provided packet trap groups. + * + * Context: Takes and release devlink->lock <mutex>. + */ +void devlink_trap_groups_unregister(struct devlink *devlink, + const struct devlink_trap_group *groups, + size_t groups_count) +{ + devl_lock(devlink); + devl_trap_groups_unregister(devlink, groups, groups_count); + devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister); @@ -11927,7 +12245,7 @@ devlink_trap_policer_unregister(struct devlink *devlink, } /** - * devlink_trap_policers_register - Register packet trap policers with devlink. + * devl_trap_policers_register - Register packet trap policers with devlink. * @devlink: devlink. * @policers: Packet trap policers. * @policers_count: Count of provided packet trap policers. @@ -11935,13 +12253,13 @@ devlink_trap_policer_unregister(struct devlink *devlink, * Return: Non-zero value on failure. */ int -devlink_trap_policers_register(struct devlink *devlink, - const struct devlink_trap_policer *policers, - size_t policers_count) +devl_trap_policers_register(struct devlink *devlink, + const struct devlink_trap_policer *policers, + size_t policers_count) { int i, err; - mutex_lock(&devlink->lock); + devl_assert_locked(devlink); for (i = 0; i < policers_count; i++) { const struct devlink_trap_policer *policer = &policers[i]; @@ -11956,38 +12274,34 @@ devlink_trap_policers_register(struct devlink *devlink, if (err) goto err_trap_policer_register; } - mutex_unlock(&devlink->lock); - return 0; err_trap_policer_register: err_trap_policer_verify: for (i--; i >= 0; i--) devlink_trap_policer_unregister(devlink, &policers[i]); - mutex_unlock(&devlink->lock); return err; } -EXPORT_SYMBOL_GPL(devlink_trap_policers_register); +EXPORT_SYMBOL_GPL(devl_trap_policers_register); /** - * devlink_trap_policers_unregister - Unregister packet trap policers from devlink. + * devl_trap_policers_unregister - Unregister packet trap policers from devlink. * @devlink: devlink. * @policers: Packet trap policers. * @policers_count: Count of provided packet trap policers. 
*/ void -devlink_trap_policers_unregister(struct devlink *devlink, - const struct devlink_trap_policer *policers, - size_t policers_count) +devl_trap_policers_unregister(struct devlink *devlink, + const struct devlink_trap_policer *policers, + size_t policers_count) { int i; - mutex_lock(&devlink->lock); + devl_assert_locked(devlink); for (i = policers_count - 1; i >= 0; i--) devlink_trap_policer_unregister(devlink, &policers[i]); - mutex_unlock(&devlink->lock); } -EXPORT_SYMBOL_GPL(devlink_trap_policers_unregister); +EXPORT_SYMBOL_GPL(devl_trap_policers_unregister); static void __devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) @@ -12039,9 +12353,9 @@ void devlink_compat_running_version(struct devlink *devlink, if (!devlink->ops->info_get) return; - mutex_lock(&devlink->lock); + devl_lock(devlink); __devlink_compat_running_version(devlink, buf, len); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); } int devlink_compat_flash_update(struct devlink *devlink, const char *file_name) @@ -12056,11 +12370,11 @@ int devlink_compat_flash_update(struct devlink *devlink, const char *file_name) if (ret) return ret; - mutex_lock(&devlink->lock); + devl_lock(devlink); devlink_flash_update_begin_notify(devlink); ret = devlink->ops->flash_update(devlink, ¶ms, NULL); devlink_flash_update_end_notify(devlink); - mutex_unlock(&devlink->lock); + devl_unlock(devlink); release_firmware(params.fw); @@ -12113,25 +12427,18 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) /* In case network namespace is getting destroyed, reload * all devlink instances from this namespace into init_net. */ - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), net)) - goto retry; - + devlinks_xa_for_each_registered_get(net, index, devlink) { WARN_ON(!(devlink->features & DEVLINK_F_RELOAD)); + mutex_lock(&devlink->lock); err = devlink_reload(devlink, &init_net, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, DEVLINK_RELOAD_LIMIT_UNSPEC, &actions_performed, NULL); + mutex_unlock(&devlink->lock); if (err && err != -EOPNOTSUPP) pr_warn("Failed to reload devlink instance into init_net\n"); -retry: devlink_put(devlink); } - mutex_unlock(&devlink_mutex); } static struct pernet_operations devlink_pernet_ops __net_initdata = { diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 41cac0e4834e..75501e1bdd25 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -48,19 +48,6 @@ static int trace_state = TRACE_OFF; static bool monitor_hw; -#undef EM -#undef EMe - -#define EM(a, b) [a] = #b, -#define EMe(a, b) [a] = #b - -/* drop_reasons is used to translate 'enum skb_drop_reason' to string, - * which is reported to user space. - */ -static const char * const drop_reasons[] = { - TRACE_SKB_DROP_REASON -}; - /* net_dm_mutex * * An overall lock guarding every operation coming from userspace. 
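
Alongside the locking rework, the devlink trap and drop_monitor counters in this patch move from plain u64 to u64_stats_t, making the updater and reader sides of the seqcount protocol explicit. A minimal sketch of the pattern with a hypothetical stats struct (the syncp bracketing compiles away on 64-bit):

#include <linux/u64_stats_sync.h>

/* Sketch only: drv_stats is hypothetical. */
struct drv_stats {
        u64_stats_t             packets;
        u64_stats_t             bytes;
        struct u64_stats_sync   syncp;
};

static void drv_stats_update(struct drv_stats *s, unsigned int len)
{
        u64_stats_update_begin(&s->syncp);
        u64_stats_inc(&s->packets);
        u64_stats_add(&s->bytes, len);
        u64_stats_update_end(&s->syncp);
}

static u64 drv_stats_read_bytes(const struct drv_stats *s)
{
        unsigned int start;
        u64 bytes;

        do {
                start = u64_stats_fetch_begin_irq(&s->syncp);
                bytes = u64_stats_read(&s->bytes);
        } while (u64_stats_fetch_retry_irq(&s->syncp, start));

        return bytes;
}
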
@@ -68,7 +55,7 @@ static const char * const drop_reasons[] = { static DEFINE_MUTEX(net_dm_mutex); struct net_dm_stats { - u64 dropped; + u64_stats_t dropped; struct u64_stats_sync syncp; }; @@ -543,7 +530,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore, unlock_free: spin_unlock_irqrestore(&data->drop_queue.lock, flags); u64_stats_update_begin(&data->stats.syncp); - data->stats.dropped++; + u64_stats_inc(&data->stats.dropped); u64_stats_update_end(&data->stats.syncp); consume_skb(nskb); } @@ -877,7 +864,8 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata) } hw_metadata->input_dev = metadata->input_dev; - dev_hold_track(hw_metadata->input_dev, &hw_metadata->dev_tracker, GFP_ATOMIC); + netdev_hold(hw_metadata->input_dev, &hw_metadata->dev_tracker, + GFP_ATOMIC); return hw_metadata; @@ -893,7 +881,7 @@ free_hw_metadata: static void net_dm_hw_metadata_free(struct devlink_trap_metadata *hw_metadata) { - dev_put_track(hw_metadata->input_dev, &hw_metadata->dev_tracker); + netdev_put(hw_metadata->input_dev, &hw_metadata->dev_tracker); kfree(hw_metadata->fa_cookie); kfree(hw_metadata->trap_name); kfree(hw_metadata->trap_group_name); @@ -998,7 +986,7 @@ net_dm_hw_trap_packet_probe(void *ignore, const struct devlink *devlink, unlock_free: spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); u64_stats_update_begin(&hw_data->stats.syncp); - hw_data->stats.dropped++; + u64_stats_inc(&hw_data->stats.dropped); u64_stats_update_end(&hw_data->stats.syncp); net_dm_hw_metadata_free(n_hw_metadata); free: @@ -1445,10 +1433,10 @@ static void net_dm_stats_read(struct net_dm_stats *stats) do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - dropped = cpu_stats->dropped; + dropped = u64_stats_read(&cpu_stats->dropped); } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); - stats->dropped += dropped; + u64_stats_add(&stats->dropped, dropped); } } @@ -1464,7 +1452,7 @@ static int net_dm_stats_put(struct sk_buff *msg) return -EMSGSIZE; if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, - stats.dropped, NET_DM_ATTR_PAD)) + u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); @@ -1489,10 +1477,10 @@ static void net_dm_hw_stats_read(struct net_dm_stats *stats) do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - dropped = cpu_stats->dropped; + dropped = u64_stats_read(&cpu_stats->dropped); } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); - stats->dropped += dropped; + u64_stats_add(&stats->dropped, dropped); } } @@ -1508,7 +1496,7 @@ static int net_dm_hw_stats_put(struct sk_buff *msg) return -EMSGSIZE; if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, - stats.dropped, NET_DM_ATTR_PAD)) + u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); diff --git a/net/core/dst.c b/net/core/dst.c index d16c2c9bfebd..bc9c9be4e080 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -49,7 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, unsigned short flags) { dst->dev = dev; - dev_hold_track(dev, &dst->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dst->dev_tracker, GFP_ATOMIC); dst->ops = ops; dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; @@ -117,7 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) if (dst->ops->destroy) dst->ops->destroy(dst); - dev_put_track(dst->dev, &dst->dev_tracker); + netdev_put(dst->dev, &dst->dev_tracker); lwtstate_put(dst->lwtstate); @@ -159,8 +159,8 @@ void 
dst_dev_put(struct dst_entry *dst) dst->input = dst_discard; dst->output = dst_discard_out; dst->dev = blackhole_netdev; - dev_replace_track(dev, blackhole_netdev, &dst->dev_tracker, - GFP_ATOMIC); + netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, + GFP_ATOMIC); } EXPORT_SYMBOL(dst_dev_put); diff --git a/net/core/failover.c b/net/core/failover.c index dcaa92a85ea2..864d2d83eff4 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -252,7 +252,7 @@ struct failover *failover_register(struct net_device *dev, return ERR_PTR(-ENOMEM); rcu_assign_pointer(failover->ops, ops); - dev_hold_track(dev, &failover->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &failover->dev_tracker, GFP_KERNEL); dev->priv_flags |= IFF_FAILOVER; rcu_assign_pointer(failover->failover_dev, dev); @@ -285,7 +285,7 @@ void failover_unregister(struct failover *failover) failover_dev->name); failover_dev->priv_flags &= ~IFF_FAILOVER; - dev_put_track(failover_dev, &failover->dev_tracker); + netdev_put(failover_dev, &failover->dev_tracker); spin_lock(&failover_lock); list_del(&failover->list); diff --git a/net/core/filter.c b/net/core/filter.c index 5d16d66727fc..c191db80ce93 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -237,7 +237,7 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - u16 tmp, *ptr; + __be16 tmp, *ptr; const int len = sizeof(tmp); if (offset >= 0) { @@ -264,7 +264,7 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - u32 tmp, *ptr; + __be32 tmp, *ptr; const int len = sizeof(tmp); if (likely(offset >= 0)) { @@ -1214,10 +1214,11 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) { u32 filter_size = bpf_prog_size(fp->prog->len); + int optmem_max = READ_ONCE(sysctl_optmem_max); /* same check as in sock_kmalloc() */ - if (filter_size <= sysctl_optmem_max && - atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) { + if (filter_size <= optmem_max && + atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) { atomic_add(filter_size, &sk->sk_omem_alloc); return true; } @@ -1548,7 +1549,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - if (bpf_prog_size(prog->len) > sysctl_optmem_max) + if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) err = -ENOMEM; else err = reuseport_attach_prog(sk, prog); @@ -1615,7 +1616,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) } } else { /* BPF_PROG_TYPE_SOCKET_FILTER */ - if (bpf_prog_size(prog->len) > sysctl_optmem_max) { + if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) { err = -ENOMEM; goto err_prog_put; } @@ -3918,7 +3919,7 @@ static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) offset -= frag_size; } out: - return offset + len < size ? addr + offset : NULL; + return offset + len <= size ? 
addr + offset : NULL; } BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset, @@ -4653,6 +4654,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4); + info->key.flow_flags = FLOWI_FLAG_ANYSRC; } return 0; @@ -5012,8 +5014,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static int _bpf_setsockopt(struct sock *sk, int level, int optname, - char *optval, int optlen) +static int __bpf_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) { char devname[IFNAMSIZ]; int val, valbool; @@ -5024,8 +5026,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, if (!sk_fullsock(sk)) return -EINVAL; - sock_owned_by_me(sk); - if (level == SOL_SOCKET) { if (optlen != sizeof(int) && optname != SO_BINDTODEVICE) return -EINVAL; @@ -5035,14 +5035,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: - val = min_t(u32, val, sysctl_rmem_max); + val = min_t(u32, val, READ_ONCE(sysctl_rmem_max)); val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_SNDBUF: - val = min_t(u32, val, sysctl_wmem_max); + val = min_t(u32, val, READ_ONCE(sysctl_wmem_max)); val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; WRITE_ONCE(sk->sk_sndbuf, @@ -5064,7 +5064,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, case SO_RCVLOWAT: if (val < 0) val = INT_MAX; - WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); + if (sk->sk_socket && sk->sk_socket->ops->set_rcvlowat) + ret = sk->sk_socket->ops->set_rcvlowat(sk, val); + else + WRITE_ONCE(sk->sk_rcvlowat, val ? 
: 1); break; case SO_MARK: if (sk->sk_mark != val) { @@ -5258,14 +5261,20 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, return ret; } -static int _bpf_getsockopt(struct sock *sk, int level, int optname, +static int _bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { + if (sk_fullsock(sk)) + sock_owned_by_me(sk); + return __bpf_setsockopt(sk, level, optname, optval, optlen); +} + +static int __bpf_getsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ if (!sk_fullsock(sk)) goto err_clear; - sock_owned_by_me(sk); - if (level == SOL_SOCKET) { if (optlen != sizeof(int)) goto err_clear; @@ -5360,6 +5369,14 @@ err_clear: return -EINVAL; } +static int _bpf_getsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + if (sk_fullsock(sk)) + sock_owned_by_me(sk); + return __bpf_getsockopt(sk, level, optname, optval, optlen); +} + BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { @@ -5400,6 +5417,40 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return __bpf_setsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = { + .func = bpf_unlocked_sk_setsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return __bpf_getsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = { + .func = bpf_unlocked_sk_getsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_UNINIT_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx, int, level, int, optname, char *, optval, int, optlen) { @@ -6158,7 +6209,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len if (err) return err; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return seg6_lookup_nexthop(skb, NULL, 0); @@ -6463,8 +6513,6 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, /* bpf_skc_lookup performs the core lookup for different types of sockets, * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. - * Returns the socket as an 'unsigned long' to simplify the casting in the - * callers to satisfy BPF_CALL declarations. 
*/ static struct sock * __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, @@ -6472,8 +6520,8 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, u64 flags) { struct sock *sk = NULL; - u8 family = AF_UNSPEC; struct net *net; + u8 family; int sdif; if (len == sizeof(tuple->ipv4)) @@ -6483,8 +6531,7 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, else return NULL; - if (unlikely(family == AF_UNSPEC || flags || - !((s32)netns_id < 0 || netns_id <= S32_MAX))) + if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX))) goto out; if (family == AF_INET) @@ -7042,7 +7089,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -EINVAL; if (!th->ack || th->rst || th->syn) @@ -7117,7 +7164,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -ENOENT; if (!th->syn || th->ack || th->fin || th->rst) @@ -7466,6 +7513,114 @@ static const struct bpf_func_proto bpf_skb_set_tstamp_proto = { .arg3_type = ARG_ANYTHING, }; +#ifdef CONFIG_SYN_COOKIES +BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph, + struct tcphdr *, th, u32, th_len) +{ + u32 cookie; + u16 mss; + + if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) + return -EINVAL; + + mss = tcp_parse_mss_option(th, 0) ?: TCP_MSS_DEFAULT; + cookie = __cookie_v4_init_sequence(iph, th, &mss); + + return cookie | ((u64)mss << 32); +} + +static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = { + .func = bpf_tcp_raw_gen_syncookie_ipv4, + .gpl_only = true, /* __cookie_v4_init_sequence() is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct iphdr), + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + +BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph, + struct tcphdr *, th, u32, th_len) +{ +#if IS_BUILTIN(CONFIG_IPV6) + const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - + sizeof(struct ipv6hdr); + u32 cookie; + u16 mss; + + if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) + return -EINVAL; + + mss = tcp_parse_mss_option(th, 0) ?: mss_clamp; + cookie = __cookie_v6_init_sequence(iph, th, &mss); + + return cookie | ((u64)mss << 32); +#else + return -EPROTONOSUPPORT; +#endif +} + +static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = { + .func = bpf_tcp_raw_gen_syncookie_ipv6, + .gpl_only = true, /* __cookie_v6_init_sequence() is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct ipv6hdr), + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + +BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph, + struct tcphdr *, th) +{ + u32 cookie = ntohl(th->ack_seq) - 1; + + if (__cookie_v4_check(iph, th, cookie) > 0) + return 0; + + return -EACCES; +} + +static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = { + .func = bpf_tcp_raw_check_syncookie_ipv4, + .gpl_only = true, /* __cookie_v4_check is GPL */ + .pkt_access = true, + 
.ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct iphdr), + .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg2_size = sizeof(struct tcphdr), +}; + +BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph, + struct tcphdr *, th) +{ +#if IS_BUILTIN(CONFIG_IPV6) + u32 cookie = ntohl(th->ack_seq) - 1; + + if (__cookie_v6_check(iph, th, cookie) > 0) + return 0; + + return -EACCES; +#else + return -EPROTONOSUPPORT; +#endif +} + +static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = { + .func = bpf_tcp_raw_check_syncookie_ipv6, + .gpl_only = true, /* __cookie_v6_check is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct ipv6hdr), + .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg2_size = sizeof(struct tcphdr), +}; +#endif /* CONFIG_SYN_COOKIES */ + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -7829,6 +7984,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_assign_proto; case BPF_FUNC_skb_set_tstamp: return &bpf_skb_set_tstamp_proto; +#ifdef CONFIG_SYN_COOKIES + case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: + return &bpf_tcp_raw_gen_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_gen_syncookie_ipv6: + return &bpf_tcp_raw_gen_syncookie_ipv6_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv4: + return &bpf_tcp_raw_check_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv6: + return &bpf_tcp_raw_check_syncookie_ipv6_proto; +#endif #endif default: return bpf_sk_base_func_proto(func_id); @@ -7878,6 +8043,16 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; +#ifdef CONFIG_SYN_COOKIES + case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: + return &bpf_tcp_raw_gen_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_gen_syncookie_ipv6: + return &bpf_tcp_raw_gen_syncookie_ipv6_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv4: + return &bpf_tcp_raw_check_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv6: + return &bpf_tcp_raw_check_syncookie_ipv6_proto; +#endif #endif default: return bpf_sk_base_func_proto(func_id); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 6aee04f75e3e..764c4cb3fe8f 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -895,6 +895,11 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, return result == BPF_OK; } +static bool is_pppoe_ses_hdr_valid(const struct pppoe_hdr *hdr) +{ + return hdr->ver == 1 && hdr->type == 1 && hdr->code == 0; +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @net: associated network namespace, derived from @skb if NULL @@ -1214,26 +1219,60 @@ proto_again: struct pppoe_hdr hdr; __be16 proto; } *hdr, _hdr; + u16 ppp_proto; + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } - nhoff += PPPOE_SES_HLEN; - switch (hdr->proto) { - case htons(PPP_IP): + if (!is_pppoe_ses_hdr_valid(&hdr->hdr)) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } + + /* least significant bit of the most significant octet + * indicates if protocol field was compressed + */ + ppp_proto = ntohs(hdr->proto); + if (ppp_proto & 0x0100) { + ppp_proto = ppp_proto >> 8; + nhoff += PPPOE_SES_HLEN - 1; + } else { + nhoff += PPPOE_SES_HLEN; + } + + if 
(ppp_proto == PPP_IP) { proto = htons(ETH_P_IP); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; - break; - case htons(PPP_IPV6): + } else if (ppp_proto == PPP_IPV6) { proto = htons(ETH_P_IPV6); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; - break; - default: + } else if (ppp_proto == PPP_MPLS_UC) { + proto = htons(ETH_P_MPLS_UC); + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + } else if (ppp_proto == PPP_MPLS_MC) { + proto = htons(ETH_P_MPLS_MC); + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + } else if (ppp_proto_is_valid(ppp_proto)) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + } else { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PPPOE)) { + struct flow_dissector_key_pppoe *key_pppoe; + + key_pppoe = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PPPOE, + target_container); + key_pppoe->session_id = hdr->hdr.sid; + key_pppoe->ppp_proto = htons(ppp_proto); + key_pppoe->type = htons(ETH_P_PPP_SES); + } break; } case htons(ETH_P_TIPC): { diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 929f6379a279..8cfb63528d18 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -125,6 +125,13 @@ void flow_rule_match_ports(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_ports); +void flow_rule_match_ports_range(const struct flow_rule *rule, + struct flow_match_ports_range *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PORTS_RANGE, out); +} +EXPORT_SYMBOL(flow_rule_match_ports_range); + void flow_rule_match_tcp(const struct flow_rule *rule, struct flow_match_tcp *out) { @@ -223,6 +230,13 @@ void flow_rule_match_ct(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_ct); +void flow_rule_match_pppoe(const struct flow_rule *rule, + struct flow_match_pppoe *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PPPOE, out); +} +EXPORT_SYMBOL(flow_rule_match_pppoe); + struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index a10335b4ba2d..c8d137ef5980 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -345,7 +345,7 @@ static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats, for_each_possible_cpu(i) { const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); - qstats->qlen += qcpu->backlog; + qstats->qlen += qcpu->qlen; qstats->backlog += qcpu->backlog; qstats->drops += qcpu->drops; qstats->requeues += qcpu->requeues; diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index 541c7a72a28a..21619c70a82b 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -26,7 +26,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) cell = this_cpu_ptr(gcells->cells); - if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { + if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) { drop: dev_core_stats_rx_dropped_inc(dev); kfree_skb(skb); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index a244d3bade7d..aa6cb1f90966 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -110,7 +110,7 @@ static void linkwatch_add_event(struct net_device *dev) spin_lock_irqsave(&lweventlist_lock, flags); if (list_empty(&dev->link_watch_list)) { list_add_tail(&dev->link_watch_list, &lweventlist); - dev_hold_track(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); } spin_unlock_irqrestore(&lweventlist_lock, flags); } diff --git 
a/net/core/neighbour.c b/net/core/neighbour.c index 54625287ee5b..78cc8fb68814 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -307,11 +307,35 @@ static int neigh_del_timer(struct neighbour *n) return 0; } -static void pneigh_queue_purge(struct sk_buff_head *list) +static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) { + struct sk_buff_head tmp; + unsigned long flags; struct sk_buff *skb; - while ((skb = skb_dequeue(list)) != NULL) { + skb_queue_head_init(&tmp); + spin_lock_irqsave(&list->lock, flags); + skb = skb_peek(list); + while (skb != NULL) { + struct sk_buff *skb_next = skb_peek_next(skb, list); + struct net_device *dev = skb->dev; + + if (net == NULL || net_eq(dev_net(dev), net)) { + struct in_device *in_dev; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (in_dev) + in_dev->arp_parms->qlen--; + rcu_read_unlock(); + __skb_unlink(skb, list); + __skb_queue_tail(&tmp, skb); + } + skb = skb_next; + } + spin_unlock_irqrestore(&list->lock, flags); + + while ((skb = __skb_dequeue(&tmp))) { dev_put(skb->dev); kfree_skb(skb); } @@ -385,9 +409,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - - del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue); + pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev)); + if (skb_queue_empty_lockless(&tbl->proxy_queue)) + del_timer_sync(&tbl->proxy_timer); return 0; } @@ -624,7 +648,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, memcpy(n->primary_key, pkey, key_len); n->dev = dev; - dev_hold_track(dev, &n->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC); /* Protocol specific setup. 
*/ if (tbl->constructor && (error = tbl->constructor(n)) < 0) { @@ -770,10 +794,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; - dev_hold_track(dev, &n->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &n->dev_tracker, GFP_KERNEL); if (tbl->pconstructor && tbl->pconstructor(n)) { - dev_put_track(dev, &n->dev_tracker); + netdev_put(dev, &n->dev_tracker); kfree(n); n = NULL; goto out; @@ -805,7 +829,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); - dev_put_track(n->dev, &n->dev_tracker); + netdev_put(n->dev, &n->dev_tracker); kfree(n); return 0; } @@ -838,7 +862,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, n->next = NULL; if (tbl->pdestructor) tbl->pdestructor(n); - dev_put_track(n->dev, &n->dev_tracker); + netdev_put(n->dev, &n->dev_tracker); kfree(n); } return -ENOENT; @@ -879,7 +903,7 @@ void neigh_destroy(struct neighbour *neigh) if (dev->netdev_ops->ndo_neigh_destroy) dev->netdev_ops->ndo_neigh_destroy(dev, neigh); - dev_put_track(dev, &neigh->dev_tracker); + netdev_put(dev, &neigh->dev_tracker); neigh_parms_put(neigh->parms); neigh_dbg(2, "neigh %p is destroyed\n", neigh); @@ -1579,7 +1603,7 @@ static void neigh_managed_work(struct work_struct *work) list_for_each_entry(neigh, &tbl->managed_list, managed_list) neigh_event_send_probe(neigh, NULL, false); queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, - max(NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME), HZ)); + NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS)); write_unlock_bh(&tbl->lock); } @@ -1597,8 +1621,15 @@ static void neigh_proxy_process(struct timer_list *t) if (tdif <= 0) { struct net_device *dev = skb->dev; + struct in_device *in_dev; + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (in_dev) + in_dev->arp_parms->qlen--; + rcu_read_unlock(); __skb_unlink(skb, &tbl->proxy_queue); + if (tbl->proxy_redo && netif_running(dev)) { rcu_read_lock(); tbl->proxy_redo(skb); @@ -1623,7 +1654,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, unsigned long sched_next = jiffies + prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY)); - if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { + if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) { kfree_skb(skb); return; } @@ -1639,6 +1670,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, skb_dst_drop(skb); dev_hold(skb->dev); __skb_queue_tail(&tbl->proxy_queue, skb); + p->qlen++; mod_timer(&tbl->proxy_timer, sched_next); spin_unlock(&tbl->proxy_queue.lock); } @@ -1671,13 +1703,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, refcount_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); - dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL); + p->qlen = 0; + netdev_hold(dev, &p->dev_tracker, GFP_KERNEL); p->dev = dev; write_pnet(&p->net, net); p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kfree(p); return NULL; } @@ -1708,7 +1741,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) list_del(&parms->list); parms->dead = 1; write_unlock_bh(&tbl->lock); - dev_put_track(parms->dev, &parms->dev_tracker); + netdev_put(parms->dev, &parms->dev_tracker); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } EXPORT_SYMBOL(neigh_parms_release); @@ -1736,6 
+1769,7 @@ void neigh_table_init(int index, struct neigh_table *tbl) refcount_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); + tbl->parms.qlen = 0; tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) @@ -1787,7 +1821,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) cancel_delayed_work_sync(&tbl->managed_work); cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue); + pneigh_queue_purge(&tbl->proxy_queue, NULL); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); @@ -2100,7 +2134,9 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) nla_put_msecs(skb, NDTPA_PROXY_DELAY, NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_LOCKTIME, - NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD)) + NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) || + nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS, + NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); @@ -2255,6 +2291,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, [NDTPA_LOCKTIME] = { .type = NLA_U64 }, + [NDTPA_INTERVAL_PROBE_TIME_MS] = { .type = NLA_U64, .min = 1 }, }; static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2373,6 +2410,10 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, nla_get_msecs(tbp[i])); call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); break; + case NDTPA_INTERVAL_PROBE_TIME_MS: + NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS, + nla_get_msecs(tbp[i])); + break; case NDTPA_RETRANS_TIME: NEIGH_VAR_SET(p, RETRANS_TIME, nla_get_msecs(tbp[i])); @@ -3562,6 +3603,22 @@ static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, return ret; } +static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table tmp = *ctl; + int ret; + + int min = msecs_to_jiffies(1); + + tmp.extra1 = &min; + tmp.extra2 = NULL; + + ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos); + neigh_proc_update(ctl, write); + return ret; +} + int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -3658,6 +3715,9 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) +#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive) + #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) @@ -3676,6 +3736,8 @@ static struct neigh_sysctl_table { NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), + NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS, + "interval_probe_time_ms"), NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), diff 
--git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a3642569fe53..d61afd21aab5 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1017,7 +1017,7 @@ static void rx_queue_release(struct kobject *kobj) #endif memset(kobj, 0, sizeof(*kobj)); - dev_put_track(queue->dev, &queue->dev_tracker); + netdev_put(queue->dev, &queue->dev_tracker); } static const void *rx_queue_namespace(struct kobject *kobj) @@ -1057,7 +1057,7 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) /* Kobject_put later will trigger rx_queue_release call which * decreases dev refcount: Take that reference here */ - dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL); + netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, @@ -1620,7 +1620,7 @@ static void netdev_queue_release(struct kobject *kobj) struct netdev_queue *queue = to_netdev_queue(kobj); memset(kobj, 0, sizeof(*kobj)); - dev_put_track(queue->dev, &queue->dev_tracker); + netdev_put(queue->dev, &queue->dev_tracker); } static const void *netdev_queue_namespace(struct kobject *kobj) @@ -1660,7 +1660,7 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) /* Kobject_put later will trigger netdev_queue_release call * which decreases dev refcount: Take that reference here */ - dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL); + netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 0ec2f5906a27..6b9f19122ec1 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -18,6 +18,7 @@ #include <linux/user_namespace.h> #include <linux/net_namespace.h> #include <linux/sched/task.h> +#include <linux/sched/mm.h> #include <linux/uidgid.h> #include <linux/cookie.h> @@ -1143,7 +1144,13 @@ static int __register_pernet_operations(struct list_head *list, * setup_net() and cleanup_net() are not possible. 
*/ for_each_net(net) { + struct mem_cgroup *old, *memcg; + + memcg = mem_cgroup_or_root(get_mem_cgroup_from_obj(net)); + old = set_active_memcg(memcg); error = ops_init(ops, net); + set_active_memcg(old); + mem_cgroup_put(memcg); if (error) goto out_undo; list_add_tail(&net->exit_list, &net_exit_list); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index db724463e7cd..5d27067b72d5 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -853,7 +853,7 @@ void netpoll_cleanup(struct netpoll *np) if (!np->dev) goto out; __netpoll_cleanup(np); - dev_put_track(np->dev, &np->dev_tracker); + netdev_put(np->dev, &np->dev_tracker); np->dev = NULL; out: rtnl_unlock(); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f18e6e771993..9b203d8660e4 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -16,7 +16,7 @@ #include <linux/dma-direction.h> #include <linux/dma-mapping.h> #include <linux/page-flags.h> -#include <linux/mm.h> /* for __put_page() */ +#include <linux/mm.h> /* for put_page() */ #include <linux/poison.h> #include <linux/ethtool.h> @@ -389,7 +389,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */ memset(&pool->alloc.cache, 0, sizeof(void *) * bulk); - nr_pages = alloc_pages_bulk_array(gfp, bulk, pool->alloc.cache); + nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk, + pool->alloc.cache); if (unlikely(!nr_pages)) return NULL; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 84b62cd7bc57..88906ba6d9a7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2100,7 +2100,7 @@ static int pktgen_setup_dev(const struct pktgen_net *pn, /* Clean old setups */ if (pkt_dev->odev) { - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); pkt_dev->odev = NULL; } @@ -3807,7 +3807,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) return add_dev_to_thread(t, pkt_dev); out2: - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); out1: #ifdef CONFIG_XFRM free_SAs(pkt_dev); @@ -3901,7 +3901,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, /* Dis-associate from the interface */ if (pkt_dev->odev) { - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); pkt_dev->odev = NULL; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ac45328607f7..4b5b15c684ed 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6070,6 +6070,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (kind == RTNL_KIND_DEL && (nlh->nlmsg_flags & NLM_F_BULK) && !(flags & RTNL_FLAG_BULK_DEL_SUPPORTED)) { NL_SET_ERR_MSG(extack, "Bulk delete is not supported"); + module_put(owner); goto err_unlock; } diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 5f85e01d4093..b0ff6153be62 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -64,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net, .daddr = *(struct in6_addr *)daddr, }; - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); @@ -120,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #ifdef CONFIG_INET u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; 
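/* Editor's note — pattern sketch, not part of this patch: the READ_ONCE()
 * conversions in this series all follow the same recipe for sysctls that
 * are written through /proc while being read locklessly on fast paths:
 *
 *	writer:  WRITE_ONCE(net->ipv4.sysctl_tcp_timestamps, val);
 *	reader:  if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
 *			return 0;
 *
 * This annotates the intentional data race for KCSAN and prevents
 * load/store tearing; it adds no ordering beyond a single plain read.
 */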
ts_secret_init(); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5b3559cb1d82..417463da4fac 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -91,6 +91,13 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); +#undef FN +#define FN(reason) [SKB_DROP_REASON_##reason] = #reason, +const char * const drop_reasons[] = { + DEFINE_DROP_REASON(FN, FN) +}; +EXPORT_SYMBOL(drop_reasons); + /** * skb_panic - private function for out-of-line support * @skb: buffer @@ -172,13 +179,14 @@ static struct sk_buff *napi_skb_cache_get(void) struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; - if (unlikely(!nc->skb_count)) + if (unlikely(!nc->skb_count)) { nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache, GFP_ATOMIC, NAPI_SKB_CACHE_BULK, nc->skb_cache); - if (unlikely(!nc->skb_count)) - return NULL; + if (unlikely(!nc->skb_count)) + return NULL; + } skb = nc->skb_cache[--nc->skb_count]; kasan_unpoison_object_data(skbuff_head_cache, skb); @@ -450,8 +458,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->fclone = SKB_FCLONE_ORIG; refcount_set(&fclones->fclone_ref, 1); - - fclones->skb2.fclone = SKB_FCLONE_CLONE; } return skb; @@ -557,6 +563,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, struct sk_buff *skb; void *data; + DEBUG_NET_WARN_ON_ONCE(!in_softirq()); len += NET_SKB_PAD + NET_IP_ALIGN; /* If requested length is either too small or too big, @@ -666,11 +673,18 @@ static void skb_release_data(struct sk_buff *skb) &shinfo->dataref)) goto exit; - skb_zcopy_clear(skb, true); + if (skb_zcopy(skb)) { + bool skip_unref = shinfo->flags & SKBFL_MANAGED_FRAG_REFS; + + skb_zcopy_clear(skb, true); + if (skip_unref) + goto free_head; + } for (i = 0; i < shinfo->nr_frags; i++) __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle); +free_head: if (shinfo->frag_list) kfree_skb_list(shinfo->frag_list); @@ -725,7 +739,7 @@ void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); if (skb->destructor) { - WARN_ON(in_hardirq()); + DEBUG_NET_WARN_ON_ONCE(in_hardirq()); skb->destructor(skb); } #if IS_ENABLED(CONFIG_NF_CONNTRACK) @@ -895,7 +909,10 @@ EXPORT_SYMBOL(skb_dump); */ void skb_tx_error(struct sk_buff *skb) { - skb_zcopy_clear(skb, true); + if (skb) { + skb_zcopy_downgrade_managed(skb); + skb_zcopy_clear(skb, true); + } } EXPORT_SYMBOL(skb_tx_error); @@ -978,7 +995,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; } - lockdep_assert_in_softirq(); + DEBUG_NET_WARN_ON_ONCE(!in_softirq()); if (!skb_unref(skb)) return; @@ -1193,7 +1210,7 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) uarg->len = 1; uarg->bytelen = size; uarg->zerocopy = 1; - uarg->flags = SKBFL_ZEROCOPY_FRAG; + uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; refcount_set(&uarg->refcnt, 1); sock_hold(sk); @@ -1212,6 +1229,10 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, const u32 byte_limit = 1 << 19; /* limit to a few TSO */ u32 bytelen, next; + /* there might be non MSG_ZEROCOPY users */ + if (uarg->callback != msg_zerocopy_callback) + return NULL; + /* realloc only when socket is locked (TCP, UDP cork), * so uarg->len and sk_zckey access is serialized */ @@ -1354,7 +1375,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, if (orig_uarg && uarg != orig_uarg) return -EEXIST; - err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, 
len); + err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len); if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) { struct sock *save_sk = skb->sk; @@ -1371,6 +1392,16 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream); +void __skb_zcopy_downgrade_managed(struct sk_buff *skb) +{ + int i; + + skb_shinfo(skb)->flags &= ~SKBFL_MANAGED_FRAG_REFS; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + skb_frag_ref(skb, i); +} +EXPORT_SYMBOL_GPL(__skb_zcopy_downgrade_managed); + static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig, gfp_t gfp_mask) { @@ -1508,6 +1539,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) refcount_read(&fclones->fclone_ref) == 1) { n = &fclones->skb2; refcount_set(&fclones->fclone_ref, 2); + n->fclone = SKB_FCLONE_CLONE; } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; @@ -1688,6 +1720,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, BUG_ON(skb_shared(skb)); + skb_zcopy_downgrade_managed(skb); + size = SKB_DATA_ALIGN(size); if (skb_pfmemalloc(skb)) @@ -3190,9 +3224,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) } } - to->truesize += len + plen; - to->len += len + plen; - to->data_len += len + plen; + skb_len_add(to, len + plen); if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { skb_tx_error(from); @@ -3484,6 +3516,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) int pos = skb_headlen(skb); const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY; + skb_zcopy_downgrade_managed(skb); + skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags; skb_zerocopy_clone(skb1, skb, 0); if (len < pos) /* Split line is inside header. */ @@ -3629,13 +3663,8 @@ onlymerged: tgt->ip_summed = CHECKSUM_PARTIAL; skb->ip_summed = CHECKSUM_PARTIAL; - /* Yak, is it really working this way? Some helper please? 
*/ - skb->len -= shiftlen; - skb->data_len -= shiftlen; - skb->truesize -= shiftlen; - tgt->len += shiftlen; - tgt->data_len += shiftlen; - tgt->truesize += shiftlen; + skb_len_add(skb, -shiftlen); + skb_len_add(tgt, shiftlen); return shiftlen; } @@ -3837,6 +3866,7 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page, if (skb_can_coalesce(skb, i, page, offset)) { skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); } else if (i < MAX_SKB_FRAGS) { + skb_zcopy_downgrade_managed(skb); get_page(page); skb_fill_page_desc(skb, i, page, offset, size); } else { @@ -4179,9 +4209,8 @@ normal: SKB_GSO_CB(nskb)->csum_start = skb_headroom(nskb) + doffset; } else { - skb_copy_bits(head_skb, offset, - skb_put(nskb, len), - len); + if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len)) + goto err; } continue; } @@ -4772,7 +4801,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { bool ret; - if (likely(sysctl_tstamp_allow_data || tsonly)) + if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly)) return true; read_lock_bh(&sk->sk_callback_lock); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index b0fcd0200e84..188f8558d27d 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -324,14 +324,13 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from, goto out; } - copied = iov_iter_get_pages(from, pages, bytes, maxpages, + copied = iov_iter_get_pages2(from, pages, bytes, maxpages, &offset); if (copied <= 0) { ret = -EFAULT; goto out; } - iov_iter_advance(from, copied); bytes -= copied; msg->sg.size += copied; @@ -462,7 +461,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, if (copied == len) break; - } while (i != msg_rx->sg.end); + } while ((i != msg_rx->sg.end) && !sg_is_last(sge)); if (unlikely(peek)) { msg_rx = sk_psock_next_msg(psock, msg_rx); @@ -472,7 +471,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, } msg_rx->sg.start = i; - if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { + if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) { msg_rx = sk_psock_dequeue_msg(psock); kfree_sk_msg(msg_rx); } @@ -497,23 +496,27 @@ bool sk_msg_is_readable(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_msg_is_readable); -static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, - struct sk_buff *skb) +static struct sk_msg *alloc_sk_msg(void) { struct sk_msg *msg; - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) + msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL); + if (unlikely(!msg)) return NULL; + sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS); + return msg; +} - if (!sk_rmem_schedule(sk, skb, skb->truesize)) +static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, + struct sk_buff *skb) +{ + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) return NULL; - msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL); - if (unlikely(!msg)) + if (!sk_rmem_schedule(sk, skb, skb->truesize)) return NULL; - sk_msg_init(msg); - return msg; + return alloc_sk_msg(); } static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, @@ -590,13 +593,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len) { - struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); + struct sk_msg *msg = alloc_sk_msg(); struct sock *sk = psock->sk; int err; if (unlikely(!msg)) return -EAGAIN; - sk_msg_init(msg); skb_set_owner_r(skb, sk); 
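/* Editor's note — not part of this patch: alloc_sk_msg() above replaces
 * the open-coded kzalloc() + sk_msg_init() pairs here and in
 * sk_psock_create_ingress_msg(); because kzalloc() already zeroes the
 * struct, only the scatterlist end marker still needs to be set, i.e.
 * roughly:
 *
 *	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
 *	if (msg)
 *		sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
 */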
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); if (err < 0) @@ -720,6 +722,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) psock->eval = __SK_NONE; psock->sk_proto = prot; psock->saved_unhash = prot->unhash; + psock->saved_destroy = prot->destroy; psock->saved_close = prot->close; psock->saved_write_space = sk->sk_write_space; @@ -735,7 +738,9 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED); refcount_set(&psock->refcnt, 1); - rcu_assign_sk_user_data_nocopy(sk, psock); + __rcu_assign_sk_user_data_with_flags(sk, psock, + SK_USER_DATA_NOCOPY | + SK_USER_DATA_PSOCK); sock_hold(sk); out: @@ -1164,21 +1169,14 @@ static void sk_psock_done_strp(struct sk_psock *psock) } #endif /* CONFIG_BPF_STREAM_PARSER */ -static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, - unsigned int offset, size_t orig_len) +static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) { - struct sock *sk = (struct sock *)desc->arg.data; struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; - int len = orig_len; + int len = skb->len; - /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */ - skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) { - desc->error = -ENOMEM; - return 0; - } + skb_get(skb); rcu_read_lock(); psock = sk_psock(sk); @@ -1191,15 +1189,14 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, if (!prog) prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { - skb->sk = sk; skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); - skb->sk = NULL; } - if (sk_psock_verdict_apply(psock, skb, ret) < 0) - len = 0; + ret = sk_psock_verdict_apply(psock, skb, ret); + if (ret < 0) + len = ret; out: rcu_read_unlock(); return len; @@ -1208,16 +1205,10 @@ out: static void sk_psock_verdict_data_ready(struct sock *sk) { struct socket *sock = sk->sk_socket; - read_descriptor_t desc; - if (unlikely(!sock || !sock->ops || !sock->ops->read_sock)) + if (unlikely(!sock || !sock->ops || !sock->ops->read_skb)) return; - - desc.arg.data = sk; - desc.error = 0; - desc.count = 1; - - sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv); + sock->ops->read_skb(sk, sk_psock_verdict_recv); } void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) diff --git a/net/core/sock.c b/net/core/sock.c index 2ff40dd0a7a6..788c1372663c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -991,7 +991,7 @@ EXPORT_SYMBOL(sock_set_mark); static void sock_release_reserved_memory(struct sock *sk, int bytes) { /* Round down bytes to multiple of pages */ - bytes &= ~(SK_MEM_QUANTUM - 1); + bytes = round_down(bytes, PAGE_SIZE); WARN_ON(bytes > sk->sk_reserved_mem); sk->sk_reserved_mem -= bytes; @@ -1019,7 +1019,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes) return -ENOMEM; /* pre-charge to forward_alloc */ - allocated = sk_memory_allocated_add(sk, pages); + sk_memory_allocated_add(sk, pages); + allocated = sk_memory_allocated(sk); /* If the system goes into memory pressure with this * precharge, give up and return error. 
*/ @@ -1028,9 +1029,9 @@ static int sock_reserve_memory(struct sock *sk, int bytes) mem_cgroup_uncharge_skmem(sk->sk_memcg, pages); return -ENOMEM; } - sk->sk_forward_alloc += pages << SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc += pages << PAGE_SHIFT; - sk->sk_reserved_mem += pages << SK_MEM_QUANTUM_SHIFT; + sk->sk_reserved_mem += pages << PAGE_SHIFT; return 0; } @@ -1100,7 +1101,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, * play 'guess the biggest size' games. RCVBUF/SNDBUF * are treated in BSD as hints */ - val = min_t(u32, val, sysctl_wmem_max); + val = min_t(u32, val, READ_ONCE(sysctl_wmem_max)); set_sndbuf: /* Ensure val * 2 fits into an int, to prevent max_t() * from treating it as a negative value. @@ -1132,7 +1133,7 @@ set_sndbuf: * play 'guess the biggest size' games. RCVBUF/SNDBUF * are treated in BSD as hints */ - __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max)); + __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max))); break; case SO_RCVBUFFORCE: @@ -2535,7 +2536,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > - sysctl_optmem_max) + READ_ONCE(sysctl_optmem_max)) return NULL; skb = alloc_skb(size, priority); @@ -2553,8 +2554,10 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, */ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) { - if ((unsigned int)size <= sysctl_optmem_max && - atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { + int optmem_max = READ_ONCE(sysctl_optmem_max); + + if ((unsigned int)size <= optmem_max && + atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { void *mem; /* First do the add, to avoid the race if kmalloc * might sleep. 
@@ -2844,7 +2847,7 @@ void __release_sock(struct sock *sk) do { next = skb->next; prefetch(next); - WARN_ON_ONCE(skb_dst_is_noref(skb)); + DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb)); skb_mark_not_on_list(skb); sk_backlog_rcv(sk, skb); @@ -2869,6 +2872,7 @@ void __sk_flush_backlog(struct sock *sk) __release_sock(sk); spin_unlock_bh(&sk->sk_lock.slock); } +EXPORT_SYMBOL_GPL(__sk_flush_backlog); /** * sk_wait_data - wait for data to arrive at sk_receive_queue @@ -2906,11 +2910,13 @@ EXPORT_SYMBOL(sk_wait_data); */ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { - struct proto *prot = sk->sk_prot; - long allocated = sk_memory_allocated_add(sk, amt); bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg; + struct proto *prot = sk->sk_prot; bool charged = true; + long allocated; + sk_memory_allocated_add(sk, amt); + allocated = sk_memory_allocated(sk); if (memcg_charge && !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt, gfp_memcg_charge()))) @@ -2987,7 +2993,6 @@ suppress_allocation: return 0; } -EXPORT_SYMBOL(__sk_mem_raise_allocated); /** * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated @@ -3003,10 +3008,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) { int ret, amt = sk_mem_pages(size); - sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc += amt << PAGE_SHIFT; ret = __sk_mem_raise_allocated(sk, size, amt, kind); if (!ret) - sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc -= amt << PAGE_SHIFT; return ret; } EXPORT_SYMBOL(__sk_mem_schedule); @@ -3029,17 +3034,16 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount) (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); } -EXPORT_SYMBOL(__sk_mem_reduce_allocated); /** * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated * @sk: socket - * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) + * @amount: number of bytes (rounded down to a PAGE_SIZE multiple) */ void __sk_mem_reclaim(struct sock *sk, int amount) { - amount >>= SK_MEM_QUANTUM_SHIFT; - sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + amount >>= PAGE_SHIFT; + sk->sk_forward_alloc -= amount << PAGE_SHIFT; __sk_mem_reduce_allocated(sk, amount); } EXPORT_SYMBOL(__sk_mem_reclaim); @@ -3307,8 +3311,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) timer_setup(&sk->sk_timer, NULL, 0); sk->sk_allocation = GFP_KERNEL; - sk->sk_rcvbuf = sysctl_rmem_default; - sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default); + sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); @@ -3363,7 +3367,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; - sk->sk_ll_usec = sysctl_net_busy_read; + sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read); #endif sk->sk_max_pacing_rate = ~0UL; @@ -3798,6 +3802,10 @@ int proto_register(struct proto *prot, int alloc_slab) pr_err("%s: missing sysctl_mem\n", prot->name); return -EINVAL; } + if (prot->memory_allocated && !prot->per_cpu_fw_alloc) { + pr_err("%s: missing per_cpu_fw_alloc\n", prot->name); + return -EINVAL; + } if (alloc_slab) { prot->slab = kmem_cache_create_usercopy(prot->name, prot->obj_size, 0, diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 81d4b4756a02..9a9fb9487d63 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -783,13 +783,22 @@ static int sock_map_init_seq_private(void 
*priv_data, { struct sock_map_seq_info *info = priv_data; + bpf_map_inc_with_uref(aux->map); info->map = aux->map; return 0; } +static void sock_map_fini_seq_private(void *priv_data) +{ + struct sock_map_seq_info *info = priv_data; + + bpf_map_put_with_uref(info->map); +} + static const struct bpf_iter_seq_info sock_map_iter_seq_info = { .seq_ops = &sock_map_seq_ops, .init_seq_private = sock_map_init_seq_private, + .fini_seq_private = sock_map_fini_seq_private, .seq_priv_size = sizeof(struct sock_map_seq_info), }; @@ -1369,18 +1378,27 @@ static const struct seq_operations sock_hash_seq_ops = { }; static int sock_hash_init_seq_private(void *priv_data, - struct bpf_iter_aux_info *aux) + struct bpf_iter_aux_info *aux) { struct sock_hash_seq_info *info = priv_data; + bpf_map_inc_with_uref(aux->map); info->map = aux->map; info->htab = container_of(aux->map, struct bpf_shtab, map); return 0; } +static void sock_hash_fini_seq_private(void *priv_data) +{ + struct sock_hash_seq_info *info = priv_data; + + bpf_map_put_with_uref(info->map); +} + static const struct bpf_iter_seq_info sock_hash_iter_seq_info = { .seq_ops = &sock_hash_seq_ops, .init_seq_private = sock_hash_init_seq_private, + .fini_seq_private = sock_hash_fini_seq_private, .seq_priv_size = sizeof(struct sock_hash_seq_info), }; @@ -1561,6 +1579,29 @@ void sock_map_unhash(struct sock *sk) } EXPORT_SYMBOL_GPL(sock_map_unhash); +void sock_map_destroy(struct sock *sk) +{ + void (*saved_destroy)(struct sock *sk); + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock_get(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + if (sk->sk_prot->destroy) + sk->sk_prot->destroy(sk); + return; + } + + saved_destroy = psock->saved_destroy; + sock_map_remove_links(sk, psock); + rcu_read_unlock(); + sk_psock_stop(psock, false); + sk_psock_put(sk, psock); + saved_destroy(sk); +} +EXPORT_SYMBOL_GPL(sock_map_destroy); + void sock_map_close(struct sock *sk, long timeout) { void (*saved_close)(struct sock *sk, long timeout); diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 3f00a28fe762..5daa1fa54249 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -387,7 +387,7 @@ void reuseport_stop_listen_sock(struct sock *sk) prog = rcu_dereference_protected(reuse->prog, lockdep_is_held(&reuseport_lock)); - if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req || + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) || (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) { /* Migration capable, move sk from the listening section * to the closed section. @@ -545,7 +545,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk, hash = migrating_sk->sk_hash; prog = rcu_dereference(reuse->prog); if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) { - if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req)) goto select_by_hash; goto failure; } diff --git a/net/core/stream.c b/net/core/stream.c index 06b36c730ce8..ccc083cdef23 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -196,13 +196,13 @@ void sk_stream_kill_queues(struct sock *sk) __skb_queue_purge(&sk->sk_receive_queue); /* Next, the write queue. */ - WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); + WARN_ON_ONCE(!skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. 
 */
	sk_mem_reclaim_final(sk);

-	WARN_ON(sk->sk_wmem_queued);
-	WARN_ON(sk->sk_forward_alloc);
+	WARN_ON_ONCE(sk->sk_wmem_queued);
+	WARN_ON_ONCE(sk->sk_forward_alloc);

	/* It is _impossible_ for the backlog to contain anything
	 * when we get here.  All user references to this socket
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 71a13596ea2b..725891527814 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -234,14 +234,17 @@ static int set_default_qdisc(struct ctl_table *table, int write,
 static int proc_do_dev_weight(struct ctl_table *table, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
 {
-	int ret;
+	static DEFINE_MUTEX(dev_weight_mutex);
+	int ret, weight;

+	mutex_lock(&dev_weight_mutex);
 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
-	if (ret != 0)
-		return ret;
-
-	dev_rx_weight = weight_p * dev_weight_rx_bias;
-	dev_tx_weight = weight_p * dev_weight_tx_bias;
+	if (!ret && write) {
+		weight = READ_ONCE(weight_p);
+		WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
+		WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
+	}
+	mutex_unlock(&dev_weight_mutex);
 	return ret;
 }
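One closing note on the sysctl_net_core.c hunk above: before this change, two
concurrent writers to /proc/sys/net/core/dev_weight could interleave between
updating weight_p and recomputing dev_rx_weight/dev_tx_weight, leaving the
derived weights computed from different base values. The hunk serializes
writers behind a local mutex and publishes the derived values with
WRITE_ONCE() to pair with lockless READ_ONCE() readers. A minimal userspace
analogue of the pattern, using C11 relaxed atomics in place of
READ_ONCE()/WRITE_ONCE() (names and biases are illustrative, not kernel API):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	/* Illustrative analogues of weight_p / dev_rx_weight / dev_tx_weight. */
	static pthread_mutex_t weight_mutex = PTHREAD_MUTEX_INITIALIZER;
	static atomic_int weight = 64;
	static atomic_int rx_weight = 64;
	static atomic_int tx_weight = 64;

	#define RX_BIAS 1
	#define TX_BIAS 1

	/* Writer side: serialized, so the derived values are never computed
	 * from two different base weights by racing writers. */
	static void set_weight(int new_weight)
	{
		pthread_mutex_lock(&weight_mutex);
		atomic_store_explicit(&weight, new_weight, memory_order_relaxed);
		int w = atomic_load_explicit(&weight, memory_order_relaxed);
		atomic_store_explicit(&rx_weight, w * RX_BIAS, memory_order_relaxed);
		atomic_store_explicit(&tx_weight, w * TX_BIAS, memory_order_relaxed);
		pthread_mutex_unlock(&weight_mutex);
	}

	/* Reader side: lockless, like the softirq paths that consume
	 * dev_rx_weight; relaxed atomic loads play the role of READ_ONCE(). */
	static int get_rx_weight(void)
	{
		return atomic_load_explicit(&rx_weight, memory_order_relaxed);
	}

	int main(void)
	{
		set_weight(128);
		printf("rx weight: %d\n", get_rx_weight());
		return 0;
	}

Note that the mutex only orders writers against each other; readers never take
it, which mirrors how the packet-processing paths consume dev_rx_weight and
dev_tx_weight without locks.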