diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/Makefile | 2 | ||||
-rw-r--r-- | net/core/bpf_sk_storage.c | 5 | ||||
-rw-r--r-- | net/core/datagram.c | 27 | ||||
-rw-r--r-- | net/core/dev.c | 286 | ||||
-rw-r--r-- | net/core/dev_ioctl.c | 1 | ||||
-rw-r--r-- | net/core/devlink.c | 255 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 12 | ||||
-rw-r--r-- | net/core/ethtool.c | 3116 | ||||
-rw-r--r-- | net/core/fib_rules.c | 2 | ||||
-rw-r--r-- | net/core/filter.c | 223 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 13 | ||||
-rw-r--r-- | net/core/neighbour.c | 1 | ||||
-rw-r--r-- | net/core/net_namespace.c | 99 | ||||
-rw-r--r-- | net/core/netclassid_cgroup.c | 47 | ||||
-rw-r--r-- | net/core/page_pool.c | 93 | ||||
-rw-r--r-- | net/core/pktgen.c | 44 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 48 | ||||
-rw-r--r-- | net/core/skbuff.c | 149 | ||||
-rw-r--r-- | net/core/skmsg.c | 2 | ||||
-rw-r--r-- | net/core/sock.c | 7 | ||||
-rw-r--r-- | net/core/sock_map.c | 28 | ||||
-rw-r--r-- | net/core/sock_reuseport.c | 1 | ||||
-rw-r--r-- | net/core/timestamping.c | 20 | ||||
-rw-r--r-- | net/core/utils.c | 20 |
24 files changed, 828 insertions, 3673 deletions
diff --git a/net/core/Makefile b/net/core/Makefile index a104dc8faafc..3e2c378e5f31 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -8,7 +8,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ +obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ fib_notifier.o xdp.o flow_offload.o diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 458be6b3eda9..3ab23f698221 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -643,9 +643,10 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&smap->map, attr); + nbuckets = roundup_pow_of_two(num_possible_cpus()); /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ - smap->bucket_log = max_t(u32, 1, ilog2(roundup_pow_of_two(num_possible_cpus()))); - nbuckets = 1U << smap->bucket_log; + nbuckets = max_t(u32, 2, nbuckets); + smap->bucket_log = ilog2(nbuckets); cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap); ret = bpf_map_charge_init(&smap->map.memory, cost); diff --git a/net/core/datagram.c b/net/core/datagram.c index da3c24ed129c..a78e7f864c1e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -84,7 +84,8 @@ static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, i /* * Wait for the last received packet to be different from skb */ -int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, +int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, + int *err, long *timeo_p, const struct sk_buff *skb) { int error; @@ -97,7 +98,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, if (error) goto out_err; - if (READ_ONCE(sk->sk_receive_queue.prev) != skb) + if 
(READ_ONCE(queue->prev) != skb) goto out; /* Socket shut down? */ @@ -209,6 +210,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, /** * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket + * @queue: socket queue from which to receive * @flags: MSG\_ flags * @destructor: invoked under the receive lock on successful dequeue * @off: an offset in bytes to peek skb from. Returns an offset @@ -241,13 +243,14 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, * quite explicitly by POSIX 1003.1g, don't change them without having * the standard around please. */ -struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, + struct sk_buff_head *queue, + unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), int *off, int *err, struct sk_buff **last) { - struct sk_buff_head *queue = &sk->sk_receive_queue; struct sk_buff *skb; unsigned long cpu_flags; /* @@ -278,7 +281,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (READ_ONCE(sk->sk_receive_queue.prev) != *last); + } while (READ_ONCE(queue->prev) != *last); error = -EAGAIN; @@ -288,7 +291,9 @@ no_packet: } EXPORT_SYMBOL(__skb_try_recv_datagram); -struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, +struct sk_buff *__skb_recv_datagram(struct sock *sk, + struct sk_buff_head *sk_queue, + unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), int *off, int *err) @@ -299,15 +304,16 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { - skb = __skb_try_recv_datagram(sk, flags, destructor, off, err, - &last); + skb = __skb_try_recv_datagram(sk, sk_queue, flags, destructor, + off, err, &last); if (skb) return skb; if (*err != -EAGAIN) break; } while (timeo && - 
!__skb_wait_for_more_packets(sk, err, &timeo, last)); + !__skb_wait_for_more_packets(sk, sk_queue, err, + &timeo, last)); return NULL; } @@ -318,7 +324,8 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, { int off = 0; - return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + return __skb_recv_datagram(sk, &sk->sk_receive_queue, + flags | (noblock ? MSG_DONTWAIT : 0), NULL, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index 7e885d069707..c6c985fe7b1b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -146,7 +146,6 @@ #include "net-sysfs.h" #define MAX_GRO_SKBS 8 -#define MAX_NEST_DEV 8 /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) @@ -331,6 +330,12 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name) name_node = netdev_name_node_lookup(net, name); if (!name_node) return -ENOENT; + /* lookup might have found our primary name or a name belonging + * to another device. + */ + if (name_node == dev->name_node || name_node->dev != dev) + return -EINVAL; + __netdev_name_node_alt_destroy(name_node); return 0; @@ -928,7 +933,7 @@ EXPORT_SYMBOL(dev_get_by_napi_id); * * The use of raw_seqcount_begin() and cond_resched() before * retrying is required as we want to give the writers a chance - * to complete when CONFIG_PREEMPT is not set. + * to complete when CONFIG_PREEMPTION is not set. 
*/ int netdev_get_name(struct net *net, char *name, int ifindex) { @@ -1764,7 +1769,6 @@ EXPORT_SYMBOL(register_netdevice_notifier); int unregister_netdevice_notifier(struct notifier_block *nb) { - struct net_device *dev; struct net *net; int err; @@ -1775,16 +1779,9 @@ int unregister_netdevice_notifier(struct notifier_block *nb) if (err) goto unlock; - for_each_net(net) { - for_each_netdev(net, dev) { - if (dev->flags & IFF_UP) { - call_netdevice_notifier(nb, NETDEV_GOING_DOWN, - dev); - call_netdevice_notifier(nb, NETDEV_DOWN, dev); - } - call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); - } - } + for_each_net(net) + call_netdevice_unregister_net_notifiers(nb, net); + unlock: rtnl_unlock(); up_write(&pernet_ops_rwsem); @@ -1792,6 +1789,42 @@ unlock: } EXPORT_SYMBOL(unregister_netdevice_notifier); +static int __register_netdevice_notifier_net(struct net *net, + struct notifier_block *nb, + bool ignore_call_fail) +{ + int err; + + err = raw_notifier_chain_register(&net->netdev_chain, nb); + if (err) + return err; + if (dev_boot_phase) + return 0; + + err = call_netdevice_register_net_notifiers(nb, net); + if (err && !ignore_call_fail) + goto chain_unregister; + + return 0; + +chain_unregister: + raw_notifier_chain_unregister(&net->netdev_chain, nb); + return err; +} + +static int __unregister_netdevice_notifier_net(struct net *net, + struct notifier_block *nb) +{ + int err; + + err = raw_notifier_chain_unregister(&net->netdev_chain, nb); + if (err) + return err; + + call_netdevice_unregister_net_notifiers(nb, net); + return 0; +} + /** * register_netdevice_notifier_net - register a per-netns network notifier block * @net: network namespace @@ -1812,23 +1845,9 @@ int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb) int err; rtnl_lock(); - err = raw_notifier_chain_register(&net->netdev_chain, nb); - if (err) - goto unlock; - if (dev_boot_phase) - goto unlock; - - err = call_netdevice_register_net_notifiers(nb, net); - if (err) - goto 
chain_unregister; - -unlock: + err = __register_netdevice_notifier_net(net, nb, false); rtnl_unlock(); return err; - -chain_unregister: - raw_notifier_chain_unregister(&netdev_chain, nb); - goto unlock; } EXPORT_SYMBOL(register_netdevice_notifier_net); @@ -1854,17 +1873,53 @@ int unregister_netdevice_notifier_net(struct net *net, int err; rtnl_lock(); - err = raw_notifier_chain_unregister(&net->netdev_chain, nb); - if (err) - goto unlock; + err = __unregister_netdevice_notifier_net(net, nb); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL(unregister_netdevice_notifier_net); - call_netdevice_unregister_net_notifiers(nb, net); +int register_netdevice_notifier_dev_net(struct net_device *dev, + struct notifier_block *nb, + struct netdev_net_notifier *nn) +{ + int err; -unlock: + rtnl_lock(); + err = __register_netdevice_notifier_net(dev_net(dev), nb, false); + if (!err) { + nn->nb = nb; + list_add(&nn->list, &dev->net_notifier_list); + } rtnl_unlock(); return err; } -EXPORT_SYMBOL(unregister_netdevice_notifier_net); +EXPORT_SYMBOL(register_netdevice_notifier_dev_net); + +int unregister_netdevice_notifier_dev_net(struct net_device *dev, + struct notifier_block *nb, + struct netdev_net_notifier *nn) +{ + int err; + + rtnl_lock(); + list_del(&nn->list); + err = __unregister_netdevice_notifier_net(dev_net(dev), nb); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net); + +static void move_netdevice_notifiers_dev_net(struct net_device *dev, + struct net *net) +{ + struct netdev_net_notifier *nn; + + list_for_each_entry(nn, &dev->net_notifier_list, list) { + __unregister_netdevice_notifier_net(dev_net(dev), nn->nb); + __register_netdevice_notifier_net(net, nn->nb, true); + } +} /** * call_netdevice_notifiers_info - call all network notifier blocks @@ -3021,6 +3076,8 @@ static u16 skb_tx_hash(const struct net_device *dev, if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); + if (hash >= qoffset) + hash -= qoffset; while 
(unlikely(hash >= qcount)) hash -= qcount; return hash + qoffset; @@ -3249,7 +3306,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, segs = skb_mac_gso_segment(skb, features); - if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) + if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) skb_warn_bad_offload(skb); return segs; @@ -3607,26 +3664,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) && - qdisc_run_begin(q)) { - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, - &q->state))) { - __qdisc_drop(skb, &to_free); - rc = NET_XMIT_DROP; - goto end_run; - } - qdisc_bstats_cpu_update(q, skb); - - rc = NET_XMIT_SUCCESS; - if (sch_direct_xmit(skb, q, dev, txq, NULL, true)) - __qdisc_run(q); - -end_run: - qdisc_run_end(q); - } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; - qdisc_run(q); - } + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + qdisc_run(q); if (unlikely(to_free)) kfree_skb_list(to_free); @@ -4477,14 +4516,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, /* Reinjected packets coming from act_mirred or similar should * not get XDP generic processing. */ - if (skb_cloned(skb) || skb_is_tc_redirected(skb)) + if (skb_is_tc_redirected(skb)) return XDP_PASS; /* XDP packets must be linear and must have sufficient headroom * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also * native XDP provides, thus we need to do it here as well. 
*/ - if (skb_is_nonlinear(skb) || + if (skb_cloned(skb) || skb_is_nonlinear(skb) || skb_headroom(skb) < XDP_PACKET_HEADROOM) { int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); int troom = skb->tail + skb->data_len - skb->end; @@ -4932,7 +4971,6 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { -#ifdef CONFIG_NETFILTER_INGRESS if (nf_hook_ingress_active(skb)) { int ingress_retval; @@ -4946,7 +4984,6 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, rcu_read_unlock(); return ingress_retval; } -#endif /* CONFIG_NETFILTER_INGRESS */ return 0; } @@ -5491,9 +5528,29 @@ static void flush_all_backlogs(void) put_online_cpus(); } +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. 
+ */ +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) +{ + list_add_tail(&skb->list, &napi->rx_list); + if (++napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); +} + INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); -static int napi_gro_complete(struct sk_buff *skb) +static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) { struct packet_offload *ptype; __be16 type = skb->protocol; @@ -5526,7 +5583,8 @@ static int napi_gro_complete(struct sk_buff *skb) } out: - return netif_receive_skb_internal(skb); + gro_normal_one(napi, skb); + return NET_RX_SUCCESS; } static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, @@ -5539,7 +5597,7 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) return; skb_list_del_init(skb); - napi_gro_complete(skb); + napi_gro_complete(napi, skb); napi->gro_hash[index].count--; } @@ -5641,7 +5699,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow) } } -static void gro_flush_oldest(struct list_head *head) +static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) { struct sk_buff *oldest; @@ -5657,7 +5715,7 @@ static void gro_flush_oldest(struct list_head *head) * SKB to the chain. 
*/ skb_list_del_init(oldest); - napi_gro_complete(oldest); + napi_gro_complete(napi, oldest); } INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, @@ -5723,7 +5781,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (&ptype->list == head) goto normal; - if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) { + if (PTR_ERR(pp) == -EINPROGRESS) { ret = GRO_CONSUMED; goto ok; } @@ -5733,7 +5791,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (pp) { skb_list_del_init(pp); - napi_gro_complete(pp); + napi_gro_complete(napi, pp); napi->gro_hash[hash].count--; } @@ -5744,7 +5802,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff goto normal; if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) { - gro_flush_oldest(gro_head); + gro_flush_oldest(napi, gro_head); } else { napi->gro_hash[hash].count++; } @@ -5802,26 +5860,6 @@ struct packet_offload *gro_find_complete_by_type(__be16 type) } EXPORT_SYMBOL(gro_find_complete_by_type); -/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ -static void gro_normal_list(struct napi_struct *napi) -{ - if (!napi->rx_count) - return; - netif_receive_skb_list_internal(&napi->rx_list); - INIT_LIST_HEAD(&napi->rx_list); - napi->rx_count = 0; -} - -/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, - * pass the whole batch up to the stack. 
- */ -static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) -{ - list_add_tail(&skb->list, &napi->rx_list); - if (++napi->rx_count >= gro_normal_batch) - gro_normal_list(napi); -} - static void napi_skb_free_stolen_head(struct sk_buff *skb) { skb_dst_drop(skb); @@ -6200,8 +6238,6 @@ bool napi_complete_done(struct napi_struct *n, int work_done) NAPIF_STATE_IN_BUSY_POLL))) return false; - gro_normal_list(n); - if (n->gro_bitmask) { unsigned long timeout = 0; @@ -6217,6 +6253,9 @@ bool napi_complete_done(struct napi_struct *n, int work_done) hrtimer_start(&n->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); } + + gro_normal_list(n); + if (unlikely(!list_empty(&n->poll_list))) { /* If n->poll_list is not empty, we need to mask irqs */ local_irq_save(flags); @@ -6548,8 +6587,6 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) goto out_unlock; } - gro_normal_list(n); - if (n->gro_bitmask) { /* flush too old packets * If HZ < 1000, flush all packets. @@ -6557,6 +6594,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) napi_gro_flush(n, HZ >= 1000); } + gro_normal_list(n); + /* Some drivers may have called napi_schedule * prior to exhausting their budget. 
*/ @@ -7151,8 +7190,8 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev, return 0; } -static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, - struct list_head **iter) +struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *lower; @@ -7164,6 +7203,7 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, return lower->dev; } +EXPORT_SYMBOL(netdev_next_lower_dev_rcu); static u8 __netdev_upper_depth(struct net_device *dev) { @@ -8194,6 +8234,22 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu) } EXPORT_SYMBOL(__dev_set_mtu); +int dev_validate_mtu(struct net_device *dev, int new_mtu, + struct netlink_ext_ack *extack) +{ + /* MTU must be positive, and in range */ + if (new_mtu < 0 || new_mtu < dev->min_mtu) { + NL_SET_ERR_MSG(extack, "mtu less than device minimum"); + return -EINVAL; + } + + if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { + NL_SET_ERR_MSG(extack, "mtu greater than device maximum"); + return -EINVAL; + } + return 0; +} + /** * dev_set_mtu_ext - Change maximum transfer unit * @dev: device @@ -8210,16 +8266,9 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, if (new_mtu == dev->mtu) return 0; - /* MTU must be positive, and in range */ - if (new_mtu < 0 || new_mtu < dev->min_mtu) { - NL_SET_ERR_MSG(extack, "mtu less than device minimum"); - return -EINVAL; - } - - if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { - NL_SET_ERR_MSG(extack, "mtu greater than device maximum"); - return -EINVAL; - } + err = dev_validate_mtu(dev, new_mtu, extack); + if (err) + return err; if (!netif_device_present(dev)) return -ENODEV; @@ -8542,7 +8591,17 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, struct netlink_ext_ack *extack, u32 flags, struct bpf_prog *prog) { + bool non_hw = !(flags & XDP_FLAGS_HW_MODE); + struct bpf_prog *prev_prog = NULL; struct netdev_bpf xdp; + int err; + + if (non_hw) { + prev_prog 
= bpf_prog_by_id(__dev_xdp_query(dev, bpf_op, + XDP_QUERY_PROG)); + if (IS_ERR(prev_prog)) + prev_prog = NULL; + } memset(&xdp, 0, sizeof(xdp)); if (flags & XDP_FLAGS_HW_MODE) @@ -8553,7 +8612,14 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, xdp.flags = flags; xdp.prog = prog; - return bpf_op(dev, &xdp); + err = bpf_op(dev, &xdp); + if (!err && non_hw) + bpf_prog_change_xdp(prev_prog, prog); + + if (prev_prog) + bpf_prog_put(prev_prog); + + return err; } static void dev_xdp_uninstall(struct net_device *dev) @@ -9257,7 +9323,7 @@ int register_netdevice(struct net_device *dev) /* Transfer changeable features to wanted_features and enable * software offloads (GSO and GRO). */ - dev->hw_features |= NETIF_F_SOFT_FEATURES; + dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF); dev->features |= NETIF_F_SOFT_FEATURES; if (dev->netdev_ops->ndo_udp_tunnel_add) { @@ -9302,8 +9368,10 @@ int register_netdevice(struct net_device *dev) goto err_uninit; ret = netdev_register_kobject(dev); - if (ret) + if (ret) { + dev->reg_state = NETREG_UNREGISTERED; goto err_uninit; + } dev->reg_state = NETREG_REGISTERED; __netdev_update_features(dev); @@ -9750,6 +9818,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); + INIT_LIST_HEAD(&dev->net_notifier_list); #ifdef CONFIG_NET_SCHED hash_init(dev->qdisc_hash); #endif @@ -9820,6 +9889,8 @@ void free_netdev(struct net_device *dev) free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; + free_percpu(dev->xdp_bulkq); + dev->xdp_bulkq = NULL; netdev_unregister_lockdep_key(dev); @@ -10011,6 +10082,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); netdev_adjacent_del_links(dev); + /* Move per-net netdevice notifiers that are following the netdevice */ + move_netdevice_notifiers_dev_net(dev, 
net); + /* Actually switch the network namespace */ dev_net_set(dev, net); dev->ifindex = new_ifindex; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 5163d900bb4f..dbaebbe573f0 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -187,6 +187,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr) case HWTSTAMP_TX_OFF: case HWTSTAMP_TX_ON: case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: tx_type_valid = 1; break; } diff --git a/net/core/devlink.c b/net/core/devlink.c index f76219bf0c21..b831c5545d6a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2103,11 +2103,11 @@ err_action_values_put: static struct devlink_dpipe_table * devlink_dpipe_table_find(struct list_head *dpipe_tables, - const char *table_name) + const char *table_name, struct devlink *devlink) { struct devlink_dpipe_table *table; - - list_for_each_entry_rcu(table, dpipe_tables, list) { + list_for_each_entry_rcu(table, dpipe_tables, list, + lockdep_is_held(&devlink->lock)) { if (!strcmp(table->name, table_name)) return table; } @@ -2226,7 +2226,7 @@ static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) return -EINVAL; @@ -2382,7 +2382,7 @@ static int devlink_dpipe_table_counters_set(struct devlink *devlink, struct devlink_dpipe_table *table; table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) return -EINVAL; @@ -3352,34 +3352,41 @@ devlink_param_value_get_from_info(const struct devlink_param *param, struct genl_info *info, union devlink_param_value *value) { + struct nlattr *param_data; int len; - if (param->type != DEVLINK_PARAM_TYPE_BOOL && - !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) + param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]; + + if (param->type != DEVLINK_PARAM_TYPE_BOOL && 
!param_data) return -EINVAL; switch (param->type) { case DEVLINK_PARAM_TYPE_U8: - value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u8)) + return -EINVAL; + value->vu8 = nla_get_u8(param_data); break; case DEVLINK_PARAM_TYPE_U16: - value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u16)) + return -EINVAL; + value->vu16 = nla_get_u16(param_data); break; case DEVLINK_PARAM_TYPE_U32: - value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u32)) + return -EINVAL; + value->vu32 = nla_get_u32(param_data); break; case DEVLINK_PARAM_TYPE_STRING: - len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]), - nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); - if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) || + len = strnlen(nla_data(param_data), nla_len(param_data)); + if (len == nla_len(param_data) || len >= __DEVLINK_PARAM_MAX_STRING_VALUE) return -EINVAL; - strcpy(value->vstr, - nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); + strcpy(value->vstr, nla_data(param_data)); break; case DEVLINK_PARAM_TYPE_BOOL: - value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ? 
- true : false; + if (param_data && nla_len(param_data)) + return -EINVAL; + value->vbool = nla_get_flag(param_data); break; } return 0; @@ -3986,6 +3993,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto out_unlock; } + /* return 0 if there is no further data to read */ + if (start_offset >= region->size) { + err = 0; + goto out_unlock; + } + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, DEVLINK_CMD_REGION_READ); @@ -4843,22 +4856,100 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) } EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); -void -devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, - enum devlink_health_reporter_state state) +static int +devlink_nl_health_reporter_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_health_reporter *reporter, + enum devlink_command cmd, u32 portid, + u32 seq, int flags) { - if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY && - state != DEVLINK_HEALTH_REPORTER_STATE_ERROR)) + struct nlattr *reporter_attr; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto genlmsg_cancel; + + reporter_attr = nla_nest_start_noflag(msg, + DEVLINK_ATTR_HEALTH_REPORTER); + if (!reporter_attr) + goto genlmsg_cancel; + if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, + reporter->ops->name)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, + reporter->health_state)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT, + reporter->error_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, + reporter->recovery_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if 
(reporter->ops->recover && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, + reporter->graceful_period, + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (reporter->ops->recover && + nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, + reporter->auto_recover)) + goto reporter_nest_cancel; + if (reporter->dump_fmsg && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, + jiffies_to_msecs(reporter->dump_ts), + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (reporter->dump_fmsg && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, + reporter->dump_real_ts, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + + nla_nest_end(msg, reporter_attr); + genlmsg_end(msg, hdr); + return 0; + +reporter_nest_cancel: + nla_nest_end(msg, reporter_attr); +genlmsg_cancel: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static void devlink_recover_notify(struct devlink_health_reporter *reporter, + enum devlink_command cmd) +{ + struct sk_buff *msg; + int err; + + WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) return; - if (reporter->health_state == state) + err = devlink_nl_health_reporter_fill(msg, reporter->devlink, + reporter, cmd, 0, 0, 0); + if (err) { + nlmsg_free(msg); return; + } - reporter->health_state = state; - trace_devlink_health_reporter_state_update(reporter->devlink, - reporter->ops->name, state); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(reporter->devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } -EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update); + +void +devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter) +{ + reporter->recovery_count++; + reporter->last_recovery_ts = jiffies; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done); static int devlink_health_reporter_recover(struct devlink_health_reporter *reporter, @@ -4876,9 +4967,9 @@ 
devlink_health_reporter_recover(struct devlink_health_reporter *reporter, if (err) return err; - reporter->recovery_count++; + devlink_health_reporter_recovery_done(reporter); reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; - reporter->last_recovery_ts = jiffies; + devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); return 0; } @@ -4945,6 +5036,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter, reporter->error_count++; prev_health_state = reporter->health_state; reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; + devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); /* abort if the previous error wasn't recovered */ if (reporter->auto_recover && @@ -5027,68 +5119,23 @@ devlink_health_reporter_put(struct devlink_health_reporter *reporter) refcount_dec(&reporter->refcount); } -static int -devlink_nl_health_reporter_fill(struct sk_buff *msg, - struct devlink *devlink, - struct devlink_health_reporter *reporter, - enum devlink_command cmd, u32 portid, - u32 seq, int flags) +void +devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, + enum devlink_health_reporter_state state) { - struct nlattr *reporter_attr; - void *hdr; - - hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); - if (!hdr) - return -EMSGSIZE; - - if (devlink_nl_put_handle(msg, devlink)) - goto genlmsg_cancel; - - reporter_attr = nla_nest_start_noflag(msg, - DEVLINK_ATTR_HEALTH_REPORTER); - if (!reporter_attr) - goto genlmsg_cancel; - if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, - reporter->ops->name)) - goto reporter_nest_cancel; - if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, - reporter->health_state)) - goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT, - reporter->error_count, DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, - 
reporter->recovery_count, DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (reporter->ops->recover && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, - reporter->graceful_period, - DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (reporter->ops->recover && - nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, - reporter->auto_recover)) - goto reporter_nest_cancel; - if (reporter->dump_fmsg && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, - jiffies_to_msecs(reporter->dump_ts), - DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (reporter->dump_fmsg && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, - reporter->dump_real_ts, DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; + if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY && + state != DEVLINK_HEALTH_REPORTER_STATE_ERROR)) + return; - nla_nest_end(msg, reporter_attr); - genlmsg_end(msg, hdr); - return 0; + if (reporter->health_state == state) + return; -reporter_nest_cancel: - nla_nest_end(msg, reporter_attr); -genlmsg_cancel: - genlmsg_cancel(msg, hdr); - return -EMSGSIZE; + reporter->health_state = state; + trace_devlink_health_reporter_state_update(reporter->devlink, + reporter->ops->name, state); + devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); } +EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update); static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, struct genl_info *info) @@ -5911,6 +5958,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 }, + [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, 
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, @@ -6814,7 +6863,7 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, rcu_read_lock(); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); enabled = false; if (table) enabled = table->counters_enabled; @@ -6838,26 +6887,34 @@ int devlink_dpipe_table_register(struct devlink *devlink, void *priv, bool counter_control_extern) { struct devlink_dpipe_table *table; - - if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name)) - return -EEXIST; + int err = 0; if (WARN_ON(!table_ops->size_get)) return -EINVAL; + mutex_lock(&devlink->lock); + + if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name, + devlink)) { + err = -EEXIST; + goto unlock; + } + table = kzalloc(sizeof(*table), GFP_KERNEL); - if (!table) - return -ENOMEM; + if (!table) { + err = -ENOMEM; + goto unlock; + } table->name = table_name; table->table_ops = table_ops; table->priv = priv; table->counter_control_extern = counter_control_extern; - mutex_lock(&devlink->lock); list_add_tail_rcu(&table->list, &devlink->dpipe_table_list); +unlock: mutex_unlock(&devlink->lock); - return 0; + return err; } EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); @@ -6874,7 +6931,7 @@ void devlink_dpipe_table_unregister(struct devlink *devlink, mutex_lock(&devlink->lock); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) goto unlock; list_del_rcu(&table->list); @@ -7031,7 +7088,7 @@ int devlink_dpipe_table_resource_set(struct devlink *devlink, mutex_lock(&devlink->lock); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) { err = -EINVAL; goto out; @@ -7674,6 +7731,9 @@ static const struct devlink_trap devlink_trap_generic[] = { DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION), DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION), DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, 
EXCEPTION), + DEVLINK_TRAP(NON_ROUTABLE, DROP), + DEVLINK_TRAP(DECAP_ERROR, EXCEPTION), + DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP), }; #define DEVLINK_TRAP_GROUP(_id) \ @@ -7686,6 +7746,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = { DEVLINK_TRAP_GROUP(L2_DROPS), DEVLINK_TRAP_GROUP(L3_DROPS), DEVLINK_TRAP_GROUP(BUFFER_DROPS), + DEVLINK_TRAP_GROUP(TUNNEL_DROPS), }; static int devlink_trap_generic_verify(const struct devlink_trap *trap) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 536e032d95c8..31700e0c3928 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -802,16 +802,12 @@ net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata) if (!n_hw_metadata) return NULL; - trap_group_name = kmemdup(hw_metadata->trap_group_name, - strlen(hw_metadata->trap_group_name) + 1, - GFP_ATOMIC | __GFP_ZERO); + trap_group_name = kstrdup(hw_metadata->trap_group_name, GFP_ATOMIC); if (!trap_group_name) goto free_hw_metadata; n_hw_metadata->trap_group_name = trap_group_name; - trap_name = kmemdup(hw_metadata->trap_name, - strlen(hw_metadata->trap_name) + 1, - GFP_ATOMIC | __GFP_ZERO); + trap_name = kstrdup(hw_metadata->trap_name, GFP_ATOMIC); if (!trap_name) goto free_trap_group; n_hw_metadata->trap_name = trap_name; @@ -1004,8 +1000,10 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack) { int cpu; - if (!monitor_hw) + if (!monitor_hw) { NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled"); + return; + } monitor_hw = false; diff --git a/net/core/ethtool.c b/net/core/ethtool.c deleted file mode 100644 index cd9bc67381b2..000000000000 --- a/net/core/ethtool.c +++ /dev/null @@ -1,3116 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/core/ethtool.c - Ethtool ioctl handler - * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx> - * - * This file is where we call all the ethtool_ops commands to get - * the information ethtool needs. 
- */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/ethtool.h> -#include <linux/netdevice.h> -#include <linux/net_tstamp.h> -#include <linux/phy.h> -#include <linux/bitops.h> -#include <linux/uaccess.h> -#include <linux/vmalloc.h> -#include <linux/sfp.h> -#include <linux/slab.h> -#include <linux/rtnetlink.h> -#include <linux/sched/signal.h> -#include <linux/net.h> -#include <net/devlink.h> -#include <net/xdp_sock.h> -#include <net/flow_offload.h> - -/* - * Some useful ethtool_ops methods that're device independent. - * If we find that all drivers want to do the same thing here, - * we can turn these into dev_() function calls. - */ - -u32 ethtool_op_get_link(struct net_device *dev) -{ - return netif_carrier_ok(dev) ? 1 : 0; -} -EXPORT_SYMBOL(ethtool_op_get_link); - -int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) -{ - info->so_timestamping = - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index = -1; - return 0; -} -EXPORT_SYMBOL(ethtool_op_get_ts_info); - -/* Handlers for each ethtool command */ - -#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32) - -static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { - [NETIF_F_SG_BIT] = "tx-scatter-gather", - [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4", - [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic", - [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", - [NETIF_F_HIGHDMA_BIT] = "highdma", - [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", - [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert", - - [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse", - [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter", - [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert", - [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse", - [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter", - 
[NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", - [NETIF_F_GSO_BIT] = "tx-generic-segmentation", - [NETIF_F_LLTX_BIT] = "tx-lockless", - [NETIF_F_NETNS_LOCAL_BIT] = "netns-local", - [NETIF_F_GRO_BIT] = "rx-gro", - [NETIF_F_GRO_HW_BIT] = "rx-gro-hw", - [NETIF_F_LRO_BIT] = "rx-lro", - - [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", - [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", - [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", - [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", - [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", - [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", - [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", - [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation", - [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation", - [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation", - [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", - [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", - [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", - [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", - [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", - [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", - - [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", - [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", - [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu", - [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter", - [NETIF_F_RXHASH_BIT] = "rx-hashing", - [NETIF_F_RXCSUM_BIT] = "rx-checksum", - [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy", - [NETIF_F_LOOPBACK_BIT] = "loopback", - [NETIF_F_RXFCS_BIT] = "rx-fcs", - [NETIF_F_RXALL_BIT] = "rx-all", - [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", - [NETIF_F_HW_TC_BIT] = "hw-tc-offload", - [NETIF_F_HW_ESP_BIT] = "esp-hw-offload", - [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload", - [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload", - [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record", - [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload", - [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload", -}; - -static const char 
-rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = { - [ETH_RSS_HASH_TOP_BIT] = "toeplitz", - [ETH_RSS_HASH_XOR_BIT] = "xor", - [ETH_RSS_HASH_CRC32_BIT] = "crc32", -}; - -static const char -tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = { - [ETHTOOL_ID_UNSPEC] = "Unspec", - [ETHTOOL_RX_COPYBREAK] = "rx-copybreak", - [ETHTOOL_TX_COPYBREAK] = "tx-copybreak", - [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout", -}; - -static const char -phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = { - [ETHTOOL_ID_UNSPEC] = "Unspec", - [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift", - [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down", - [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down", -}; - -static int ethtool_get_features(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_gfeatures cmd = { - .cmd = ETHTOOL_GFEATURES, - .size = ETHTOOL_DEV_FEATURE_WORDS, - }; - struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; - u32 __user *sizeaddr; - u32 copy_size; - int i; - - /* in case feature bits run out again */ - BUILD_BUG_ON(ETHTOOL_DEV_FEATURE_WORDS * sizeof(u32) > sizeof(netdev_features_t)); - - for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) { - features[i].available = (u32)(dev->hw_features >> (32 * i)); - features[i].requested = (u32)(dev->wanted_features >> (32 * i)); - features[i].active = (u32)(dev->features >> (32 * i)); - features[i].never_changed = - (u32)(NETIF_F_NEVER_CHANGE >> (32 * i)); - } - - sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); - if (get_user(copy_size, sizeaddr)) - return -EFAULT; - - if (copy_size > ETHTOOL_DEV_FEATURE_WORDS) - copy_size = ETHTOOL_DEV_FEATURE_WORDS; - - if (copy_to_user(useraddr, &cmd, sizeof(cmd))) - return -EFAULT; - useraddr += sizeof(cmd); - if (copy_to_user(useraddr, features, copy_size * sizeof(*features))) - return -EFAULT; - - return 0; -} - -static int ethtool_set_features(struct net_device *dev, void __user *useraddr) -{ - 
struct ethtool_sfeatures cmd; - struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; - netdev_features_t wanted = 0, valid = 0; - int i, ret = 0; - - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) - return -EFAULT; - useraddr += sizeof(cmd); - - if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS) - return -EINVAL; - - if (copy_from_user(features, useraddr, sizeof(features))) - return -EFAULT; - - for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) { - valid |= (netdev_features_t)features[i].valid << (32 * i); - wanted |= (netdev_features_t)features[i].requested << (32 * i); - } - - if (valid & ~NETIF_F_ETHTOOL_BITS) - return -EINVAL; - - if (valid & ~dev->hw_features) { - valid &= dev->hw_features; - ret |= ETHTOOL_F_UNSUPPORTED; - } - - dev->wanted_features &= ~valid; - dev->wanted_features |= wanted & valid; - __netdev_update_features(dev); - - if ((dev->wanted_features ^ dev->features) & valid) - ret |= ETHTOOL_F_WISH; - - return ret; -} - -static int __ethtool_get_sset_count(struct net_device *dev, int sset) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (sset == ETH_SS_FEATURES) - return ARRAY_SIZE(netdev_features_strings); - - if (sset == ETH_SS_RSS_HASH_FUNCS) - return ARRAY_SIZE(rss_hash_func_strings); - - if (sset == ETH_SS_TUNABLES) - return ARRAY_SIZE(tunable_strings); - - if (sset == ETH_SS_PHY_TUNABLES) - return ARRAY_SIZE(phy_tunable_strings); - - if (sset == ETH_SS_PHY_STATS && dev->phydev && - !ops->get_ethtool_phy_stats) - return phy_ethtool_get_sset_count(dev->phydev); - - if (ops->get_sset_count && ops->get_strings) - return ops->get_sset_count(dev, sset); - else - return -EOPNOTSUPP; -} - -static void __ethtool_get_strings(struct net_device *dev, - u32 stringset, u8 *data) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (stringset == ETH_SS_FEATURES) - memcpy(data, netdev_features_strings, - sizeof(netdev_features_strings)); - else if (stringset == ETH_SS_RSS_HASH_FUNCS) - memcpy(data, rss_hash_func_strings, - 
sizeof(rss_hash_func_strings)); - else if (stringset == ETH_SS_TUNABLES) - memcpy(data, tunable_strings, sizeof(tunable_strings)); - else if (stringset == ETH_SS_PHY_TUNABLES) - memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings)); - else if (stringset == ETH_SS_PHY_STATS && dev->phydev && - !ops->get_ethtool_phy_stats) - phy_ethtool_get_strings(dev->phydev, data); - else - /* ops->get_strings is valid because checked earlier */ - ops->get_strings(dev, stringset, data); -} - -static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) -{ - /* feature masks of legacy discrete ethtool ops */ - - switch (eth_cmd) { - case ETHTOOL_GTXCSUM: - case ETHTOOL_STXCSUM: - return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC; - case ETHTOOL_GRXCSUM: - case ETHTOOL_SRXCSUM: - return NETIF_F_RXCSUM; - case ETHTOOL_GSG: - case ETHTOOL_SSG: - return NETIF_F_SG; - case ETHTOOL_GTSO: - case ETHTOOL_STSO: - return NETIF_F_ALL_TSO; - case ETHTOOL_GGSO: - case ETHTOOL_SGSO: - return NETIF_F_GSO; - case ETHTOOL_GGRO: - case ETHTOOL_SGRO: - return NETIF_F_GRO; - default: - BUG(); - } -} - -static int ethtool_get_one_feature(struct net_device *dev, - char __user *useraddr, u32 ethcmd) -{ - netdev_features_t mask = ethtool_get_feature_mask(ethcmd); - struct ethtool_value edata = { - .cmd = ethcmd, - .data = !!(dev->features & mask), - }; - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_one_feature(struct net_device *dev, - void __user *useraddr, u32 ethcmd) -{ - struct ethtool_value edata; - netdev_features_t mask; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - mask = ethtool_get_feature_mask(ethcmd); - mask &= dev->hw_features; - if (!mask) - return -EOPNOTSUPP; - - if (edata.data) - dev->wanted_features |= mask; - else - dev->wanted_features &= ~mask; - - __netdev_update_features(dev); - - return 0; -} - -#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \ - 
ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) -#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \ - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \ - NETIF_F_RXHASH) - -static u32 __ethtool_get_flags(struct net_device *dev) -{ - u32 flags = 0; - - if (dev->features & NETIF_F_LRO) - flags |= ETH_FLAG_LRO; - if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) - flags |= ETH_FLAG_RXVLAN; - if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) - flags |= ETH_FLAG_TXVLAN; - if (dev->features & NETIF_F_NTUPLE) - flags |= ETH_FLAG_NTUPLE; - if (dev->features & NETIF_F_RXHASH) - flags |= ETH_FLAG_RXHASH; - - return flags; -} - -static int __ethtool_set_flags(struct net_device *dev, u32 data) -{ - netdev_features_t features = 0, changed; - - if (data & ~ETH_ALL_FLAGS) - return -EINVAL; - - if (data & ETH_FLAG_LRO) - features |= NETIF_F_LRO; - if (data & ETH_FLAG_RXVLAN) - features |= NETIF_F_HW_VLAN_CTAG_RX; - if (data & ETH_FLAG_TXVLAN) - features |= NETIF_F_HW_VLAN_CTAG_TX; - if (data & ETH_FLAG_NTUPLE) - features |= NETIF_F_NTUPLE; - if (data & ETH_FLAG_RXHASH) - features |= NETIF_F_RXHASH; - - /* allow changing only bits set in hw_features */ - changed = (features ^ dev->features) & ETH_ALL_FEATURES; - if (changed & ~dev->hw_features) - return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP; - - dev->wanted_features = - (dev->wanted_features & ~changed) | (features & changed); - - __netdev_update_features(dev); - - return 0; -} - -/* Given two link masks, AND them together and save the result in dst. 
*/ -void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst, - struct ethtool_link_ksettings *src) -{ - unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS); - unsigned int idx = 0; - - for (; idx < size; idx++) { - dst->link_modes.supported[idx] &= - src->link_modes.supported[idx]; - dst->link_modes.advertising[idx] &= - src->link_modes.advertising[idx]; - } -} -EXPORT_SYMBOL(ethtool_intersect_link_masks); - -void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, - u32 legacy_u32) -{ - bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); - dst[0] = legacy_u32; -} -EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode); - -/* return false if src had higher bits set. lower bits always updated. */ -bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, - const unsigned long *src) -{ - bool retval = true; - - /* TODO: following test will soon always be true */ - if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(ext); - - bitmap_zero(ext, __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_fill(ext, 32); - bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS); - if (bitmap_intersects(ext, src, - __ETHTOOL_LINK_MODE_MASK_NBITS)) { - /* src mask goes beyond bit 31 */ - retval = false; - } - } - *legacy_u32 = src[0]; - return retval; -} -EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32); - -/* return false if legacy contained non-0 deprecated fields - * maxtxpkt/maxrxpkt. 
rest of ksettings always updated - */ -static bool -convert_legacy_settings_to_link_ksettings( - struct ethtool_link_ksettings *link_ksettings, - const struct ethtool_cmd *legacy_settings) -{ - bool retval = true; - - memset(link_ksettings, 0, sizeof(*link_ksettings)); - - /* This is used to tell users that driver is still using these - * deprecated legacy fields, and they should not use - * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS - */ - if (legacy_settings->maxtxpkt || - legacy_settings->maxrxpkt) - retval = false; - - ethtool_convert_legacy_u32_to_link_mode( - link_ksettings->link_modes.supported, - legacy_settings->supported); - ethtool_convert_legacy_u32_to_link_mode( - link_ksettings->link_modes.advertising, - legacy_settings->advertising); - ethtool_convert_legacy_u32_to_link_mode( - link_ksettings->link_modes.lp_advertising, - legacy_settings->lp_advertising); - link_ksettings->base.speed - = ethtool_cmd_speed(legacy_settings); - link_ksettings->base.duplex - = legacy_settings->duplex; - link_ksettings->base.port - = legacy_settings->port; - link_ksettings->base.phy_address - = legacy_settings->phy_address; - link_ksettings->base.autoneg - = legacy_settings->autoneg; - link_ksettings->base.mdio_support - = legacy_settings->mdio_support; - link_ksettings->base.eth_tp_mdix - = legacy_settings->eth_tp_mdix; - link_ksettings->base.eth_tp_mdix_ctrl - = legacy_settings->eth_tp_mdix_ctrl; - return retval; -} - -/* return false if ksettings link modes had higher bits - * set. 
legacy_settings always updated (best effort) - */ -static bool -convert_link_ksettings_to_legacy_settings( - struct ethtool_cmd *legacy_settings, - const struct ethtool_link_ksettings *link_ksettings) -{ - bool retval = true; - - memset(legacy_settings, 0, sizeof(*legacy_settings)); - /* this also clears the deprecated fields in legacy structure: - * __u8 transceiver; - * __u32 maxtxpkt; - * __u32 maxrxpkt; - */ - - retval &= ethtool_convert_link_mode_to_legacy_u32( - &legacy_settings->supported, - link_ksettings->link_modes.supported); - retval &= ethtool_convert_link_mode_to_legacy_u32( - &legacy_settings->advertising, - link_ksettings->link_modes.advertising); - retval &= ethtool_convert_link_mode_to_legacy_u32( - &legacy_settings->lp_advertising, - link_ksettings->link_modes.lp_advertising); - ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed); - legacy_settings->duplex - = link_ksettings->base.duplex; - legacy_settings->port - = link_ksettings->base.port; - legacy_settings->phy_address - = link_ksettings->base.phy_address; - legacy_settings->autoneg - = link_ksettings->base.autoneg; - legacy_settings->mdio_support - = link_ksettings->base.mdio_support; - legacy_settings->eth_tp_mdix - = link_ksettings->base.eth_tp_mdix; - legacy_settings->eth_tp_mdix_ctrl - = link_ksettings->base.eth_tp_mdix_ctrl; - legacy_settings->transceiver - = link_ksettings->base.transceiver; - return retval; -} - -/* number of 32-bit words to store the user's link mode bitmaps */ -#define __ETHTOOL_LINK_MODE_MASK_NU32 \ - DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) - -/* layout of the struct passed from/to userland */ -struct ethtool_link_usettings { - struct ethtool_link_settings base; - struct { - __u32 supported[__ETHTOOL_LINK_MODE_MASK_NU32]; - __u32 advertising[__ETHTOOL_LINK_MODE_MASK_NU32]; - __u32 lp_advertising[__ETHTOOL_LINK_MODE_MASK_NU32]; - } link_modes; -}; - -/* Internal kernel helper to query a device ethtool_link_settings. 
*/ -int __ethtool_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *link_ksettings) -{ - ASSERT_RTNL(); - - if (!dev->ethtool_ops->get_link_ksettings) - return -EOPNOTSUPP; - - memset(link_ksettings, 0, sizeof(*link_ksettings)); - return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings); -} -EXPORT_SYMBOL(__ethtool_get_link_ksettings); - -/* convert ethtool_link_usettings in user space to a kernel internal - * ethtool_link_ksettings. return 0 on success, errno on error. - */ -static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to, - const void __user *from) -{ - struct ethtool_link_usettings link_usettings; - - if (copy_from_user(&link_usettings, from, sizeof(link_usettings))) - return -EFAULT; - - memcpy(&to->base, &link_usettings.base, sizeof(to->base)); - bitmap_from_arr32(to->link_modes.supported, - link_usettings.link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_from_arr32(to->link_modes.advertising, - link_usettings.link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_from_arr32(to->link_modes.lp_advertising, - link_usettings.link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); - - return 0; -} - -/* convert a kernel internal ethtool_link_ksettings to - * ethtool_link_usettings in user space. return 0 on success, errno on - * error. 
- */ -static int -store_link_ksettings_for_user(void __user *to, - const struct ethtool_link_ksettings *from) -{ - struct ethtool_link_usettings link_usettings; - - memcpy(&link_usettings.base, &from->base, sizeof(link_usettings)); - bitmap_to_arr32(link_usettings.link_modes.supported, - from->link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_to_arr32(link_usettings.link_modes.advertising, - from->link_modes.advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_to_arr32(link_usettings.link_modes.lp_advertising, - from->link_modes.lp_advertising, - __ETHTOOL_LINK_MODE_MASK_NBITS); - - if (copy_to_user(to, &link_usettings, sizeof(link_usettings))) - return -EFAULT; - - return 0; -} - -/* Query device for its ethtool_link_settings. */ -static int ethtool_get_link_ksettings(struct net_device *dev, - void __user *useraddr) -{ - int err = 0; - struct ethtool_link_ksettings link_ksettings; - - ASSERT_RTNL(); - if (!dev->ethtool_ops->get_link_ksettings) - return -EOPNOTSUPP; - - /* handle bitmap nbits handshake */ - if (copy_from_user(&link_ksettings.base, useraddr, - sizeof(link_ksettings.base))) - return -EFAULT; - - if (__ETHTOOL_LINK_MODE_MASK_NU32 - != link_ksettings.base.link_mode_masks_nwords) { - /* wrong link mode nbits requested */ - memset(&link_ksettings, 0, sizeof(link_ksettings)); - link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS; - /* send back number of words required as negative val */ - compiletime_assert(__ETHTOOL_LINK_MODE_MASK_NU32 <= S8_MAX, - "need too many bits for link modes!"); - link_ksettings.base.link_mode_masks_nwords - = -((s8)__ETHTOOL_LINK_MODE_MASK_NU32); - - /* copy the base fields back to user, not the link - * mode bitmaps - */ - if (copy_to_user(useraddr, &link_ksettings.base, - sizeof(link_ksettings.base))) - return -EFAULT; - - return 0; - } - - /* handshake successful: user/kernel agree on - * link_mode_masks_nwords - */ - - memset(&link_ksettings, 0, sizeof(link_ksettings)); - err = 
dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings); - if (err < 0) - return err; - - /* make sure we tell the right values to user */ - link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS; - link_ksettings.base.link_mode_masks_nwords - = __ETHTOOL_LINK_MODE_MASK_NU32; - - return store_link_ksettings_for_user(useraddr, &link_ksettings); -} - -/* Update device ethtool_link_settings. */ -static int ethtool_set_link_ksettings(struct net_device *dev, - void __user *useraddr) -{ - int err; - struct ethtool_link_ksettings link_ksettings; - - ASSERT_RTNL(); - - if (!dev->ethtool_ops->set_link_ksettings) - return -EOPNOTSUPP; - - /* make sure nbits field has expected value */ - if (copy_from_user(&link_ksettings.base, useraddr, - sizeof(link_ksettings.base))) - return -EFAULT; - - if (__ETHTOOL_LINK_MODE_MASK_NU32 - != link_ksettings.base.link_mode_masks_nwords) - return -EINVAL; - - /* copy the whole structure, now that we know it has expected - * format - */ - err = load_link_ksettings_from_user(&link_ksettings, useraddr); - if (err) - return err; - - /* re-check nwords field, just in case */ - if (__ETHTOOL_LINK_MODE_MASK_NU32 - != link_ksettings.base.link_mode_masks_nwords) - return -EINVAL; - - return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); -} - -/* Query device for its ethtool_cmd settings. - * - * Backward compatibility note: for compatibility with legacy ethtool, this is - * now implemented via get_link_ksettings. When driver reports higher link mode - * bits, a kernel warning is logged once (with name of 1st driver/device) to - * recommend user to upgrade ethtool, but the command is successful (only the - * lower link mode bits reported back to user). Deprecated fields from - * ethtool_cmd (transceiver/maxrxpkt/maxtxpkt) are always set to zero. 
- */ -static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_link_ksettings link_ksettings; - struct ethtool_cmd cmd; - int err; - - ASSERT_RTNL(); - if (!dev->ethtool_ops->get_link_ksettings) - return -EOPNOTSUPP; - - memset(&link_ksettings, 0, sizeof(link_ksettings)); - err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings); - if (err < 0) - return err; - convert_link_ksettings_to_legacy_settings(&cmd, &link_ksettings); - - /* send a sensible cmd tag back to user */ - cmd.cmd = ETHTOOL_GSET; - - if (copy_to_user(useraddr, &cmd, sizeof(cmd))) - return -EFAULT; - - return 0; -} - -/* Update device link settings with given ethtool_cmd. - * - * Backward compatibility note: for compatibility with legacy ethtool, this is - * now always implemented via set_link_settings. When user's request updates - * deprecated ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel - * warning is logged once (with name of 1st driver/device) to recommend user to - * upgrade ethtool, and the request is rejected. 
- */ -static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_link_ksettings link_ksettings; - struct ethtool_cmd cmd; - - ASSERT_RTNL(); - - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) - return -EFAULT; - if (!dev->ethtool_ops->set_link_ksettings) - return -EOPNOTSUPP; - - if (!convert_legacy_settings_to_link_ksettings(&link_ksettings, &cmd)) - return -EINVAL; - link_ksettings.base.link_mode_masks_nwords = - __ETHTOOL_LINK_MODE_MASK_NU32; - return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); -} - -static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_drvinfo info; - const struct ethtool_ops *ops = dev->ethtool_ops; - - memset(&info, 0, sizeof(info)); - info.cmd = ETHTOOL_GDRVINFO; - if (ops->get_drvinfo) { - ops->get_drvinfo(dev, &info); - } else if (dev->dev.parent && dev->dev.parent->driver) { - strlcpy(info.bus_info, dev_name(dev->dev.parent), - sizeof(info.bus_info)); - strlcpy(info.driver, dev->dev.parent->driver->name, - sizeof(info.driver)); - } else { - return -EOPNOTSUPP; - } - - /* - * this method of obtaining string set info is deprecated; - * Use ETHTOOL_GSSET_INFO instead. 
- */ - if (ops->get_sset_count) { - int rc; - - rc = ops->get_sset_count(dev, ETH_SS_TEST); - if (rc >= 0) - info.testinfo_len = rc; - rc = ops->get_sset_count(dev, ETH_SS_STATS); - if (rc >= 0) - info.n_stats = rc; - rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); - if (rc >= 0) - info.n_priv_flags = rc; - } - if (ops->get_regs_len) { - int ret = ops->get_regs_len(dev); - - if (ret > 0) - info.regdump_len = ret; - } - - if (ops->get_eeprom_len) - info.eedump_len = ops->get_eeprom_len(dev); - - if (!info.fw_version[0]) - devlink_compat_running_version(dev, info.fw_version, - sizeof(info.fw_version)); - - if (copy_to_user(useraddr, &info, sizeof(info))) - return -EFAULT; - return 0; -} - -static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_sset_info info; - u64 sset_mask; - int i, idx = 0, n_bits = 0, ret, rc; - u32 *info_buf = NULL; - - if (copy_from_user(&info, useraddr, sizeof(info))) - return -EFAULT; - - /* store copy of mask, because we zero struct later on */ - sset_mask = info.sset_mask; - if (!sset_mask) - return 0; - - /* calculate size of return buffer */ - n_bits = hweight64(sset_mask); - - memset(&info, 0, sizeof(info)); - info.cmd = ETHTOOL_GSSET_INFO; - - info_buf = kcalloc(n_bits, sizeof(u32), GFP_USER); - if (!info_buf) - return -ENOMEM; - - /* - * fill return buffer based on input bitmask and successful - * get_sset_count return - */ - for (i = 0; i < 64; i++) { - if (!(sset_mask & (1ULL << i))) - continue; - - rc = __ethtool_get_sset_count(dev, i); - if (rc >= 0) { - info.sset_mask |= (1ULL << i); - info_buf[idx++] = rc; - } - } - - ret = -EFAULT; - if (copy_to_user(useraddr, &info, sizeof(info))) - goto out; - - useraddr += offsetof(struct ethtool_sset_info, data); - if (copy_to_user(useraddr, info_buf, idx * sizeof(u32))) - goto out; - - ret = 0; - -out: - kfree(info_buf); - return ret; -} - -static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, - u32 cmd, 
void __user *useraddr) -{ - struct ethtool_rxnfc info; - size_t info_size = sizeof(info); - int rc; - - if (!dev->ethtool_ops->set_rxnfc) - return -EOPNOTSUPP; - - /* struct ethtool_rxnfc was originally defined for - * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data - * members. User-space might still be using that - * definition. */ - if (cmd == ETHTOOL_SRXFH) - info_size = (offsetof(struct ethtool_rxnfc, data) + - sizeof(info.data)); - - if (copy_from_user(&info, useraddr, info_size)) - return -EFAULT; - - rc = dev->ethtool_ops->set_rxnfc(dev, &info); - if (rc) - return rc; - - if (cmd == ETHTOOL_SRXCLSRLINS && - copy_to_user(useraddr, &info, info_size)) - return -EFAULT; - - return 0; -} - -static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, - u32 cmd, void __user *useraddr) -{ - struct ethtool_rxnfc info; - size_t info_size = sizeof(info); - const struct ethtool_ops *ops = dev->ethtool_ops; - int ret; - void *rule_buf = NULL; - - if (!ops->get_rxnfc) - return -EOPNOTSUPP; - - /* struct ethtool_rxnfc was originally defined for - * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data - * members. User-space might still be using that - * definition. */ - if (cmd == ETHTOOL_GRXFH) - info_size = (offsetof(struct ethtool_rxnfc, data) + - sizeof(info.data)); - - if (copy_from_user(&info, useraddr, info_size)) - return -EFAULT; - - /* If FLOW_RSS was requested then user-space must be using the - * new definition, as FLOW_RSS is newer. - */ - if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) { - info_size = sizeof(info); - if (copy_from_user(&info, useraddr, info_size)) - return -EFAULT; - /* Since malicious users may modify the original data, - * we need to check whether FLOW_RSS is still requested. 
- */ - if (!(info.flow_type & FLOW_RSS)) - return -EINVAL; - } - - if (info.cmd != cmd) - return -EINVAL; - - if (info.cmd == ETHTOOL_GRXCLSRLALL) { - if (info.rule_cnt > 0) { - if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) - rule_buf = kcalloc(info.rule_cnt, sizeof(u32), - GFP_USER); - if (!rule_buf) - return -ENOMEM; - } - } - - ret = ops->get_rxnfc(dev, &info, rule_buf); - if (ret < 0) - goto err_out; - - ret = -EFAULT; - if (copy_to_user(useraddr, &info, info_size)) - goto err_out; - - if (rule_buf) { - useraddr += offsetof(struct ethtool_rxnfc, rule_locs); - if (copy_to_user(useraddr, rule_buf, - info.rule_cnt * sizeof(u32))) - goto err_out; - } - ret = 0; - -err_out: - kfree(rule_buf); - - return ret; -} - -static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, - struct ethtool_rxnfc *rx_rings, - u32 size) -{ - int i; - - if (copy_from_user(indir, useraddr, size * sizeof(indir[0]))) - return -EFAULT; - - /* Validate ring indices */ - for (i = 0; i < size; i++) - if (indir[i] >= rx_rings->data) - return -EINVAL; - - return 0; -} - -u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; - -void netdev_rss_key_fill(void *buffer, size_t len) -{ - BUG_ON(len > sizeof(netdev_rss_key)); - net_get_random_once(netdev_rss_key, sizeof(netdev_rss_key)); - memcpy(buffer, netdev_rss_key, len); -} -EXPORT_SYMBOL(netdev_rss_key_fill); - -static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) -{ - u32 dev_size, current_max = 0; - u32 *indir; - int ret; - - if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->get_rxfh) - return -EOPNOTSUPP; - dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); - if (dev_size == 0) - return -EOPNOTSUPP; - - indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); - if (!indir) - return -ENOMEM; - - ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); - if (ret) - goto out; - - while (dev_size--) - current_max = max(current_max, indir[dev_size]); - - *max = current_max; - -out: - 
kfree(indir); - return ret; -} - -static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, - void __user *useraddr) -{ - u32 user_size, dev_size; - u32 *indir; - int ret; - - if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->get_rxfh) - return -EOPNOTSUPP; - dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); - if (dev_size == 0) - return -EOPNOTSUPP; - - if (copy_from_user(&user_size, - useraddr + offsetof(struct ethtool_rxfh_indir, size), - sizeof(user_size))) - return -EFAULT; - - if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size), - &dev_size, sizeof(dev_size))) - return -EFAULT; - - /* If the user buffer size is 0, this is just a query for the - * device table size. Otherwise, if it's smaller than the - * device table size it's an error. - */ - if (user_size < dev_size) - return user_size == 0 ? 0 : -EINVAL; - - indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); - if (!indir) - return -ENOMEM; - - ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); - if (ret) - goto out; - - if (copy_to_user(useraddr + - offsetof(struct ethtool_rxfh_indir, ring_index[0]), - indir, dev_size * sizeof(indir[0]))) - ret = -EFAULT; - -out: - kfree(indir); - return ret; -} - -static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_rxnfc rx_rings; - u32 user_size, dev_size, i; - u32 *indir; - const struct ethtool_ops *ops = dev->ethtool_ops; - int ret; - u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]); - - if (!ops->get_rxfh_indir_size || !ops->set_rxfh || - !ops->get_rxnfc) - return -EOPNOTSUPP; - - dev_size = ops->get_rxfh_indir_size(dev); - if (dev_size == 0) - return -EOPNOTSUPP; - - if (copy_from_user(&user_size, - useraddr + offsetof(struct ethtool_rxfh_indir, size), - sizeof(user_size))) - return -EFAULT; - - if (user_size != 0 && user_size != dev_size) - return -EINVAL; - - indir = kcalloc(dev_size, sizeof(indir[0]), 
GFP_USER); - if (!indir) - return -ENOMEM; - - rx_rings.cmd = ETHTOOL_GRXRINGS; - ret = ops->get_rxnfc(dev, &rx_rings, NULL); - if (ret) - goto out; - - if (user_size == 0) { - for (i = 0; i < dev_size; i++) - indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); - } else { - ret = ethtool_copy_validate_indir(indir, - useraddr + ringidx_offset, - &rx_rings, - dev_size); - if (ret) - goto out; - } - - ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); - if (ret) - goto out; - - /* indicate whether rxfh was set to default */ - if (user_size == 0) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; - else - dev->priv_flags |= IFF_RXFH_CONFIGURED; - -out: - kfree(indir); - return ret; -} - -static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, - void __user *useraddr) -{ - int ret; - const struct ethtool_ops *ops = dev->ethtool_ops; - u32 user_indir_size, user_key_size; - u32 dev_indir_size = 0, dev_key_size = 0; - struct ethtool_rxfh rxfh; - u32 total_size; - u32 indir_bytes; - u32 *indir = NULL; - u8 dev_hfunc = 0; - u8 *hkey = NULL; - u8 *rss_config; - - if (!ops->get_rxfh) - return -EOPNOTSUPP; - - if (ops->get_rxfh_indir_size) - dev_indir_size = ops->get_rxfh_indir_size(dev); - if (ops->get_rxfh_key_size) - dev_key_size = ops->get_rxfh_key_size(dev); - - if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) - return -EFAULT; - user_indir_size = rxfh.indir_size; - user_key_size = rxfh.key_size; - - /* Check that reserved fields are 0 for now */ - if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) - return -EINVAL; - /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !ops->get_rxfh_context) - return -EOPNOTSUPP; - - rxfh.indir_size = dev_indir_size; - rxfh.key_size = dev_key_size; - if (copy_to_user(useraddr, &rxfh, sizeof(rxfh))) - return -EFAULT; - - if ((user_indir_size && (user_indir_size != dev_indir_size)) || - (user_key_size && (user_key_size != dev_key_size))) - return -EINVAL; - - 
indir_bytes = user_indir_size * sizeof(indir[0]); - total_size = indir_bytes + user_key_size; - rss_config = kzalloc(total_size, GFP_USER); - if (!rss_config) - return -ENOMEM; - - if (user_indir_size) - indir = (u32 *)rss_config; - - if (user_key_size) - hkey = rss_config + indir_bytes; - - if (rxfh.rss_context) - ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey, - &dev_hfunc, - rxfh.rss_context); - else - ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc); - if (ret) - goto out; - - if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc), - &dev_hfunc, sizeof(rxfh.hfunc))) { - ret = -EFAULT; - } else if (copy_to_user(useraddr + - offsetof(struct ethtool_rxfh, rss_config[0]), - rss_config, total_size)) { - ret = -EFAULT; - } -out: - kfree(rss_config); - - return ret; -} - -static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, - void __user *useraddr) -{ - int ret; - const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_rxnfc rx_rings; - struct ethtool_rxfh rxfh; - u32 dev_indir_size = 0, dev_key_size = 0, i; - u32 *indir = NULL, indir_bytes = 0; - u8 *hkey = NULL; - u8 *rss_config; - u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); - bool delete = false; - - if (!ops->get_rxnfc || !ops->set_rxfh) - return -EOPNOTSUPP; - - if (ops->get_rxfh_indir_size) - dev_indir_size = ops->get_rxfh_indir_size(dev); - if (ops->get_rxfh_key_size) - dev_key_size = ops->get_rxfh_key_size(dev); - - if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) - return -EFAULT; - - /* Check that reserved fields are 0 for now */ - if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) - return -EINVAL; - /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !ops->set_rxfh_context) - return -EOPNOTSUPP; - - /* If either indir, hash key or function is valid, proceed further. - * Must request at least one change: indir size, hash key or function. 
- */ - if ((rxfh.indir_size && - rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && - rxfh.indir_size != dev_indir_size) || - (rxfh.key_size && (rxfh.key_size != dev_key_size)) || - (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && - rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE)) - return -EINVAL; - - if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - indir_bytes = dev_indir_size * sizeof(indir[0]); - - rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER); - if (!rss_config) - return -ENOMEM; - - rx_rings.cmd = ETHTOOL_GRXRINGS; - ret = ops->get_rxnfc(dev, &rx_rings, NULL); - if (ret) - goto out; - - /* rxfh.indir_size == 0 means reset the indir table to default (master - * context) or delete the context (other RSS contexts). - * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged. - */ - if (rxfh.indir_size && - rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) { - indir = (u32 *)rss_config; - ret = ethtool_copy_validate_indir(indir, - useraddr + rss_cfg_offset, - &rx_rings, - rxfh.indir_size); - if (ret) - goto out; - } else if (rxfh.indir_size == 0) { - if (rxfh.rss_context == 0) { - indir = (u32 *)rss_config; - for (i = 0; i < dev_indir_size; i++) - indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); - } else { - delete = true; - } - } - - if (rxfh.key_size) { - hkey = rss_config + indir_bytes; - if (copy_from_user(hkey, - useraddr + rss_cfg_offset + indir_bytes, - rxfh.key_size)) { - ret = -EFAULT; - goto out; - } - } - - if (rxfh.rss_context) - ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc, - &rxfh.rss_context, delete); - else - ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); - if (ret) - goto out; - - if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), - &rxfh.rss_context, sizeof(rxfh.rss_context))) - ret = -EFAULT; - - if (!rxfh.rss_context) { - /* indicate whether rxfh was set to default */ - if (rxfh.indir_size == 0) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; - else if (rxfh.indir_size != 
ETH_RXFH_INDIR_NO_CHANGE) - dev->priv_flags |= IFF_RXFH_CONFIGURED; - } - -out: - kfree(rss_config); - return ret; -} - -static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_regs regs; - const struct ethtool_ops *ops = dev->ethtool_ops; - void *regbuf; - int reglen, ret; - - if (!ops->get_regs || !ops->get_regs_len) - return -EOPNOTSUPP; - - if (copy_from_user(®s, useraddr, sizeof(regs))) - return -EFAULT; - - reglen = ops->get_regs_len(dev); - if (reglen <= 0) - return reglen; - - if (regs.len > reglen) - regs.len = reglen; - - regbuf = vzalloc(reglen); - if (!regbuf) - return -ENOMEM; - - if (regs.len < reglen) - reglen = regs.len; - - ops->get_regs(dev, ®s, regbuf); - - ret = -EFAULT; - if (copy_to_user(useraddr, ®s, sizeof(regs))) - goto out; - useraddr += offsetof(struct ethtool_regs, data); - if (copy_to_user(useraddr, regbuf, reglen)) - goto out; - ret = 0; - - out: - vfree(regbuf); - return ret; -} - -static int ethtool_reset(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value reset; - int ret; - - if (!dev->ethtool_ops->reset) - return -EOPNOTSUPP; - - if (copy_from_user(&reset, useraddr, sizeof(reset))) - return -EFAULT; - - ret = dev->ethtool_ops->reset(dev, &reset.data); - if (ret) - return ret; - - if (copy_to_user(useraddr, &reset, sizeof(reset))) - return -EFAULT; - return 0; -} - -static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_wolinfo wol; - - if (!dev->ethtool_ops->get_wol) - return -EOPNOTSUPP; - - memset(&wol, 0, sizeof(struct ethtool_wolinfo)); - wol.cmd = ETHTOOL_GWOL; - dev->ethtool_ops->get_wol(dev, &wol); - - if (copy_to_user(useraddr, &wol, sizeof(wol))) - return -EFAULT; - return 0; -} - -static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_wolinfo wol; - int ret; - - if (!dev->ethtool_ops->set_wol) - return -EOPNOTSUPP; - - if (copy_from_user(&wol, useraddr, sizeof(wol))) - return -EFAULT; 
- - ret = dev->ethtool_ops->set_wol(dev, &wol); - if (ret) - return ret; - - dev->wol_enabled = !!wol.wolopts; - - return 0; -} - -static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_eee edata; - int rc; - - if (!dev->ethtool_ops->get_eee) - return -EOPNOTSUPP; - - memset(&edata, 0, sizeof(struct ethtool_eee)); - edata.cmd = ETHTOOL_GEEE; - rc = dev->ethtool_ops->get_eee(dev, &edata); - - if (rc) - return rc; - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - - return 0; -} - -static int ethtool_set_eee(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_eee edata; - - if (!dev->ethtool_ops->set_eee) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return dev->ethtool_ops->set_eee(dev, &edata); -} - -static int ethtool_nway_reset(struct net_device *dev) -{ - if (!dev->ethtool_ops->nway_reset) - return -EOPNOTSUPP; - - return dev->ethtool_ops->nway_reset(dev); -} - -static int ethtool_get_link(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { .cmd = ETHTOOL_GLINK }; - - if (!dev->ethtool_ops->get_link) - return -EOPNOTSUPP; - - edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr, - int (*getter)(struct net_device *, - struct ethtool_eeprom *, u8 *), - u32 total_len) -{ - struct ethtool_eeprom eeprom; - void __user *userbuf = useraddr + sizeof(eeprom); - u32 bytes_remaining; - u8 *data; - int ret = 0; - - if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) - return -EFAULT; - - /* Check for wrap and zero */ - if (eeprom.offset + eeprom.len <= eeprom.offset) - return -EINVAL; - - /* Check for exceeding total eeprom len */ - if (eeprom.offset + eeprom.len > total_len) - return -EINVAL; - - data = kmalloc(PAGE_SIZE, 
GFP_USER); - if (!data) - return -ENOMEM; - - bytes_remaining = eeprom.len; - while (bytes_remaining > 0) { - eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE); - - ret = getter(dev, &eeprom, data); - if (ret) - break; - if (copy_to_user(userbuf, data, eeprom.len)) { - ret = -EFAULT; - break; - } - userbuf += eeprom.len; - eeprom.offset += eeprom.len; - bytes_remaining -= eeprom.len; - } - - eeprom.len = userbuf - (useraddr + sizeof(eeprom)); - eeprom.offset -= eeprom.len; - if (copy_to_user(useraddr, &eeprom, sizeof(eeprom))) - ret = -EFAULT; - - kfree(data); - return ret; -} - -static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_eeprom || !ops->get_eeprom_len || - !ops->get_eeprom_len(dev)) - return -EOPNOTSUPP; - - return ethtool_get_any_eeprom(dev, useraddr, ops->get_eeprom, - ops->get_eeprom_len(dev)); -} - -static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_eeprom eeprom; - const struct ethtool_ops *ops = dev->ethtool_ops; - void __user *userbuf = useraddr + sizeof(eeprom); - u32 bytes_remaining; - u8 *data; - int ret = 0; - - if (!ops->set_eeprom || !ops->get_eeprom_len || - !ops->get_eeprom_len(dev)) - return -EOPNOTSUPP; - - if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) - return -EFAULT; - - /* Check for wrap and zero */ - if (eeprom.offset + eeprom.len <= eeprom.offset) - return -EINVAL; - - /* Check for exceeding total eeprom len */ - if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) - return -EINVAL; - - data = kmalloc(PAGE_SIZE, GFP_USER); - if (!data) - return -ENOMEM; - - bytes_remaining = eeprom.len; - while (bytes_remaining > 0) { - eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE); - - if (copy_from_user(data, userbuf, eeprom.len)) { - ret = -EFAULT; - break; - } - ret = ops->set_eeprom(dev, &eeprom, data); - if (ret) - break; - userbuf += eeprom.len; - eeprom.offset += eeprom.len; - 
bytes_remaining -= eeprom.len; - } - - kfree(data); - return ret; -} - -static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; - - if (!dev->ethtool_ops->get_coalesce) - return -EOPNOTSUPP; - - dev->ethtool_ops->get_coalesce(dev, &coalesce); - - if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) - return -EFAULT; - return 0; -} - -static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_coalesce coalesce; - - if (!dev->ethtool_ops->set_coalesce) - return -EOPNOTSUPP; - - if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) - return -EFAULT; - - return dev->ethtool_ops->set_coalesce(dev, &coalesce); -} - -static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_ringparam ringparam = { .cmd = ETHTOOL_GRINGPARAM }; - - if (!dev->ethtool_ops->get_ringparam) - return -EOPNOTSUPP; - - dev->ethtool_ops->get_ringparam(dev, &ringparam); - - if (copy_to_user(useraddr, &ringparam, sizeof(ringparam))) - return -EFAULT; - return 0; -} - -static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM }; - - if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam) - return -EOPNOTSUPP; - - if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) - return -EFAULT; - - dev->ethtool_ops->get_ringparam(dev, &max); - - /* ensure new ring parameters are within the maximums */ - if (ringparam.rx_pending > max.rx_max_pending || - ringparam.rx_mini_pending > max.rx_mini_max_pending || - ringparam.rx_jumbo_pending > max.rx_jumbo_max_pending || - ringparam.tx_pending > max.tx_max_pending) - return -EINVAL; - - return dev->ethtool_ops->set_ringparam(dev, &ringparam); -} - -static noinline_for_stack int ethtool_get_channels(struct net_device *dev, - 
void __user *useraddr) -{ - struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; - - if (!dev->ethtool_ops->get_channels) - return -EOPNOTSUPP; - - dev->ethtool_ops->get_channels(dev, &channels); - - if (copy_to_user(useraddr, &channels, sizeof(channels))) - return -EFAULT; - return 0; -} - -static noinline_for_stack int ethtool_set_channels(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS }; - u16 from_channel, to_channel; - u32 max_rx_in_use = 0; - unsigned int i; - - if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels) - return -EOPNOTSUPP; - - if (copy_from_user(&channels, useraddr, sizeof(channels))) - return -EFAULT; - - dev->ethtool_ops->get_channels(dev, &curr); - - /* ensure new counts are within the maximums */ - if (channels.rx_count > curr.max_rx || - channels.tx_count > curr.max_tx || - channels.combined_count > curr.max_combined || - channels.other_count > curr.max_other) - return -EINVAL; - - /* ensure the new Rx count fits within the configured Rx flow - * indirection table settings */ - if (netif_is_rxfh_configured(dev) && - !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) && - (channels.combined_count + channels.rx_count) <= max_rx_in_use) - return -EINVAL; - - /* Disabling channels, query zero-copy AF_XDP sockets */ - from_channel = channels.combined_count + - min(channels.rx_count, channels.tx_count); - to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count); - for (i = from_channel; i < to_channel; i++) - if (xdp_get_umem_from_qid(dev, i)) - return -EINVAL; - - return dev->ethtool_ops->set_channels(dev, &channels); -} - -static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_pauseparam pauseparam = { .cmd = ETHTOOL_GPAUSEPARAM }; - - if (!dev->ethtool_ops->get_pauseparam) - return -EOPNOTSUPP; - - dev->ethtool_ops->get_pauseparam(dev, &pauseparam); - - if (copy_to_user(useraddr, 
&pauseparam, sizeof(pauseparam))) - return -EFAULT; - return 0; -} - -static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_pauseparam pauseparam; - - if (!dev->ethtool_ops->set_pauseparam) - return -EOPNOTSUPP; - - if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam))) - return -EFAULT; - - return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); -} - -static int ethtool_self_test(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_test test; - const struct ethtool_ops *ops = dev->ethtool_ops; - u64 *data; - int ret, test_len; - - if (!ops->self_test || !ops->get_sset_count) - return -EOPNOTSUPP; - - test_len = ops->get_sset_count(dev, ETH_SS_TEST); - if (test_len < 0) - return test_len; - WARN_ON(test_len == 0); - - if (copy_from_user(&test, useraddr, sizeof(test))) - return -EFAULT; - - test.len = test_len; - data = kmalloc_array(test_len, sizeof(u64), GFP_USER); - if (!data) - return -ENOMEM; - - ops->self_test(dev, &test, data); - - ret = -EFAULT; - if (copy_to_user(useraddr, &test, sizeof(test))) - goto out; - useraddr += sizeof(test); - if (copy_to_user(useraddr, data, test.len * sizeof(u64))) - goto out; - ret = 0; - - out: - kfree(data); - return ret; -} - -static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_gstrings gstrings; - u8 *data; - int ret; - - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) - return -EFAULT; - - ret = __ethtool_get_sset_count(dev, gstrings.string_set); - if (ret < 0) - return ret; - if (ret > S32_MAX / ETH_GSTRING_LEN) - return -ENOMEM; - WARN_ON_ONCE(!ret); - - gstrings.len = ret; - - if (gstrings.len) { - data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); - if (!data) - return -ENOMEM; - - __ethtool_get_strings(dev, gstrings.string_set, data); - } else { - data = NULL; - } - - ret = -EFAULT; - if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) - goto out; - useraddr += sizeof(gstrings); 
- if (gstrings.len && - copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) - goto out; - ret = 0; - -out: - vfree(data); - return ret; -} - -static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_value id; - static bool busy; - const struct ethtool_ops *ops = dev->ethtool_ops; - int rc; - - if (!ops->set_phys_id) - return -EOPNOTSUPP; - - if (busy) - return -EBUSY; - - if (copy_from_user(&id, useraddr, sizeof(id))) - return -EFAULT; - - rc = ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); - if (rc < 0) - return rc; - - /* Drop the RTNL lock while waiting, but prevent reentry or - * removal of the device. - */ - busy = true; - dev_hold(dev); - rtnl_unlock(); - - if (rc == 0) { - /* Driver will handle this itself */ - schedule_timeout_interruptible( - id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT); - } else { - /* Driver expects to be called at twice the frequency in rc */ - int n = rc * 2, i, interval = HZ / n; - - /* Count down seconds */ - do { - /* Count down iterations per second */ - i = n; - do { - rtnl_lock(); - rc = ops->set_phys_id(dev, - (i & 1) ? 
ETHTOOL_ID_OFF : ETHTOOL_ID_ON); - rtnl_unlock(); - if (rc) - break; - schedule_timeout_interruptible(interval); - } while (!signal_pending(current) && --i != 0); - } while (!signal_pending(current) && - (id.data == 0 || --id.data != 0)); - } - - rtnl_lock(); - dev_put(dev); - busy = false; - - (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); - return rc; -} - -static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_stats stats; - const struct ethtool_ops *ops = dev->ethtool_ops; - u64 *data; - int ret, n_stats; - - if (!ops->get_ethtool_stats || !ops->get_sset_count) - return -EOPNOTSUPP; - - n_stats = ops->get_sset_count(dev, ETH_SS_STATS); - if (n_stats < 0) - return n_stats; - if (n_stats > S32_MAX / sizeof(u64)) - return -ENOMEM; - WARN_ON_ONCE(!n_stats); - if (copy_from_user(&stats, useraddr, sizeof(stats))) - return -EFAULT; - - stats.n_stats = n_stats; - - if (n_stats) { - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (!data) - return -ENOMEM; - ops->get_ethtool_stats(dev, &stats, data); - } else { - data = NULL; - } - - ret = -EFAULT; - if (copy_to_user(useraddr, &stats, sizeof(stats))) - goto out; - useraddr += sizeof(stats); - if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64))) - goto out; - ret = 0; - - out: - vfree(data); - return ret; -} - -static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - struct phy_device *phydev = dev->phydev; - struct ethtool_stats stats; - u64 *data; - int ret, n_stats; - - if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count)) - return -EOPNOTSUPP; - - if (dev->phydev && !ops->get_ethtool_phy_stats) - n_stats = phy_ethtool_get_sset_count(dev->phydev); - else - n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); - if (n_stats < 0) - return n_stats; - if (n_stats > S32_MAX / sizeof(u64)) - return -ENOMEM; - WARN_ON_ONCE(!n_stats); - - if (copy_from_user(&stats, 
useraddr, sizeof(stats))) - return -EFAULT; - - stats.n_stats = n_stats; - - if (n_stats) { - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (!data) - return -ENOMEM; - - if (dev->phydev && !ops->get_ethtool_phy_stats) { - ret = phy_ethtool_get_stats(dev->phydev, &stats, data); - if (ret < 0) - goto out; - } else { - ops->get_ethtool_phy_stats(dev, &stats, data); - } - } else { - data = NULL; - } - - ret = -EFAULT; - if (copy_to_user(useraddr, &stats, sizeof(stats))) - goto out; - useraddr += sizeof(stats); - if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64))) - goto out; - ret = 0; - - out: - vfree(data); - return ret; -} - -static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_perm_addr epaddr; - - if (copy_from_user(&epaddr, useraddr, sizeof(epaddr))) - return -EFAULT; - - if (epaddr.size < dev->addr_len) - return -ETOOSMALL; - epaddr.size = dev->addr_len; - - if (copy_to_user(useraddr, &epaddr, sizeof(epaddr))) - return -EFAULT; - useraddr += sizeof(epaddr); - if (copy_to_user(useraddr, dev->perm_addr, epaddr.size)) - return -EFAULT; - return 0; -} - -static int ethtool_get_value(struct net_device *dev, char __user *useraddr, - u32 cmd, u32 (*actor)(struct net_device *)) -{ - struct ethtool_value edata = { .cmd = cmd }; - - if (!actor) - return -EOPNOTSUPP; - - edata.data = actor(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_value_void(struct net_device *dev, char __user *useraddr, - void (*actor)(struct net_device *, u32)) -{ - struct ethtool_value edata; - - if (!actor) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - actor(dev, edata.data); - return 0; -} - -static int ethtool_set_value(struct net_device *dev, char __user *useraddr, - int (*actor)(struct net_device *, u32)) -{ - struct ethtool_value edata; - - if (!actor) - return -EOPNOTSUPP; - - if 
(copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return actor(dev, edata.data); -} - -static noinline_for_stack int ethtool_flash_device(struct net_device *dev, - char __user *useraddr) -{ - struct ethtool_flash efl; - - if (copy_from_user(&efl, useraddr, sizeof(efl))) - return -EFAULT; - efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0; - - if (!dev->ethtool_ops->flash_device) - return devlink_compat_flash_update(dev, efl.data); - - return dev->ethtool_ops->flash_device(dev, &efl); -} - -static int ethtool_set_dump(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_dump dump; - - if (!dev->ethtool_ops->set_dump) - return -EOPNOTSUPP; - - if (copy_from_user(&dump, useraddr, sizeof(dump))) - return -EFAULT; - - return dev->ethtool_ops->set_dump(dev, &dump); -} - -static int ethtool_get_dump_flag(struct net_device *dev, - void __user *useraddr) -{ - int ret; - struct ethtool_dump dump; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_dump_flag) - return -EOPNOTSUPP; - - if (copy_from_user(&dump, useraddr, sizeof(dump))) - return -EFAULT; - - ret = ops->get_dump_flag(dev, &dump); - if (ret) - return ret; - - if (copy_to_user(useraddr, &dump, sizeof(dump))) - return -EFAULT; - return 0; -} - -static int ethtool_get_dump_data(struct net_device *dev, - void __user *useraddr) -{ - int ret; - __u32 len; - struct ethtool_dump dump, tmp; - const struct ethtool_ops *ops = dev->ethtool_ops; - void *data = NULL; - - if (!ops->get_dump_data || !ops->get_dump_flag) - return -EOPNOTSUPP; - - if (copy_from_user(&dump, useraddr, sizeof(dump))) - return -EFAULT; - - memset(&tmp, 0, sizeof(tmp)); - tmp.cmd = ETHTOOL_GET_DUMP_FLAG; - ret = ops->get_dump_flag(dev, &tmp); - if (ret) - return ret; - - len = min(tmp.len, dump.len); - if (!len) - return -EFAULT; - - /* Don't ever let the driver think there's more space available - * than it requested with .get_dump_flag(). 
- */ - dump.len = len; - - /* Always allocate enough space to hold the whole thing so that the - * driver does not need to check the length and bother with partial - * dumping. - */ - data = vzalloc(tmp.len); - if (!data) - return -ENOMEM; - ret = ops->get_dump_data(dev, &dump, data); - if (ret) - goto out; - - /* There are two sane possibilities: - * 1. The driver's .get_dump_data() does not touch dump.len. - * 2. Or it may set dump.len to how much it really writes, which - * should be tmp.len (or len if it can do a partial dump). - * In any case respond to userspace with the actual length of data - * it's receiving. - */ - WARN_ON(dump.len != len && dump.len != tmp.len); - dump.len = len; - - if (copy_to_user(useraddr, &dump, sizeof(dump))) { - ret = -EFAULT; - goto out; - } - useraddr += offsetof(struct ethtool_dump, data); - if (copy_to_user(useraddr, data, len)) - ret = -EFAULT; -out: - vfree(data); - return ret; -} - -static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) -{ - int err = 0; - struct ethtool_ts_info info; - const struct ethtool_ops *ops = dev->ethtool_ops; - struct phy_device *phydev = dev->phydev; - - memset(&info, 0, sizeof(info)); - info.cmd = ETHTOOL_GET_TS_INFO; - - if (phydev && phydev->drv && phydev->drv->ts_info) { - err = phydev->drv->ts_info(phydev, &info); - } else if (ops->get_ts_info) { - err = ops->get_ts_info(dev, &info); - } else { - info.so_timestamping = - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info.phc_index = -1; - } - - if (err) - return err; - - if (copy_to_user(useraddr, &info, sizeof(info))) - err = -EFAULT; - - return err; -} - -static int __ethtool_get_module_info(struct net_device *dev, - struct ethtool_modinfo *modinfo) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - struct phy_device *phydev = dev->phydev; - - if (dev->sfp_bus) - return sfp_get_module_info(dev->sfp_bus, modinfo); - - if (phydev && phydev->drv && phydev->drv->module_info) - return 
phydev->drv->module_info(phydev, modinfo); - - if (ops->get_module_info) - return ops->get_module_info(dev, modinfo); - - return -EOPNOTSUPP; -} - -static int ethtool_get_module_info(struct net_device *dev, - void __user *useraddr) -{ - int ret; - struct ethtool_modinfo modinfo; - - if (copy_from_user(&modinfo, useraddr, sizeof(modinfo))) - return -EFAULT; - - ret = __ethtool_get_module_info(dev, &modinfo); - if (ret) - return ret; - - if (copy_to_user(useraddr, &modinfo, sizeof(modinfo))) - return -EFAULT; - - return 0; -} - -static int __ethtool_get_module_eeprom(struct net_device *dev, - struct ethtool_eeprom *ee, u8 *data) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - struct phy_device *phydev = dev->phydev; - - if (dev->sfp_bus) - return sfp_get_module_eeprom(dev->sfp_bus, ee, data); - - if (phydev && phydev->drv && phydev->drv->module_eeprom) - return phydev->drv->module_eeprom(phydev, ee, data); - - if (ops->get_module_eeprom) - return ops->get_module_eeprom(dev, ee, data); - - return -EOPNOTSUPP; -} - -static int ethtool_get_module_eeprom(struct net_device *dev, - void __user *useraddr) -{ - int ret; - struct ethtool_modinfo modinfo; - - ret = __ethtool_get_module_info(dev, &modinfo); - if (ret) - return ret; - - return ethtool_get_any_eeprom(dev, useraddr, - __ethtool_get_module_eeprom, - modinfo.eeprom_len); -} - -static int ethtool_tunable_valid(const struct ethtool_tunable *tuna) -{ - switch (tuna->id) { - case ETHTOOL_RX_COPYBREAK: - case ETHTOOL_TX_COPYBREAK: - if (tuna->len != sizeof(u32) || - tuna->type_id != ETHTOOL_TUNABLE_U32) - return -EINVAL; - break; - case ETHTOOL_PFC_PREVENTION_TOUT: - if (tuna->len != sizeof(u16) || - tuna->type_id != ETHTOOL_TUNABLE_U16) - return -EINVAL; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr) -{ - int ret; - struct ethtool_tunable tuna; - const struct ethtool_ops *ops = dev->ethtool_ops; - void *data; - - if 
(!ops->get_tunable) - return -EOPNOTSUPP; - if (copy_from_user(&tuna, useraddr, sizeof(tuna))) - return -EFAULT; - ret = ethtool_tunable_valid(&tuna); - if (ret) - return ret; - data = kmalloc(tuna.len, GFP_USER); - if (!data) - return -ENOMEM; - ret = ops->get_tunable(dev, &tuna, data); - if (ret) - goto out; - useraddr += sizeof(tuna); - ret = -EFAULT; - if (copy_to_user(useraddr, data, tuna.len)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) -{ - int ret; - struct ethtool_tunable tuna; - const struct ethtool_ops *ops = dev->ethtool_ops; - void *data; - - if (!ops->set_tunable) - return -EOPNOTSUPP; - if (copy_from_user(&tuna, useraddr, sizeof(tuna))) - return -EFAULT; - ret = ethtool_tunable_valid(&tuna); - if (ret) - return ret; - useraddr += sizeof(tuna); - data = memdup_user(useraddr, tuna.len); - if (IS_ERR(data)) - return PTR_ERR(data); - ret = ops->set_tunable(dev, &tuna, data); - - kfree(data); - return ret; -} - -static noinline_for_stack int -ethtool_get_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) -{ - u32 bit; - int ret; - DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); - - if (!dev->ethtool_ops->get_per_queue_coalesce) - return -EOPNOTSUPP; - - useraddr += sizeof(*per_queue_opt); - - bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask, - MAX_NUM_QUEUE); - - for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { - struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; - - ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, &coalesce); - if (ret != 0) - return ret; - if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) - return -EFAULT; - useraddr += sizeof(coalesce); - } - - return 0; -} - -static noinline_for_stack int -ethtool_set_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) -{ - u32 bit; - int i, ret = 
0; - int n_queue; - struct ethtool_coalesce *backup = NULL, *tmp = NULL; - DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); - - if ((!dev->ethtool_ops->set_per_queue_coalesce) || - (!dev->ethtool_ops->get_per_queue_coalesce)) - return -EOPNOTSUPP; - - useraddr += sizeof(*per_queue_opt); - - bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask, MAX_NUM_QUEUE); - n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE); - tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL); - if (!backup) - return -ENOMEM; - - for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { - struct ethtool_coalesce coalesce; - - ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, tmp); - if (ret != 0) - goto roll_back; - - tmp++; - - if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) { - ret = -EFAULT; - goto roll_back; - } - - ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce); - if (ret != 0) - goto roll_back; - - useraddr += sizeof(coalesce); - } - -roll_back: - if (ret != 0) { - tmp = backup; - for_each_set_bit(i, queue_mask, bit) { - dev->ethtool_ops->set_per_queue_coalesce(dev, i, tmp); - tmp++; - } - } - kfree(backup); - - return ret; -} - -static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev, - void __user *useraddr, u32 sub_cmd) -{ - struct ethtool_per_queue_op per_queue_opt; - - if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt))) - return -EFAULT; - - if (per_queue_opt.sub_command != sub_cmd) - return -EINVAL; - - switch (per_queue_opt.sub_command) { - case ETHTOOL_GCOALESCE: - return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); - case ETHTOOL_SCOALESCE: - return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt); - default: - return -EOPNOTSUPP; - }; -} - -static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna) -{ - switch (tuna->id) { - case ETHTOOL_PHY_DOWNSHIFT: - case ETHTOOL_PHY_FAST_LINK_DOWN: - if (tuna->len != sizeof(u8) || - tuna->type_id != 
ETHTOOL_TUNABLE_U8) - return -EINVAL; - break; - case ETHTOOL_PHY_EDPD: - if (tuna->len != sizeof(u16) || - tuna->type_id != ETHTOOL_TUNABLE_U16) - return -EINVAL; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int get_phy_tunable(struct net_device *dev, void __user *useraddr) -{ - int ret; - struct ethtool_tunable tuna; - struct phy_device *phydev = dev->phydev; - void *data; - - if (!(phydev && phydev->drv && phydev->drv->get_tunable)) - return -EOPNOTSUPP; - - if (copy_from_user(&tuna, useraddr, sizeof(tuna))) - return -EFAULT; - ret = ethtool_phy_tunable_valid(&tuna); - if (ret) - return ret; - data = kmalloc(tuna.len, GFP_USER); - if (!data) - return -ENOMEM; - mutex_lock(&phydev->lock); - ret = phydev->drv->get_tunable(phydev, &tuna, data); - mutex_unlock(&phydev->lock); - if (ret) - goto out; - useraddr += sizeof(tuna); - ret = -EFAULT; - if (copy_to_user(useraddr, data, tuna.len)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int set_phy_tunable(struct net_device *dev, void __user *useraddr) -{ - int ret; - struct ethtool_tunable tuna; - struct phy_device *phydev = dev->phydev; - void *data; - - if (!(phydev && phydev->drv && phydev->drv->set_tunable)) - return -EOPNOTSUPP; - if (copy_from_user(&tuna, useraddr, sizeof(tuna))) - return -EFAULT; - ret = ethtool_phy_tunable_valid(&tuna); - if (ret) - return ret; - useraddr += sizeof(tuna); - data = memdup_user(useraddr, tuna.len); - if (IS_ERR(data)) - return PTR_ERR(data); - mutex_lock(&phydev->lock); - ret = phydev->drv->set_tunable(phydev, &tuna, data); - mutex_unlock(&phydev->lock); - - kfree(data); - return ret; -} - -static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_fecparam fecparam = { .cmd = ETHTOOL_GFECPARAM }; - int rc; - - if (!dev->ethtool_ops->get_fecparam) - return -EOPNOTSUPP; - - rc = dev->ethtool_ops->get_fecparam(dev, &fecparam); - if (rc) - return rc; - - if (copy_to_user(useraddr, &fecparam, 
sizeof(fecparam))) - return -EFAULT; - return 0; -} - -static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_fecparam fecparam; - - if (!dev->ethtool_ops->set_fecparam) - return -EOPNOTSUPP; - - if (copy_from_user(&fecparam, useraddr, sizeof(fecparam))) - return -EFAULT; - - return dev->ethtool_ops->set_fecparam(dev, &fecparam); -} - -/* The main entry point in this file. Called from net/core/dev_ioctl.c */ - -int dev_ethtool(struct net *net, struct ifreq *ifr) -{ - struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); - void __user *useraddr = ifr->ifr_data; - u32 ethcmd, sub_cmd; - int rc; - netdev_features_t old_features; - - if (!dev || !netif_device_present(dev)) - return -ENODEV; - - if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) - return -EFAULT; - - if (ethcmd == ETHTOOL_PERQUEUE) { - if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd))) - return -EFAULT; - } else { - sub_cmd = ethcmd; - } - /* Allow some commands to be done by anyone */ - switch (sub_cmd) { - case ETHTOOL_GSET: - case ETHTOOL_GDRVINFO: - case ETHTOOL_GMSGLVL: - case ETHTOOL_GLINK: - case ETHTOOL_GCOALESCE: - case ETHTOOL_GRINGPARAM: - case ETHTOOL_GPAUSEPARAM: - case ETHTOOL_GRXCSUM: - case ETHTOOL_GTXCSUM: - case ETHTOOL_GSG: - case ETHTOOL_GSSET_INFO: - case ETHTOOL_GSTRINGS: - case ETHTOOL_GSTATS: - case ETHTOOL_GPHYSTATS: - case ETHTOOL_GTSO: - case ETHTOOL_GPERMADDR: - case ETHTOOL_GUFO: - case ETHTOOL_GGSO: - case ETHTOOL_GGRO: - case ETHTOOL_GFLAGS: - case ETHTOOL_GPFLAGS: - case ETHTOOL_GRXFH: - case ETHTOOL_GRXRINGS: - case ETHTOOL_GRXCLSRLCNT: - case ETHTOOL_GRXCLSRULE: - case ETHTOOL_GRXCLSRLALL: - case ETHTOOL_GRXFHINDIR: - case ETHTOOL_GRSSH: - case ETHTOOL_GFEATURES: - case ETHTOOL_GCHANNELS: - case ETHTOOL_GET_TS_INFO: - case ETHTOOL_GEEE: - case ETHTOOL_GTUNABLE: - case ETHTOOL_PHY_GTUNABLE: - case ETHTOOL_GLINKSETTINGS: - case ETHTOOL_GFECPARAM: - break; - default: - if 
(!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - } - - if (dev->ethtool_ops->begin) { - rc = dev->ethtool_ops->begin(dev); - if (rc < 0) - return rc; - } - old_features = dev->features; - - switch (ethcmd) { - case ETHTOOL_GSET: - rc = ethtool_get_settings(dev, useraddr); - break; - case ETHTOOL_SSET: - rc = ethtool_set_settings(dev, useraddr); - break; - case ETHTOOL_GDRVINFO: - rc = ethtool_get_drvinfo(dev, useraddr); - break; - case ETHTOOL_GREGS: - rc = ethtool_get_regs(dev, useraddr); - break; - case ETHTOOL_GWOL: - rc = ethtool_get_wol(dev, useraddr); - break; - case ETHTOOL_SWOL: - rc = ethtool_set_wol(dev, useraddr); - break; - case ETHTOOL_GMSGLVL: - rc = ethtool_get_value(dev, useraddr, ethcmd, - dev->ethtool_ops->get_msglevel); - break; - case ETHTOOL_SMSGLVL: - rc = ethtool_set_value_void(dev, useraddr, - dev->ethtool_ops->set_msglevel); - break; - case ETHTOOL_GEEE: - rc = ethtool_get_eee(dev, useraddr); - break; - case ETHTOOL_SEEE: - rc = ethtool_set_eee(dev, useraddr); - break; - case ETHTOOL_NWAY_RST: - rc = ethtool_nway_reset(dev); - break; - case ETHTOOL_GLINK: - rc = ethtool_get_link(dev, useraddr); - break; - case ETHTOOL_GEEPROM: - rc = ethtool_get_eeprom(dev, useraddr); - break; - case ETHTOOL_SEEPROM: - rc = ethtool_set_eeprom(dev, useraddr); - break; - case ETHTOOL_GCOALESCE: - rc = ethtool_get_coalesce(dev, useraddr); - break; - case ETHTOOL_SCOALESCE: - rc = ethtool_set_coalesce(dev, useraddr); - break; - case ETHTOOL_GRINGPARAM: - rc = ethtool_get_ringparam(dev, useraddr); - break; - case ETHTOOL_SRINGPARAM: - rc = ethtool_set_ringparam(dev, useraddr); - break; - case ETHTOOL_GPAUSEPARAM: - rc = ethtool_get_pauseparam(dev, useraddr); - break; - case ETHTOOL_SPAUSEPARAM: - rc = ethtool_set_pauseparam(dev, useraddr); - break; - case ETHTOOL_TEST: - rc = ethtool_self_test(dev, useraddr); - break; - case ETHTOOL_GSTRINGS: - rc = ethtool_get_strings(dev, useraddr); - break; - case ETHTOOL_PHYS_ID: - rc = ethtool_phys_id(dev, 
useraddr); - break; - case ETHTOOL_GSTATS: - rc = ethtool_get_stats(dev, useraddr); - break; - case ETHTOOL_GPERMADDR: - rc = ethtool_get_perm_addr(dev, useraddr); - break; - case ETHTOOL_GFLAGS: - rc = ethtool_get_value(dev, useraddr, ethcmd, - __ethtool_get_flags); - break; - case ETHTOOL_SFLAGS: - rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); - break; - case ETHTOOL_GPFLAGS: - rc = ethtool_get_value(dev, useraddr, ethcmd, - dev->ethtool_ops->get_priv_flags); - break; - case ETHTOOL_SPFLAGS: - rc = ethtool_set_value(dev, useraddr, - dev->ethtool_ops->set_priv_flags); - break; - case ETHTOOL_GRXFH: - case ETHTOOL_GRXRINGS: - case ETHTOOL_GRXCLSRLCNT: - case ETHTOOL_GRXCLSRULE: - case ETHTOOL_GRXCLSRLALL: - rc = ethtool_get_rxnfc(dev, ethcmd, useraddr); - break; - case ETHTOOL_SRXFH: - case ETHTOOL_SRXCLSRLDEL: - case ETHTOOL_SRXCLSRLINS: - rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); - break; - case ETHTOOL_FLASHDEV: - rc = ethtool_flash_device(dev, useraddr); - break; - case ETHTOOL_RESET: - rc = ethtool_reset(dev, useraddr); - break; - case ETHTOOL_GSSET_INFO: - rc = ethtool_get_sset_info(dev, useraddr); - break; - case ETHTOOL_GRXFHINDIR: - rc = ethtool_get_rxfh_indir(dev, useraddr); - break; - case ETHTOOL_SRXFHINDIR: - rc = ethtool_set_rxfh_indir(dev, useraddr); - break; - case ETHTOOL_GRSSH: - rc = ethtool_get_rxfh(dev, useraddr); - break; - case ETHTOOL_SRSSH: - rc = ethtool_set_rxfh(dev, useraddr); - break; - case ETHTOOL_GFEATURES: - rc = ethtool_get_features(dev, useraddr); - break; - case ETHTOOL_SFEATURES: - rc = ethtool_set_features(dev, useraddr); - break; - case ETHTOOL_GTXCSUM: - case ETHTOOL_GRXCSUM: - case ETHTOOL_GSG: - case ETHTOOL_GTSO: - case ETHTOOL_GGSO: - case ETHTOOL_GGRO: - rc = ethtool_get_one_feature(dev, useraddr, ethcmd); - break; - case ETHTOOL_STXCSUM: - case ETHTOOL_SRXCSUM: - case ETHTOOL_SSG: - case ETHTOOL_STSO: - case ETHTOOL_SGSO: - case ETHTOOL_SGRO: - rc = ethtool_set_one_feature(dev, useraddr, ethcmd); - 
break; - case ETHTOOL_GCHANNELS: - rc = ethtool_get_channels(dev, useraddr); - break; - case ETHTOOL_SCHANNELS: - rc = ethtool_set_channels(dev, useraddr); - break; - case ETHTOOL_SET_DUMP: - rc = ethtool_set_dump(dev, useraddr); - break; - case ETHTOOL_GET_DUMP_FLAG: - rc = ethtool_get_dump_flag(dev, useraddr); - break; - case ETHTOOL_GET_DUMP_DATA: - rc = ethtool_get_dump_data(dev, useraddr); - break; - case ETHTOOL_GET_TS_INFO: - rc = ethtool_get_ts_info(dev, useraddr); - break; - case ETHTOOL_GMODULEINFO: - rc = ethtool_get_module_info(dev, useraddr); - break; - case ETHTOOL_GMODULEEEPROM: - rc = ethtool_get_module_eeprom(dev, useraddr); - break; - case ETHTOOL_GTUNABLE: - rc = ethtool_get_tunable(dev, useraddr); - break; - case ETHTOOL_STUNABLE: - rc = ethtool_set_tunable(dev, useraddr); - break; - case ETHTOOL_GPHYSTATS: - rc = ethtool_get_phy_stats(dev, useraddr); - break; - case ETHTOOL_PERQUEUE: - rc = ethtool_set_per_queue(dev, useraddr, sub_cmd); - break; - case ETHTOOL_GLINKSETTINGS: - rc = ethtool_get_link_ksettings(dev, useraddr); - break; - case ETHTOOL_SLINKSETTINGS: - rc = ethtool_set_link_ksettings(dev, useraddr); - break; - case ETHTOOL_PHY_GTUNABLE: - rc = get_phy_tunable(dev, useraddr); - break; - case ETHTOOL_PHY_STUNABLE: - rc = set_phy_tunable(dev, useraddr); - break; - case ETHTOOL_GFECPARAM: - rc = ethtool_get_fecparam(dev, useraddr); - break; - case ETHTOOL_SFECPARAM: - rc = ethtool_set_fecparam(dev, useraddr); - break; - default: - rc = -EOPNOTSUPP; - } - - if (dev->ethtool_ops->complete) - dev->ethtool_ops->complete(dev); - - if (old_features != dev->features) - netdev_features_change(dev); - - return rc; -} - -struct ethtool_rx_flow_key { - struct flow_dissector_key_basic basic; - union { - struct flow_dissector_key_ipv4_addrs ipv4; - struct flow_dissector_key_ipv6_addrs ipv6; - }; - struct flow_dissector_key_ports tp; - struct flow_dissector_key_ip ip; - struct flow_dissector_key_vlan vlan; - struct flow_dissector_key_eth_addrs 
eth_addrs; -} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ - -struct ethtool_rx_flow_match { - struct flow_dissector dissector; - struct ethtool_rx_flow_key key; - struct ethtool_rx_flow_key mask; -}; - -struct ethtool_rx_flow_rule * -ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) -{ - const struct ethtool_rx_flow_spec *fs = input->fs; - static struct in6_addr zero_addr = {}; - struct ethtool_rx_flow_match *match; - struct ethtool_rx_flow_rule *flow; - struct flow_action_entry *act; - - flow = kzalloc(sizeof(struct ethtool_rx_flow_rule) + - sizeof(struct ethtool_rx_flow_match), GFP_KERNEL); - if (!flow) - return ERR_PTR(-ENOMEM); - - /* ethtool_rx supports only one single action per rule. */ - flow->rule = flow_rule_alloc(1); - if (!flow->rule) { - kfree(flow); - return ERR_PTR(-ENOMEM); - } - - match = (struct ethtool_rx_flow_match *)flow->priv; - flow->rule->match.dissector = &match->dissector; - flow->rule->match.mask = &match->mask; - flow->rule->match.key = &match->key; - - match->mask.basic.n_proto = htons(0xffff); - - switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { - case ETHER_FLOW: { - const struct ethhdr *ether_spec, *ether_m_spec; - - ether_spec = &fs->h_u.ether_spec; - ether_m_spec = &fs->m_u.ether_spec; - - if (!is_zero_ether_addr(ether_m_spec->h_source)) { - ether_addr_copy(match->key.eth_addrs.src, - ether_spec->h_source); - ether_addr_copy(match->mask.eth_addrs.src, - ether_m_spec->h_source); - } - if (!is_zero_ether_addr(ether_m_spec->h_dest)) { - ether_addr_copy(match->key.eth_addrs.dst, - ether_spec->h_dest); - ether_addr_copy(match->mask.eth_addrs.dst, - ether_m_spec->h_dest); - } - if (ether_m_spec->h_proto) { - match->key.basic.n_proto = ether_spec->h_proto; - match->mask.basic.n_proto = ether_m_spec->h_proto; - } - } - break; - case TCP_V4_FLOW: - case UDP_V4_FLOW: { - const struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec; - - match->key.basic.n_proto = 
htons(ETH_P_IP); - - v4_spec = &fs->h_u.tcp_ip4_spec; - v4_m_spec = &fs->m_u.tcp_ip4_spec; - - if (v4_m_spec->ip4src) { - match->key.ipv4.src = v4_spec->ip4src; - match->mask.ipv4.src = v4_m_spec->ip4src; - } - if (v4_m_spec->ip4dst) { - match->key.ipv4.dst = v4_spec->ip4dst; - match->mask.ipv4.dst = v4_m_spec->ip4dst; - } - if (v4_m_spec->ip4src || - v4_m_spec->ip4dst) { - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS); - match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] = - offsetof(struct ethtool_rx_flow_key, ipv4); - } - if (v4_m_spec->psrc) { - match->key.tp.src = v4_spec->psrc; - match->mask.tp.src = v4_m_spec->psrc; - } - if (v4_m_spec->pdst) { - match->key.tp.dst = v4_spec->pdst; - match->mask.tp.dst = v4_m_spec->pdst; - } - if (v4_m_spec->psrc || - v4_m_spec->pdst) { - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_PORTS); - match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = - offsetof(struct ethtool_rx_flow_key, tp); - } - if (v4_m_spec->tos) { - match->key.ip.tos = v4_spec->tos; - match->mask.ip.tos = v4_m_spec->tos; - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_IP); - match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = - offsetof(struct ethtool_rx_flow_key, ip); - } - } - break; - case TCP_V6_FLOW: - case UDP_V6_FLOW: { - const struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec; - - match->key.basic.n_proto = htons(ETH_P_IPV6); - - v6_spec = &fs->h_u.tcp_ip6_spec; - v6_m_spec = &fs->m_u.tcp_ip6_spec; - if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { - memcpy(&match->key.ipv6.src, v6_spec->ip6src, - sizeof(match->key.ipv6.src)); - memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src, - sizeof(match->mask.ipv6.src)); - } - if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { - memcpy(&match->key.ipv6.dst, v6_spec->ip6dst, - sizeof(match->key.ipv6.dst)); - memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst, - sizeof(match->mask.ipv6.dst)); - } - if (memcmp(v6_m_spec->ip6src, &zero_addr, 
sizeof(zero_addr)) || - memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS); - match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] = - offsetof(struct ethtool_rx_flow_key, ipv6); - } - if (v6_m_spec->psrc) { - match->key.tp.src = v6_spec->psrc; - match->mask.tp.src = v6_m_spec->psrc; - } - if (v6_m_spec->pdst) { - match->key.tp.dst = v6_spec->pdst; - match->mask.tp.dst = v6_m_spec->pdst; - } - if (v6_m_spec->psrc || - v6_m_spec->pdst) { - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_PORTS); - match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = - offsetof(struct ethtool_rx_flow_key, tp); - } - if (v6_m_spec->tclass) { - match->key.ip.tos = v6_spec->tclass; - match->mask.ip.tos = v6_m_spec->tclass; - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_IP); - match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = - offsetof(struct ethtool_rx_flow_key, ip); - } - } - break; - default: - ethtool_rx_flow_rule_destroy(flow); - return ERR_PTR(-EINVAL); - } - - switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { - case TCP_V4_FLOW: - case TCP_V6_FLOW: - match->key.basic.ip_proto = IPPROTO_TCP; - break; - case UDP_V4_FLOW: - case UDP_V6_FLOW: - match->key.basic.ip_proto = IPPROTO_UDP; - break; - } - match->mask.basic.ip_proto = 0xff; - - match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC); - match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] = - offsetof(struct ethtool_rx_flow_key, basic); - - if (fs->flow_type & FLOW_EXT) { - const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; - const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; - - if (ext_m_spec->vlan_etype) { - match->key.vlan.vlan_tpid = ext_h_spec->vlan_etype; - match->mask.vlan.vlan_tpid = ext_m_spec->vlan_etype; - } - - if (ext_m_spec->vlan_tci) { - match->key.vlan.vlan_id = - ntohs(ext_h_spec->vlan_tci) & 0x0fff; - match->mask.vlan.vlan_id = - ntohs(ext_m_spec->vlan_tci) & 0x0fff; - - 
match->key.vlan.vlan_dei = - !!(ext_h_spec->vlan_tci & htons(0x1000)); - match->mask.vlan.vlan_dei = - !!(ext_m_spec->vlan_tci & htons(0x1000)); - - match->key.vlan.vlan_priority = - (ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13; - match->mask.vlan.vlan_priority = - (ntohs(ext_m_spec->vlan_tci) & 0xe000) >> 13; - } - - if (ext_m_spec->vlan_etype || - ext_m_spec->vlan_tci) { - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_VLAN); - match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] = - offsetof(struct ethtool_rx_flow_key, vlan); - } - } - if (fs->flow_type & FLOW_MAC_EXT) { - const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; - const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; - - memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest, - ETH_ALEN); - memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest, - ETH_ALEN); - - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS); - match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] = - offsetof(struct ethtool_rx_flow_key, eth_addrs); - } - - act = &flow->rule->action.entries[0]; - switch (fs->ring_cookie) { - case RX_CLS_FLOW_DISC: - act->id = FLOW_ACTION_DROP; - break; - case RX_CLS_FLOW_WAKE: - act->id = FLOW_ACTION_WAKE; - break; - default: - act->id = FLOW_ACTION_QUEUE; - if (fs->flow_type & FLOW_RSS) - act->queue.ctx = input->rss_ctx; - - act->queue.vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); - act->queue.index = ethtool_get_flow_spec_ring(fs->ring_cookie); - break; - } - - return flow; -} -EXPORT_SYMBOL(ethtool_rx_flow_rule_create); - -void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *flow) -{ - kfree(flow->rule); - kfree(flow); -} -EXPORT_SYMBOL(ethtool_rx_flow_rule_destroy); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 3e7e15278c46..bd7eba9066f8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -974,7 +974,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->family = ops->family; - 
frh->table = rule->table; + frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT; if (nla_put_u32(skb, FRA_TABLE, rule->table)) goto nla_put_failure; if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) diff --git a/net/core/filter.c b/net/core/filter.c index 538f6a735a19..c180871e606d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1573,7 +1573,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) return -EPERM; prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); - if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL) + if (PTR_ERR(prog) == -EINVAL) prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT); if (IS_ERR(prog)) return PTR_ERR(prog); @@ -3459,119 +3459,30 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { .arg2_type = ARG_ANYTHING, }; -static int __bpf_tx_xdp(struct net_device *dev, - struct bpf_map *map, - struct xdp_buff *xdp, - u32 index) -{ - struct xdp_frame *xdpf; - int err, sent; - - if (!dev->netdev_ops->ndo_xdp_xmit) { - return -EOPNOTSUPP; - } - - err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); - if (unlikely(err)) - return err; - - xdpf = convert_to_xdp_frame(xdp); - if (unlikely(!xdpf)) - return -EOVERFLOW; - - sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH); - if (sent <= 0) - return sent; - return 0; -} - -static noinline int -xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri) -{ - struct net_device *fwd; - u32 index = ri->tgt_index; - int err; - - fwd = dev_get_by_index_rcu(dev_net(dev), index); - ri->tgt_index = 0; - if (unlikely(!fwd)) { - err = -EINVAL; - goto err; - } - - err = __bpf_tx_xdp(fwd, NULL, xdp, 0); - if (unlikely(err)) - goto err; - - _trace_xdp_redirect(dev, xdp_prog, index); - return 0; -err: - _trace_xdp_redirect_err(dev, xdp_prog, index, err); - return err; -} - static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, - struct bpf_map *map, - 
struct xdp_buff *xdp, - u32 index) + struct bpf_map *map, struct xdp_buff *xdp) { - int err; - switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: { - struct bpf_dtab_netdev *dst = fwd; - - err = dev_map_enqueue(dst, xdp, dev_rx); - if (unlikely(err)) - return err; - break; - } - case BPF_MAP_TYPE_CPUMAP: { - struct bpf_cpu_map_entry *rcpu = fwd; - - err = cpu_map_enqueue(rcpu, xdp, dev_rx); - if (unlikely(err)) - return err; - break; - } - case BPF_MAP_TYPE_XSKMAP: { - struct xdp_sock *xs = fwd; - - err = __xsk_map_redirect(map, xdp, xs); - return err; - } + case BPF_MAP_TYPE_DEVMAP_HASH: + return dev_map_enqueue(fwd, xdp, dev_rx); + case BPF_MAP_TYPE_CPUMAP: + return cpu_map_enqueue(fwd, xdp, dev_rx); + case BPF_MAP_TYPE_XSKMAP: + return __xsk_map_redirect(fwd, xdp); default: - break; + return -EBADRQC; } return 0; } -void xdp_do_flush_map(void) +void xdp_do_flush(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - struct bpf_map *map = ri->map_to_flush; - - ri->map_to_flush = NULL; - if (map) { - switch (map->map_type) { - case BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: - __dev_map_flush(map); - break; - case BPF_MAP_TYPE_CPUMAP: - __cpu_map_flush(map); - break; - case BPF_MAP_TYPE_XSKMAP: - __xsk_map_flush(map); - break; - default: - break; - } - } + __dev_flush(); + __cpu_map_flush(); + __xsk_map_flush(); } -EXPORT_SYMBOL_GPL(xdp_do_flush_map); +EXPORT_SYMBOL_GPL(xdp_do_flush); static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) { @@ -3606,10 +3517,11 @@ void bpf_clear_redirect_map(struct bpf_map *map) } } -static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, struct bpf_map *map, - struct bpf_redirect_info *ri) +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) { + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_map *map = READ_ONCE(ri->map); u32 
index = ri->tgt_index; void *fwd = ri->tgt_value; int err; @@ -3618,32 +3530,27 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, ri->tgt_value = NULL; WRITE_ONCE(ri->map, NULL); - if (ri->map_to_flush && unlikely(ri->map_to_flush != map)) - xdp_do_flush_map(); + if (unlikely(!map)) { + fwd = dev_get_by_index_rcu(dev_net(dev), index); + if (unlikely(!fwd)) { + err = -EINVAL; + goto err; + } + + err = dev_xdp_enqueue(fwd, xdp, dev); + } else { + err = __bpf_tx_xdp_map(dev, fwd, map, xdp); + } - err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index); if (unlikely(err)) goto err; - ri->map_to_flush = map; _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); return 0; err: _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); return err; } - -int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) -{ - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - struct bpf_map *map = READ_ONCE(ri->map); - - if (likely(map)) - return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri); - - return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri); -} EXPORT_SYMBOL_GPL(xdp_do_redirect); static int xdp_do_generic_redirect_map(struct net_device *dev, @@ -5976,7 +5883,7 @@ bool bpf_helper_changes_pkt_data(void *func) return false; } -static const struct bpf_func_proto * +const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -6016,6 +5923,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_spin_unlock_proto; case BPF_FUNC_trace_printk: return bpf_get_trace_printk_proto(); + case BPF_FUNC_jiffies64: + return &bpf_jiffies64_proto; default: return NULL; } @@ -7648,21 +7557,21 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct bpf_sock, type): - BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, __sk_flags_offset)); - 
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); - *target_size = 2; + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock, sk_type), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock, sk_type, + sizeof_field(struct sock, sk_type), + target_size)); break; case offsetof(struct bpf_sock, protocol): - BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, __sk_flags_offset)); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); - *target_size = 1; + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock, sk_protocol), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock, sk_protocol, + sizeof_field(struct sock, sk_protocol), + target_size)); break; case offsetof(struct bpf_sock, src_ip4): @@ -7944,20 +7853,13 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct bpf_sock_addr, type): - SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( - struct bpf_sock_addr_kern, struct sock, sk, - __sk_flags_offset, BPF_W, 0); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); + SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, + struct sock, sk, sk_type); break; case offsetof(struct bpf_sock_addr, protocol): - SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( - struct bpf_sock_addr_kern, struct sock, sk, - __sk_flags_offset, BPF_W, 0); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, - SK_FL_PROTO_SHIFT); + SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, + struct sock, sk, sk_protocol); break; case offsetof(struct bpf_sock_addr, msg_src_ip4): @@ -8876,11 +8778,11 @@ sk_reuseport_is_valid_access(int off, int size, skb, \ SKB_FIELD) -#define 
SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \ - SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern, \ - struct sock, \ - sk, \ - SK_FIELD, BPF_SIZE, EXTRA_OFF) +#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \ + SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \ + struct sock, \ + sk, \ + SK_FIELD) static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, @@ -8904,16 +8806,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct sk_reuseport_md, ip_protocol): - BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); - SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset, - BPF_W, 0); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, - SK_FL_PROTO_SHIFT); - /* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian - * aware. No further narrowing or masking is needed. - */ - *target_size = 1; + SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol); break; case offsetof(struct sk_reuseport_md, data_end): @@ -8941,3 +8834,11 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = { const struct bpf_prog_ops sk_reuseport_prog_ops = { }; #endif /* CONFIG_INET */ + +DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp) + +void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) +{ + bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp), + prev_prog, prog); +} diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2dbbb030fbed..a1670dff0629 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -233,7 +233,7 @@ static bool icmp_has_id(u8 type) * @skb: sk_buff to extract from * @key_icmp: struct flow_dissector_key_icmp to fill * @data: raw buffer pointer to the packet - * @toff: offset to extract at + * @thoff: offset to extract at * @hlen: packet header length */ void skb_flow_get_icmp_tci(const struct sk_buff *skb, @@ -834,10 +834,10 @@ 
static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector *flow_dissector, void *target_container) { + struct flow_dissector_key_ports *key_ports = NULL; struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; - struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_tags *key_tags; key_control = skb_flow_dissector_target(flow_dissector, @@ -876,10 +876,17 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } - if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); + else if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE)) + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE, + target_container); + + if (key_ports) { key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 920784a9b7ff..789a73aa7bd8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3290,6 +3290,7 @@ static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) *pos = cpu+1; return per_cpu_ptr(tbl->stats, cpu); } + (*pos)++; return NULL; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 39402840025e..757cc1d084e7 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -211,16 +211,10 @@ static int net_eq_idr(int id, void *net, void *peer) return 0; } -/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc - * is set to true, thus the caller knows that the new id must be notified via - * rtnl. 
- */ -static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc) +/* Must be called from RCU-critical section or with nsid_lock held */ +static int __peernet2id(const struct net *net, struct net *peer) { int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); - bool alloc_it = *alloc; - - *alloc = false; /* Magic value for id 0. */ if (id == NET_ID_ZERO) @@ -228,23 +222,9 @@ static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc) if (id > 0) return id; - if (alloc_it) { - id = alloc_netid(net, peer, -1); - *alloc = true; - return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; - } - return NETNSA_NSID_NOT_ASSIGNED; } -/* should be called with nsid_lock held */ -static int __peernet2id(struct net *net, struct net *peer) -{ - bool no = false; - - return __peernet2id_alloc(net, peer, &no); -} - static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, struct nlmsghdr *nlh, gfp_t gfp); /* This function returns the id of a peer netns. If no id is assigned, one will @@ -252,38 +232,50 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, */ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) { - bool alloc = false, alive = false; int id; if (refcount_read(&net->count) == 0) return NETNSA_NSID_NOT_ASSIGNED; - spin_lock_bh(&net->nsid_lock); - /* - * When peer is obtained from RCU lists, we may race with + + spin_lock(&net->nsid_lock); + id = __peernet2id(net, peer); + if (id >= 0) { + spin_unlock(&net->nsid_lock); + return id; + } + + /* When peer is obtained from RCU lists, we may race with * its cleanup. Check whether it's alive, and this guarantees * we never hash a peer back to net->netns_ids, after it has * just been idr_remove()'d from there in cleanup_net(). 
*/ - if (maybe_get_net(peer)) - alive = alloc = true; - id = __peernet2id_alloc(net, peer, &alloc); - spin_unlock_bh(&net->nsid_lock); - if (alloc && id >= 0) - rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp); - if (alive) - put_net(peer); + if (!maybe_get_net(peer)) { + spin_unlock(&net->nsid_lock); + return NETNSA_NSID_NOT_ASSIGNED; + } + + id = alloc_netid(net, peer, -1); + spin_unlock(&net->nsid_lock); + + put_net(peer); + if (id < 0) + return NETNSA_NSID_NOT_ASSIGNED; + + rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp); + return id; } EXPORT_SYMBOL_GPL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ -int peernet2id(struct net *net, struct net *peer) +int peernet2id(const struct net *net, struct net *peer) { int id; - spin_lock_bh(&net->nsid_lock); + rcu_read_lock(); id = __peernet2id(net, peer); - spin_unlock_bh(&net->nsid_lock); + rcu_read_unlock(); + return id; } EXPORT_SYMBOL(peernet2id); @@ -291,12 +283,12 @@ EXPORT_SYMBOL(peernet2id); /* This function returns true is the peer netns has an id assigned into the * current netns. 
*/ -bool peernet_has_id(struct net *net, struct net *peer) +bool peernet_has_id(const struct net *net, struct net *peer) { return peernet2id(net, peer) >= 0; } -struct net *get_net_ns_by_id(struct net *net, int id) +struct net *get_net_ns_by_id(const struct net *net, int id) { struct net *peer; @@ -528,20 +520,20 @@ static void unhash_nsid(struct net *net, struct net *last) for_each_net(tmp) { int id; - spin_lock_bh(&tmp->nsid_lock); + spin_lock(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock_bh(&tmp->nsid_lock); + spin_unlock(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL, GFP_KERNEL); if (tmp == last) break; } - spin_lock_bh(&net->nsid_lock); + spin_lock(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); } static LLIST_HEAD(cleanup_list); @@ -754,9 +746,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, return PTR_ERR(peer); } - spin_lock_bh(&net->nsid_lock); + spin_lock(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); err = -EEXIST; NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, @@ -765,7 +757,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, } err = alloc_netid(net, peer, nsid); - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid, nlh, GFP_KERNEL); @@ -950,6 +942,7 @@ struct rtnl_net_dump_cb { int s_idx; }; +/* Runs in RCU-critical section. 
*/ static int rtnl_net_dumpid_one(int id, void *peer, void *data) { struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; @@ -1034,19 +1027,9 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) goto end; } - spin_lock_bh(&net_cb.tgt_net->nsid_lock); - if (net_cb.fillargs.add_ref && - !net_eq(net_cb.ref_net, net_cb.tgt_net) && - !spin_trylock_bh(&net_cb.ref_net->nsid_lock)) { - spin_unlock_bh(&net_cb.tgt_net->nsid_lock); - err = -EAGAIN; - goto end; - } + rcu_read_lock(); idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb); - if (net_cb.fillargs.add_ref && - !net_eq(net_cb.ref_net, net_cb.tgt_net)) - spin_unlock_bh(&net_cb.ref_net->nsid_lock); - spin_unlock_bh(&net_cb.tgt_net->nsid_lock); + rcu_read_unlock(); cb->args[0] = net_cb.idx; end: diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 0642f91c4038..b4c87fe31be2 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -53,30 +53,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) { kfree(css_cls_state(css)); } +/* + * To avoid freezing of sockets creation for tasks with a big number of threads + * and opened sockets let's release file_lock every 1000 iterated descriptors. + * New sockets will already have been created with new classid. 
+ */ + +struct update_classid_context { + u32 classid; + unsigned int batch; +}; + +#define UPDATE_CLASSID_BATCH 1000 + static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; + struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file, &err); if (sock) { spin_lock(&cgroup_sk_update_lock); - sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, - (unsigned long)v); + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); spin_unlock(&cgroup_sk_update_lock); } + if (--ctx->batch == 0) { + ctx->batch = UPDATE_CLASSID_BATCH; + return n + 1; + } return 0; } +static void update_classid_task(struct task_struct *p, u32 classid) +{ + struct update_classid_context ctx = { + .classid = classid, + .batch = UPDATE_CLASSID_BATCH + }; + unsigned int fd = 0; + + do { + task_lock(p); + fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); + task_unlock(p); + cond_resched(); + } while (fd); +} + static void cgrp_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct task_struct *p; cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)css_cls_state(css)->classid); - task_unlock(p); + update_classid_task(p, css_cls_state(css)->classid); } } @@ -98,10 +128,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, css_task_iter_start(css, 0, &it); while ((p = css_task_iter_next(&it))) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)cs->classid); - task_unlock(p); + update_classid_task(p, cs->classid); cond_resched(); } css_task_iter_end(&it); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index a6aefe989043..10d2b255df5e 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -96,40 +96,76 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) } EXPORT_SYMBOL(page_pool_create); -/* fast path */ -static struct page 
*__page_pool_get_cached(struct page_pool *pool) +static void __page_pool_return_page(struct page_pool *pool, struct page *page); + +noinline +static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) { struct ptr_ring *r = &pool->ring; - bool refill = false; struct page *page; - - /* Test for safe-context, caller should provide this guarantee */ - if (likely(in_serving_softirq())) { - if (likely(pool->alloc.count)) { - /* Fast-path */ - page = pool->alloc.cache[--pool->alloc.count]; - return page; - } - refill = true; - } + int pref_nid; /* preferred NUMA node */ /* Quicker fallback, avoid locks when ring is empty */ if (__ptr_ring_empty(r)) return NULL; - /* Slow-path: Get page from locked ring queue, - * refill alloc array if requested. + /* Softirq guarantees CPU and thus NUMA node is stable. This, + * assumes CPU refilling driver RX-ring will also run RX-NAPI. */ +#ifdef CONFIG_NUMA + pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid; +#else + /* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */ + pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */ +#endif + + /* Slower-path: Get pages from locked ring queue */ spin_lock(&r->consumer_lock); - page = __ptr_ring_consume(r); - if (refill) - pool->alloc.count = __ptr_ring_consume_batched(r, - pool->alloc.cache, - PP_ALLOC_CACHE_REFILL); + + /* Refill alloc array, but only if NUMA match */ + do { + page = __ptr_ring_consume(r); + if (unlikely(!page)) + break; + + if (likely(page_to_nid(page) == pref_nid)) { + pool->alloc.cache[pool->alloc.count++] = page; + } else { + /* NUMA mismatch; + * (1) release 1 page to page-allocator and + * (2) break out to fallthrough to alloc_pages_node. + * This limits stress on the page buddy allocator. 
+ */ + __page_pool_return_page(pool, page); + page = NULL; + break; + } + } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL); + + /* Return last page */ + if (likely(pool->alloc.count > 0)) + page = pool->alloc.cache[--pool->alloc.count]; + spin_unlock(&r->consumer_lock); return page; } +/* fast path */ +static struct page *__page_pool_get_cached(struct page_pool *pool) +{ + struct page *page; + + /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */ + if (likely(pool->alloc.count)) { + /* Fast-path */ + page = pool->alloc.cache[--pool->alloc.count]; + } else { + page = page_pool_refill_alloc_cache(pool); + } + + return page; +} + static void page_pool_dma_sync_for_device(struct page_pool *pool, struct page *page, unsigned int dma_sync_size) @@ -163,7 +199,11 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, */ /* Cache was empty, do real allocation */ +#ifdef CONFIG_NUMA page = alloc_pages_node(pool->p.nid, gfp, pool->p.order); +#else + page = alloc_pages(gfp, pool->p.order); +#endif if (!page) return NULL; @@ -311,13 +351,10 @@ static bool __page_pool_recycle_direct(struct page *page, /* page is NOT reusable when: * 1) allocated when system is under some pressure. (page_is_pfmemalloc) - * 2) belongs to a different NUMA node than pool->p.nid. - * - * To update pool->p.nid users must call page_pool_update_nid. */ static bool pool_page_reusable(struct page_pool *pool, struct page *page) { - return !page_is_pfmemalloc(page) && page_to_nid(page) == pool->p.nid; + return !page_is_pfmemalloc(page); } void __page_pool_put_page(struct page_pool *pool, struct page *page, @@ -484,7 +521,15 @@ EXPORT_SYMBOL(page_pool_destroy); /* Caller must provide appropriate safe context, e.g. NAPI. 
*/ void page_pool_update_nid(struct page_pool *pool, int new_nid) { + struct page *page; + trace_page_pool_update_nid(pool, new_nid); pool->p.nid = new_nid; + + /* Flush pool alloc cache, as refill will check NUMA node */ + while (pool->alloc.count) { + page = pool->alloc.cache[--pool->alloc.count]; + __page_pool_return_page(pool, page); + } } EXPORT_SYMBOL(page_pool_update_nid); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 294bfcf0ce0e..acc849df60b5 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -535,12 +535,12 @@ static int pgctrl_open(struct inode *inode, struct file *file) return single_open(file, pgctrl_show, PDE_DATA(inode)); } -static const struct file_operations pktgen_fops = { - .open = pgctrl_open, - .read = seq_read, - .llseek = seq_lseek, - .write = pgctrl_write, - .release = single_release, +static const struct proc_ops pktgen_proc_ops = { + .proc_open = pgctrl_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = pgctrl_write, + .proc_release = single_release, }; static int pktgen_if_show(struct seq_file *seq, void *v) @@ -1707,12 +1707,12 @@ static int pktgen_if_open(struct inode *inode, struct file *file) return single_open(file, pktgen_if_show, PDE_DATA(inode)); } -static const struct file_operations pktgen_if_fops = { - .open = pktgen_if_open, - .read = seq_read, - .llseek = seq_lseek, - .write = pktgen_if_write, - .release = single_release, +static const struct proc_ops pktgen_if_proc_ops = { + .proc_open = pktgen_if_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = pktgen_if_write, + .proc_release = single_release, }; static int pktgen_thread_show(struct seq_file *seq, void *v) @@ -1844,12 +1844,12 @@ static int pktgen_thread_open(struct inode *inode, struct file *file) return single_open(file, pktgen_thread_show, PDE_DATA(inode)); } -static const struct file_operations pktgen_thread_fops = { - .open = pktgen_thread_open, - .read = seq_read, - .llseek = seq_lseek, - .write = 
pktgen_thread_write, - .release = single_release, +static const struct proc_ops pktgen_thread_proc_ops = { + .proc_open = pktgen_thread_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = pktgen_thread_write, + .proc_release = single_release, }; /* Think find or remove for NN */ @@ -1926,7 +1926,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d pkt_dev->entry = proc_create_data(dev->name, 0600, pn->proc_dir, - &pktgen_if_fops, + &pktgen_if_proc_ops, pkt_dev); if (!pkt_dev->entry) pr_err("can't move proc entry for '%s'\n", @@ -3638,7 +3638,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->clone_skb = pg_clone_skb_d; pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir, - &pktgen_if_fops, pkt_dev); + &pktgen_if_proc_ops, pkt_dev); if (!pkt_dev->entry) { pr_err("cannot create %s/%s procfs entry\n", PG_PROC_DIR, ifname); @@ -3708,7 +3708,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) t->tsk = p; pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir, - &pktgen_thread_fops, t); + &pktgen_thread_proc_ops, t); if (!pe) { pr_err("cannot create %s/%s procfs entry\n", PG_PROC_DIR, t->tsk->comm); @@ -3793,7 +3793,7 @@ static int __net_init pg_net_init(struct net *net) pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR); return -ENODEV; } - pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops); + pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_proc_ops); if (pe == NULL) { pr_err("cannot create %s procfs entry\n", PGCTRL); ret = -EINVAL; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02916f43bf63..e1152f4ffe33 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1041,6 +1041,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MIN_MTU */ + nla_total_size(4) /* IFLA_MAX_MTU */ + rtnl_prop_list_size(dev) + + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS 
*/ + 0; } @@ -1241,6 +1242,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, return 0; memset(&vf_vlan_info, 0, sizeof(vf_vlan_info)); + memset(&node_guid, 0, sizeof(node_guid)); + memset(&port_guid, 0, sizeof(port_guid)); vf_mac.vf = vf_vlan.vf = @@ -1289,8 +1292,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, sizeof(vf_trust), &vf_trust)) goto nla_put_vf_failure; - memset(&node_guid, 0, sizeof(node_guid)); - memset(&port_guid, 0, sizeof(port_guid)); if (dev->netdev_ops->ndo_get_vf_guid && !dev->netdev_ops->ndo_get_vf_guid(dev, vfs_num, &node_guid, &port_guid)) { @@ -1757,6 +1758,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0) goto nla_put_failure; + if (memchr_inv(dev->perm_addr, '\0', dev->addr_len) && + nla_put(skb, IFLA_PERM_ADDRESS, dev->addr_len, dev->perm_addr)) + goto nla_put_failure; rcu_read_lock(); if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) @@ -1822,6 +1826,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PROP_LIST] = { .type = NLA_NESTED }, [IFLA_ALT_IFNAME] = { .type = NLA_STRING, .len = ALTIFNAMSIZ - 1 }, + [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -3048,8 +3053,17 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, dev->rtnl_link_ops = ops; dev->rtnl_link_state = RTNL_LINK_INITIALIZING; - if (tb[IFLA_MTU]) - dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_MTU]) { + u32 mtu = nla_get_u32(tb[IFLA_MTU]); + int err; + + err = dev_validate_mtu(dev, mtu, extack); + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } + dev->mtu = mtu; + } if (tb[IFLA_ADDRESS]) { memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), nla_len(tb[IFLA_ADDRESS])); @@ -3490,27 +3504,25 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr, if (err) return err; - alt_ifname = nla_data(attr); + alt_ifname = 
nla_strdup(attr, GFP_KERNEL); + if (!alt_ifname) + return -ENOMEM; + if (cmd == RTM_NEWLINKPROP) { - alt_ifname = kstrdup(alt_ifname, GFP_KERNEL); - if (!alt_ifname) - return -ENOMEM; err = netdev_name_node_alt_create(dev, alt_ifname); - if (err) { - kfree(alt_ifname); - return err; - } + if (!err) + alt_ifname = NULL; } else if (cmd == RTM_DELLINKPROP) { err = netdev_name_node_alt_destroy(dev, alt_ifname); - if (err) - return err; } else { - WARN_ON(1); - return 0; + WARN_ON_ONCE(1); + err = -EINVAL; } - *changed = true; - return 0; + kfree(alt_ifname); + if (!err) + *changed = true; + return err; } static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 973a71f4bc89..e1101a4f90a6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -68,6 +68,7 @@ #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/mpls.h> +#include <net/mptcp.h> #include <linux/uaccess.h> #include <trace/events/skb.h> @@ -466,7 +467,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, return NULL; } - /* use OR instead of assignment to avoid clearing of bits in mask */ if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; @@ -526,7 +526,6 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, return NULL; } - /* use OR instead of assignment to avoid clearing of bits in mask */ if (nc->page.pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; @@ -3638,6 +3637,97 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) return head_frag; } +struct sk_buff *skb_segment_list(struct sk_buff *skb, + netdev_features_t features, + unsigned int offset) +{ + struct sk_buff *list_skb = skb_shinfo(skb)->frag_list; + unsigned int tnl_hlen = skb_tnl_header_len(skb); + unsigned int delta_truesize = 0; + unsigned int delta_len = 0; + struct sk_buff *tail = NULL; + struct sk_buff *nskb; + + skb_push(skb, -skb_network_offset(skb) + offset); 
+ + skb_shinfo(skb)->frag_list = NULL; + + do { + nskb = list_skb; + list_skb = list_skb->next; + + if (!tail) + skb->next = nskb; + else + tail->next = nskb; + + tail = nskb; + + delta_len += nskb->len; + delta_truesize += nskb->truesize; + + skb_push(nskb, -skb_network_offset(nskb) + offset); + + __copy_skb_header(nskb, skb); + + skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); + skb_copy_from_linear_data_offset(skb, -tnl_hlen, + nskb->data - tnl_hlen, + offset + tnl_hlen); + + if (skb_needs_linearize(nskb, features) && + __skb_linearize(nskb)) + goto err_linearize; + + } while (list_skb); + + skb->truesize = skb->truesize - delta_truesize; + skb->data_len = skb->data_len - delta_len; + skb->len = skb->len - delta_len; + + skb_gso_reset(skb); + + skb->prev = tail; + + if (skb_needs_linearize(skb, features) && + __skb_linearize(skb)) + goto err_linearize; + + skb_get(skb); + + return skb; + +err_linearize: + kfree_skb_list(skb->next); + skb->next = NULL; + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL_GPL(skb_segment_list); + +int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) +{ + if (unlikely(p->len + skb->len >= 65536)) + return -E2BIG; + + if (NAPI_GRO_CB(p)->last == p) + skb_shinfo(p)->frag_list = skb; + else + NAPI_GRO_CB(p)->last->next = skb; + + skb_pull(skb, skb_gro_offset(skb)); + + NAPI_GRO_CB(p)->last = skb; + NAPI_GRO_CB(p)->count++; + p->data_len += skb->len; + p->truesize += skb->truesize; + p->len += skb->len; + + NAPI_GRO_CB(skb)->same_flow = 1; + + return 0; +} +EXPORT_SYMBOL_GPL(skb_gro_receive_list); + /** * skb_segment - Perform protocol segmentation on skb. 
* @head_skb: buffer to segment @@ -4109,6 +4199,9 @@ static const u8 skb_ext_type_len[] = { #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext), #endif +#if IS_ENABLED(CONFIG_MPTCP) + [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), +#endif }; static __always_inline unsigned int skb_ext_total_length(void) @@ -4123,6 +4216,9 @@ static __always_inline unsigned int skb_ext_total_length(void) #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) skb_ext_type_len[TC_SKB_EXT] + #endif +#if IS_ENABLED(CONFIG_MPTCP) + skb_ext_type_len[SKB_EXT_MPTCP] + +#endif 0; } @@ -4707,9 +4803,9 @@ static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb, typeof(IPPROTO_IP) proto, unsigned int off) { - switch (proto) { - int err; + int err; + switch (proto) { case IPPROTO_TCP: err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr), off + MAX_TCP_HDR_LEN); @@ -5472,12 +5568,15 @@ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr, } /** - * skb_mpls_push() - push a new MPLS header after the mac header + * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of + * the packet * * @skb: buffer * @mpls_lse: MPLS label stack entry to push * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848) * @mac_len: length of the MAC header + * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is + * ethernet * * Expects skb->data at mac header. 
* @@ -5501,7 +5600,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, return err; if (!skb->inner_protocol) { - skb_set_inner_network_header(skb, mac_len); + skb_set_inner_network_header(skb, skb_network_offset(skb)); skb_set_inner_protocol(skb, skb->protocol); } @@ -5510,6 +5609,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, mac_len); skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); + skb_reset_mac_len(skb); lse = mpls_hdr(skb); lse->label_stack_entry = mpls_lse; @@ -5529,7 +5629,7 @@ EXPORT_SYMBOL_GPL(skb_mpls_push); * @skb: buffer * @next_proto: ethertype of header after popped MPLS header * @mac_len: length of the MAC header - * @ethernet: flag to indicate if ethernet header is present in packet + * @ethernet: flag to indicate if the packet is ethernet * * Expects skb->data at mac header. * @@ -5976,7 +6076,14 @@ static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id) return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE); } -static struct skb_ext *skb_ext_alloc(void) +/** + * __skb_ext_alloc - allocate a new skb extensions storage + * + * Returns the newly allocated pointer. The pointer can later attached to a + * skb via __skb_ext_set(). + * Note: caller must handle the skb_ext as an opaque data. + */ +struct skb_ext *__skb_ext_alloc(void) { struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC); @@ -6017,6 +6124,30 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, } /** + * __skb_ext_set - attach the specified extension storage to this skb + * @skb: buffer + * @id: extension id + * @ext: extension storage previously allocated via __skb_ext_alloc() + * + * Existing extensions, if any, are cleared. + * + * Returns the pointer to the extension. 
+ */ +void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, + struct skb_ext *ext) +{ + unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext); + + skb_ext_put(skb); + newlen = newoff + skb_ext_type_len[id]; + ext->chunks = newlen; + ext->offset[id] = newoff; + skb->extensions = ext; + skb->active_extensions = 1 << id; + return skb_ext_get_ptr(ext, id); +} + +/** * skb_ext_add - allocate space for given extension, COW if needed * @skb: buffer * @id: extension to allocate space for @@ -6049,7 +6180,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) } else { newoff = SKB_EXT_CHUNKSIZEOF(*new); - new = skb_ext_alloc(); + new = __skb_ext_alloc(); if (!new) return NULL; } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 3866d7e20c07..ded2d5227678 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -594,8 +594,6 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy); void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - sock_owned_by_me(sk); - sk_psock_cork_free(psock); sk_psock_zap_ingress(psock); diff --git a/net/core/sock.c b/net/core/sock.c index 8459ad579f73..8f71684305c3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1830,7 +1830,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); - mem_cgroup_sk_alloc(newsk); + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); @@ -2786,7 +2789,7 @@ static void sock_def_error_report(struct sock *sk) rcu_read_unlock(); } -static void sock_def_readable(struct sock *sk) +void sock_def_readable(struct sock *sk) { struct socket_wq *wq; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 8998e356f423..085cef5857bb 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -234,7 +234,6 @@ static void sock_map_free(struct bpf_map *map) int i; synchronize_rcu(); - rcu_read_lock(); raw_spin_lock_bh(&stab->lock); for 
(i = 0; i < stab->map.max_entries; i++) { struct sock **psk = &stab->sks[i]; @@ -243,13 +242,15 @@ static void sock_map_free(struct bpf_map *map) sk = xchg(psk, NULL); if (sk) { lock_sock(sk); + rcu_read_lock(); sock_map_unref(sk, psk); + rcu_read_unlock(); release_sock(sk); } } raw_spin_unlock_bh(&stab->lock); - rcu_read_unlock(); + /* wait for psock readers accessing its map link */ synchronize_rcu(); bpf_map_area_free(stab->sks); @@ -416,14 +417,16 @@ static int sock_map_update_elem(struct bpf_map *map, void *key, ret = -EINVAL; goto out; } - if (!sock_map_sk_is_suitable(sk) || - sk->sk_state != TCP_ESTABLISHED) { + if (!sock_map_sk_is_suitable(sk)) { ret = -EOPNOTSUPP; goto out; } sock_map_sk_acquire(sk); - ret = sock_map_update_common(map, idx, sk, flags); + if (sk->sk_state != TCP_ESTABLISHED) + ret = -EOPNOTSUPP; + else + ret = sock_map_update_common(map, idx, sk, flags); sock_map_sk_release(sk); out: fput(sock->file); @@ -739,14 +742,16 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key, ret = -EINVAL; goto out; } - if (!sock_map_sk_is_suitable(sk) || - sk->sk_state != TCP_ESTABLISHED) { + if (!sock_map_sk_is_suitable(sk)) { ret = -EOPNOTSUPP; goto out; } sock_map_sk_acquire(sk); - ret = sock_hash_update_common(map, key, sk, flags); + if (sk->sk_state != TCP_ESTABLISHED) + ret = -EOPNOTSUPP; + else + ret = sock_hash_update_common(map, key, sk, flags); sock_map_sk_release(sk); out: fput(sock->file); @@ -859,19 +864,22 @@ static void sock_hash_free(struct bpf_map *map) int i; synchronize_rcu(); - rcu_read_lock(); for (i = 0; i < htab->buckets_num; i++) { bucket = sock_hash_select_bucket(htab, i); raw_spin_lock_bh(&bucket->lock); hlist_for_each_entry_safe(elem, node, &bucket->head, node) { hlist_del_rcu(&elem->node); lock_sock(elem->sk); + rcu_read_lock(); sock_map_unref(elem->sk, elem); + rcu_read_unlock(); release_sock(elem->sk); } raw_spin_unlock_bh(&bucket->lock); } - rcu_read_unlock(); + + /* wait for psock readers accessing its map link 
*/ + synchronize_rcu(); bpf_map_area_free(htab->buckets); kfree(htab); diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index f19f179538b9..91e9f2223c39 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -107,7 +107,6 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) if (!more_reuse) return NULL; - more_reuse->max_socks = more_socks_size; more_reuse->num_socks = reuse->num_socks; more_reuse->prog = reuse->prog; more_reuse->reuseport_id = reuse->reuseport_id; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 7911235706a9..04840697fe79 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -13,7 +13,7 @@ static unsigned int classify(const struct sk_buff *skb) { if (likely(skb->dev && skb->dev->phydev && - skb->dev->phydev->drv)) + skb->dev->phydev->mii_ts)) return ptp_classify_raw(skb); else return PTP_CLASS_NONE; @@ -21,7 +21,7 @@ static unsigned int classify(const struct sk_buff *skb) void skb_clone_tx_timestamp(struct sk_buff *skb) { - struct phy_device *phydev; + struct mii_timestamper *mii_ts; struct sk_buff *clone; unsigned int type; @@ -32,22 +32,22 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) if (type == PTP_CLASS_NONE) return; - phydev = skb->dev->phydev; - if (likely(phydev->drv->txtstamp)) { + mii_ts = skb->dev->phydev->mii_ts; + if (likely(mii_ts->txtstamp)) { clone = skb_clone_sk(skb); if (!clone) return; - phydev->drv->txtstamp(phydev, clone, type); + mii_ts->txtstamp(mii_ts, clone, type); } } EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); bool skb_defer_rx_timestamp(struct sk_buff *skb) { - struct phy_device *phydev; + struct mii_timestamper *mii_ts; unsigned int type; - if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv) + if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts) return false; if (skb_headroom(skb) < ETH_HLEN) @@ -62,9 +62,9 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) if (type == PTP_CLASS_NONE) return 
false; - phydev = skb->dev->phydev; - if (likely(phydev->drv->rxtstamp)) - return phydev->drv->rxtstamp(phydev, skb, type); + mii_ts = skb->dev->phydev->mii_ts; + if (likely(mii_ts->rxtstamp)) + return mii_ts->rxtstamp(mii_ts, skb, type); return false; } diff --git a/net/core/utils.c b/net/core/utils.c index 6b6e51db9f3b..1f31a39236d5 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -438,6 +438,23 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, } EXPORT_SYMBOL(inet_proto_csum_replace4); +/** + * inet_proto_csum_replace16 - update layer 4 header checksum field + * @sum: Layer 4 header checksum field + * @skb: sk_buff for the packet + * @from: old IPv6 address + * @to: new IPv6 address + * @pseudohdr: True if layer 4 header checksum includes pseudoheader + * + * Update layer 4 header as per the update in IPv6 src/dst address. + * + * There is no need to update skb->csum in this function, because update in two + * fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other + * for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to + * update skb->csum, because update in 3 fields a.) IPv4 src/dst address, + * b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as + * L4 Header checksum for skb->csum calculation. + */ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr) @@ -449,9 +466,6 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); - if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) - skb->csum = ~csum_partial(diff, sizeof(diff), - ~skb->csum); } else if (pseudohdr) *sum = ~csum_fold(csum_partial(diff, sizeof(diff), csum_unfold(*sum))); |