aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/bpf_sk_storage.c5
-rw-r--r--net/core/datagram.c27
-rw-r--r--net/core/dev.c286
-rw-r--r--net/core/dev_ioctl.c1
-rw-r--r--net/core/devlink.c255
-rw-r--r--net/core/drop_monitor.c12
-rw-r--r--net/core/ethtool.c3116
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/filter.c223
-rw-r--r--net/core/flow_dissector.c13
-rw-r--r--net/core/neighbour.c1
-rw-r--r--net/core/net_namespace.c99
-rw-r--r--net/core/netclassid_cgroup.c47
-rw-r--r--net/core/page_pool.c93
-rw-r--r--net/core/pktgen.c44
-rw-r--r--net/core/rtnetlink.c48
-rw-r--r--net/core/skbuff.c149
-rw-r--r--net/core/skmsg.c2
-rw-r--r--net/core/sock.c7
-rw-r--r--net/core/sock_map.c28
-rw-r--r--net/core/sock_reuseport.c1
-rw-r--r--net/core/timestamping.c20
-rw-r--r--net/core/utils.c20
24 files changed, 828 insertions, 3673 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index a104dc8faafc..3e2c378e5f31 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -8,7 +8,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
+obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
fib_notifier.o xdp.o flow_offload.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 458be6b3eda9..3ab23f698221 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -643,9 +643,10 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
+ nbuckets = roundup_pow_of_two(num_possible_cpus());
/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
- smap->bucket_log = max_t(u32, 1, ilog2(roundup_pow_of_two(num_possible_cpus())));
- nbuckets = 1U << smap->bucket_log;
+ nbuckets = max_t(u32, 2, nbuckets);
+ smap->bucket_log = ilog2(nbuckets);
cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
ret = bpf_map_charge_init(&smap->map.memory, cost);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index da3c24ed129c..a78e7f864c1e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -84,7 +84,8 @@ static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, i
/*
* Wait for the last received packet to be different from skb
*/
-int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
+ int *err, long *timeo_p,
const struct sk_buff *skb)
{
int error;
@@ -97,7 +98,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
if (error)
goto out_err;
- if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
+ if (READ_ONCE(queue->prev) != skb)
goto out;
/* Socket shut down? */
@@ -209,6 +210,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
/**
* __skb_try_recv_datagram - Receive a datagram skbuff
* @sk: socket
+ * @queue: socket queue from which to receive
* @flags: MSG\_ flags
* @destructor: invoked under the receive lock on successful dequeue
* @off: an offset in bytes to peek skb from. Returns an offset
@@ -241,13 +243,14 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
* quite explicitly by POSIX 1003.1g, don't change them without having
* the standard around please.
*/
-struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
+struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
+ struct sk_buff_head *queue,
+ unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
int *off, int *err,
struct sk_buff **last)
{
- struct sk_buff_head *queue = &sk->sk_receive_queue;
struct sk_buff *skb;
unsigned long cpu_flags;
/*
@@ -278,7 +281,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
break;
sk_busy_loop(sk, flags & MSG_DONTWAIT);
- } while (READ_ONCE(sk->sk_receive_queue.prev) != *last);
+ } while (READ_ONCE(queue->prev) != *last);
error = -EAGAIN;
@@ -288,7 +291,9 @@ no_packet:
}
EXPORT_SYMBOL(__skb_try_recv_datagram);
-struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
+struct sk_buff *__skb_recv_datagram(struct sock *sk,
+ struct sk_buff_head *sk_queue,
+ unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
int *off, int *err)
@@ -299,15 +304,16 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
- skb = __skb_try_recv_datagram(sk, flags, destructor, off, err,
- &last);
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, destructor,
+ off, err, &last);
if (skb)
return skb;
if (*err != -EAGAIN)
break;
} while (timeo &&
- !__skb_wait_for_more_packets(sk, err, &timeo, last));
+ !__skb_wait_for_more_packets(sk, sk_queue, err,
+ &timeo, last));
return NULL;
}
@@ -318,7 +324,8 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
{
int off = 0;
- return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+ return __skb_recv_datagram(sk, &sk->sk_receive_queue,
+ flags | (noblock ? MSG_DONTWAIT : 0),
NULL, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index 7e885d069707..c6c985fe7b1b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -146,7 +146,6 @@
#include "net-sysfs.h"
#define MAX_GRO_SKBS 8
-#define MAX_NEST_DEV 8
/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
@@ -331,6 +330,12 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
name_node = netdev_name_node_lookup(net, name);
if (!name_node)
return -ENOENT;
+ /* lookup might have found our primary name or a name belonging
+ * to another device.
+ */
+ if (name_node == dev->name_node || name_node->dev != dev)
+ return -EINVAL;
+
__netdev_name_node_alt_destroy(name_node);
return 0;
@@ -928,7 +933,7 @@ EXPORT_SYMBOL(dev_get_by_napi_id);
*
* The use of raw_seqcount_begin() and cond_resched() before
* retrying is required as we want to give the writers a chance
- * to complete when CONFIG_PREEMPT is not set.
+ * to complete when CONFIG_PREEMPTION is not set.
*/
int netdev_get_name(struct net *net, char *name, int ifindex)
{
@@ -1764,7 +1769,6 @@ EXPORT_SYMBOL(register_netdevice_notifier);
int unregister_netdevice_notifier(struct notifier_block *nb)
{
- struct net_device *dev;
struct net *net;
int err;
@@ -1775,16 +1779,9 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
if (err)
goto unlock;
- for_each_net(net) {
- for_each_netdev(net, dev) {
- if (dev->flags & IFF_UP) {
- call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
- dev);
- call_netdevice_notifier(nb, NETDEV_DOWN, dev);
- }
- call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
- }
- }
+ for_each_net(net)
+ call_netdevice_unregister_net_notifiers(nb, net);
+
unlock:
rtnl_unlock();
up_write(&pernet_ops_rwsem);
@@ -1792,6 +1789,42 @@ unlock:
}
EXPORT_SYMBOL(unregister_netdevice_notifier);
+static int __register_netdevice_notifier_net(struct net *net,
+ struct notifier_block *nb,
+ bool ignore_call_fail)
+{
+ int err;
+
+ err = raw_notifier_chain_register(&net->netdev_chain, nb);
+ if (err)
+ return err;
+ if (dev_boot_phase)
+ return 0;
+
+ err = call_netdevice_register_net_notifiers(nb, net);
+ if (err && !ignore_call_fail)
+ goto chain_unregister;
+
+ return 0;
+
+chain_unregister:
+ raw_notifier_chain_unregister(&net->netdev_chain, nb);
+ return err;
+}
+
+static int __unregister_netdevice_notifier_net(struct net *net,
+ struct notifier_block *nb)
+{
+ int err;
+
+ err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
+ if (err)
+ return err;
+
+ call_netdevice_unregister_net_notifiers(nb, net);
+ return 0;
+}
+
/**
* register_netdevice_notifier_net - register a per-netns network notifier block
* @net: network namespace
@@ -1812,23 +1845,9 @@ int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
int err;
rtnl_lock();
- err = raw_notifier_chain_register(&net->netdev_chain, nb);
- if (err)
- goto unlock;
- if (dev_boot_phase)
- goto unlock;
-
- err = call_netdevice_register_net_notifiers(nb, net);
- if (err)
- goto chain_unregister;
-
-unlock:
+ err = __register_netdevice_notifier_net(net, nb, false);
rtnl_unlock();
return err;
-
-chain_unregister:
- raw_notifier_chain_unregister(&netdev_chain, nb);
- goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier_net);
@@ -1854,17 +1873,53 @@ int unregister_netdevice_notifier_net(struct net *net,
int err;
rtnl_lock();
- err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
- if (err)
- goto unlock;
+ err = __unregister_netdevice_notifier_net(net, nb);
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL(unregister_netdevice_notifier_net);
- call_netdevice_unregister_net_notifiers(nb, net);
+int register_netdevice_notifier_dev_net(struct net_device *dev,
+ struct notifier_block *nb,
+ struct netdev_net_notifier *nn)
+{
+ int err;
-unlock:
+ rtnl_lock();
+ err = __register_netdevice_notifier_net(dev_net(dev), nb, false);
+ if (!err) {
+ nn->nb = nb;
+ list_add(&nn->list, &dev->net_notifier_list);
+ }
rtnl_unlock();
return err;
}
-EXPORT_SYMBOL(unregister_netdevice_notifier_net);
+EXPORT_SYMBOL(register_netdevice_notifier_dev_net);
+
+int unregister_netdevice_notifier_dev_net(struct net_device *dev,
+ struct notifier_block *nb,
+ struct netdev_net_notifier *nn)
+{
+ int err;
+
+ rtnl_lock();
+ list_del(&nn->list);
+ err = __unregister_netdevice_notifier_net(dev_net(dev), nb);
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net);
+
+static void move_netdevice_notifiers_dev_net(struct net_device *dev,
+ struct net *net)
+{
+ struct netdev_net_notifier *nn;
+
+ list_for_each_entry(nn, &dev->net_notifier_list, list) {
+ __unregister_netdevice_notifier_net(dev_net(dev), nn->nb);
+ __register_netdevice_notifier_net(net, nn->nb, true);
+ }
+}
/**
* call_netdevice_notifiers_info - call all network notifier blocks
@@ -3021,6 +3076,8 @@ static u16 skb_tx_hash(const struct net_device *dev,
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
+ if (hash >= qoffset)
+ hash -= qoffset;
while (unlikely(hash >= qcount))
hash -= qcount;
return hash + qoffset;
@@ -3249,7 +3306,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
segs = skb_mac_gso_segment(skb, features);
- if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
+ if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
skb_warn_bad_offload(skb);
return segs;
@@ -3607,26 +3664,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
- if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) &&
- qdisc_run_begin(q)) {
- if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
- &q->state))) {
- __qdisc_drop(skb, &to_free);
- rc = NET_XMIT_DROP;
- goto end_run;
- }
- qdisc_bstats_cpu_update(q, skb);
-
- rc = NET_XMIT_SUCCESS;
- if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
- __qdisc_run(q);
-
-end_run:
- qdisc_run_end(q);
- } else {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
- qdisc_run(q);
- }
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ qdisc_run(q);
if (unlikely(to_free))
kfree_skb_list(to_free);
@@ -4477,14 +4516,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
*/
- if (skb_cloned(skb) || skb_is_tc_redirected(skb))
+ if (skb_is_tc_redirected(skb))
return XDP_PASS;
/* XDP packets must be linear and must have sufficient headroom
* of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
* native XDP provides, thus we need to do it here as well.
*/
- if (skb_is_nonlinear(skb) ||
+ if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
int troom = skb->tail + skb->data_len - skb->end;
@@ -4932,7 +4971,6 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
-#ifdef CONFIG_NETFILTER_INGRESS
if (nf_hook_ingress_active(skb)) {
int ingress_retval;
@@ -4946,7 +4984,6 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
rcu_read_unlock();
return ingress_retval;
}
-#endif /* CONFIG_NETFILTER_INGRESS */
return 0;
}
@@ -5491,9 +5528,29 @@ static void flush_all_backlogs(void)
put_online_cpus();
}
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+ if (!napi->rx_count)
+ return;
+ netif_receive_skb_list_internal(&napi->rx_list);
+ INIT_LIST_HEAD(&napi->rx_list);
+ napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
+ * pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+ list_add_tail(&skb->list, &napi->rx_list);
+ if (++napi->rx_count >= gro_normal_batch)
+ gro_normal_list(napi);
+}
+
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
-static int napi_gro_complete(struct sk_buff *skb)
+static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
struct packet_offload *ptype;
__be16 type = skb->protocol;
@@ -5526,7 +5583,8 @@ static int napi_gro_complete(struct sk_buff *skb)
}
out:
- return netif_receive_skb_internal(skb);
+ gro_normal_one(napi, skb);
+ return NET_RX_SUCCESS;
}
static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
@@ -5539,7 +5597,7 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
return;
skb_list_del_init(skb);
- napi_gro_complete(skb);
+ napi_gro_complete(napi, skb);
napi->gro_hash[index].count--;
}
@@ -5641,7 +5699,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
}
}
-static void gro_flush_oldest(struct list_head *head)
+static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
{
struct sk_buff *oldest;
@@ -5657,7 +5715,7 @@ static void gro_flush_oldest(struct list_head *head)
* SKB to the chain.
*/
skb_list_del_init(oldest);
- napi_gro_complete(oldest);
+ napi_gro_complete(napi, oldest);
}
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
@@ -5723,7 +5781,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (&ptype->list == head)
goto normal;
- if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) {
+ if (PTR_ERR(pp) == -EINPROGRESS) {
ret = GRO_CONSUMED;
goto ok;
}
@@ -5733,7 +5791,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (pp) {
skb_list_del_init(pp);
- napi_gro_complete(pp);
+ napi_gro_complete(napi, pp);
napi->gro_hash[hash].count--;
}
@@ -5744,7 +5802,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
goto normal;
if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
- gro_flush_oldest(gro_head);
+ gro_flush_oldest(napi, gro_head);
} else {
napi->gro_hash[hash].count++;
}
@@ -5802,26 +5860,6 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static void gro_normal_list(struct napi_struct *napi)
-{
- if (!napi->rx_count)
- return;
- netif_receive_skb_list_internal(&napi->rx_list);
- INIT_LIST_HEAD(&napi->rx_list);
- napi->rx_count = 0;
-}
-
-/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
- * pass the whole batch up to the stack.
- */
-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
-{
- list_add_tail(&skb->list, &napi->rx_list);
- if (++napi->rx_count >= gro_normal_batch)
- gro_normal_list(napi);
-}
-
static void napi_skb_free_stolen_head(struct sk_buff *skb)
{
skb_dst_drop(skb);
@@ -6200,8 +6238,6 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
NAPIF_STATE_IN_BUSY_POLL)))
return false;
- gro_normal_list(n);
-
if (n->gro_bitmask) {
unsigned long timeout = 0;
@@ -6217,6 +6253,9 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
hrtimer_start(&n->timer, ns_to_ktime(timeout),
HRTIMER_MODE_REL_PINNED);
}
+
+ gro_normal_list(n);
+
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
local_irq_save(flags);
@@ -6548,8 +6587,6 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
goto out_unlock;
}
- gro_normal_list(n);
-
if (n->gro_bitmask) {
/* flush too old packets
* If HZ < 1000, flush all packets.
@@ -6557,6 +6594,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
napi_gro_flush(n, HZ >= 1000);
}
+ gro_normal_list(n);
+
/* Some drivers may have called napi_schedule
* prior to exhausting their budget.
*/
@@ -7151,8 +7190,8 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev,
return 0;
}
-static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
- struct list_head **iter)
+struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *lower;
@@ -7164,6 +7203,7 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
return lower->dev;
}
+EXPORT_SYMBOL(netdev_next_lower_dev_rcu);
static u8 __netdev_upper_depth(struct net_device *dev)
{
@@ -8194,6 +8234,22 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu)
}
EXPORT_SYMBOL(__dev_set_mtu);
+int dev_validate_mtu(struct net_device *dev, int new_mtu,
+ struct netlink_ext_ack *extack)
+{
+ /* MTU must be positive, and in range */
+ if (new_mtu < 0 || new_mtu < dev->min_mtu) {
+ NL_SET_ERR_MSG(extack, "mtu less than device minimum");
+ return -EINVAL;
+ }
+
+ if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
+ NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
+ return -EINVAL;
+ }
+ return 0;
+}
+
/**
* dev_set_mtu_ext - Change maximum transfer unit
* @dev: device
@@ -8210,16 +8266,9 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
if (new_mtu == dev->mtu)
return 0;
- /* MTU must be positive, and in range */
- if (new_mtu < 0 || new_mtu < dev->min_mtu) {
- NL_SET_ERR_MSG(extack, "mtu less than device minimum");
- return -EINVAL;
- }
-
- if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
- NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
- return -EINVAL;
- }
+ err = dev_validate_mtu(dev, new_mtu, extack);
+ if (err)
+ return err;
if (!netif_device_present(dev))
return -ENODEV;
@@ -8542,7 +8591,17 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
struct netlink_ext_ack *extack, u32 flags,
struct bpf_prog *prog)
{
+ bool non_hw = !(flags & XDP_FLAGS_HW_MODE);
+ struct bpf_prog *prev_prog = NULL;
struct netdev_bpf xdp;
+ int err;
+
+ if (non_hw) {
+ prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op,
+ XDP_QUERY_PROG));
+ if (IS_ERR(prev_prog))
+ prev_prog = NULL;
+ }
memset(&xdp, 0, sizeof(xdp));
if (flags & XDP_FLAGS_HW_MODE)
@@ -8553,7 +8612,14 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
xdp.flags = flags;
xdp.prog = prog;
- return bpf_op(dev, &xdp);
+ err = bpf_op(dev, &xdp);
+ if (!err && non_hw)
+ bpf_prog_change_xdp(prev_prog, prog);
+
+ if (prev_prog)
+ bpf_prog_put(prev_prog);
+
+ return err;
}
static void dev_xdp_uninstall(struct net_device *dev)
@@ -9257,7 +9323,7 @@ int register_netdevice(struct net_device *dev)
/* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO).
*/
- dev->hw_features |= NETIF_F_SOFT_FEATURES;
+ dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF);
dev->features |= NETIF_F_SOFT_FEATURES;
if (dev->netdev_ops->ndo_udp_tunnel_add) {
@@ -9302,8 +9368,10 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
ret = netdev_register_kobject(dev);
- if (ret)
+ if (ret) {
+ dev->reg_state = NETREG_UNREGISTERED;
goto err_uninit;
+ }
dev->reg_state = NETREG_REGISTERED;
__netdev_update_features(dev);
@@ -9750,6 +9818,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->adj_list.lower);
INIT_LIST_HEAD(&dev->ptype_all);
INIT_LIST_HEAD(&dev->ptype_specific);
+ INIT_LIST_HEAD(&dev->net_notifier_list);
#ifdef CONFIG_NET_SCHED
hash_init(dev->qdisc_hash);
#endif
@@ -9820,6 +9889,8 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
+ free_percpu(dev->xdp_bulkq);
+ dev->xdp_bulkq = NULL;
netdev_unregister_lockdep_key(dev);
@@ -10011,6 +10082,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
netdev_adjacent_del_links(dev);
+ /* Move per-net netdevice notifiers that are following the netdevice */
+ move_netdevice_notifiers_dev_net(dev, net);
+
/* Actually switch the network namespace */
dev_net_set(dev, net);
dev->ifindex = new_ifindex;
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 5163d900bb4f..dbaebbe573f0 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -187,6 +187,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_TX_OFF:
case HWTSTAMP_TX_ON:
case HWTSTAMP_TX_ONESTEP_SYNC:
+ case HWTSTAMP_TX_ONESTEP_P2P:
tx_type_valid = 1;
break;
}
diff --git a/net/core/devlink.c b/net/core/devlink.c
index f76219bf0c21..b831c5545d6a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2103,11 +2103,11 @@ err_action_values_put:
static struct devlink_dpipe_table *
devlink_dpipe_table_find(struct list_head *dpipe_tables,
- const char *table_name)
+ const char *table_name, struct devlink *devlink)
{
struct devlink_dpipe_table *table;
-
- list_for_each_entry_rcu(table, dpipe_tables, list) {
+ list_for_each_entry_rcu(table, dpipe_tables, list,
+ lockdep_is_held(&devlink->lock)) {
if (!strcmp(table->name, table_name))
return table;
}
@@ -2226,7 +2226,7 @@ static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
return -EINVAL;
@@ -2382,7 +2382,7 @@ static int devlink_dpipe_table_counters_set(struct devlink *devlink,
struct devlink_dpipe_table *table;
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
return -EINVAL;
@@ -3352,34 +3352,41 @@ devlink_param_value_get_from_info(const struct devlink_param *param,
struct genl_info *info,
union devlink_param_value *value)
{
+ struct nlattr *param_data;
int len;
- if (param->type != DEVLINK_PARAM_TYPE_BOOL &&
- !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])
+ param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
+
+ if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
return -EINVAL;
switch (param->type) {
case DEVLINK_PARAM_TYPE_U8:
- value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u8))
+ return -EINVAL;
+ value->vu8 = nla_get_u8(param_data);
break;
case DEVLINK_PARAM_TYPE_U16:
- value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u16))
+ return -EINVAL;
+ value->vu16 = nla_get_u16(param_data);
break;
case DEVLINK_PARAM_TYPE_U32:
- value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u32))
+ return -EINVAL;
+ value->vu32 = nla_get_u32(param_data);
break;
case DEVLINK_PARAM_TYPE_STRING:
- len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]),
- nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
- if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) ||
+ len = strnlen(nla_data(param_data), nla_len(param_data));
+ if (len == nla_len(param_data) ||
len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
return -EINVAL;
- strcpy(value->vstr,
- nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
+ strcpy(value->vstr, nla_data(param_data));
break;
case DEVLINK_PARAM_TYPE_BOOL:
- value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ?
- true : false;
+ if (param_data && nla_len(param_data))
+ return -EINVAL;
+ value->vbool = nla_get_flag(param_data);
break;
}
return 0;
@@ -3986,6 +3993,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto out_unlock;
}
+ /* return 0 if there is no further data to read */
+ if (start_offset >= region->size) {
+ err = 0;
+ goto out_unlock;
+ }
+
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
DEVLINK_CMD_REGION_READ);
@@ -4843,22 +4856,100 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
-void
-devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
- enum devlink_health_reporter_state state)
+static int
+devlink_nl_health_reporter_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_health_reporter *reporter,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
{
- if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
- state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
+ struct nlattr *reporter_attr;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ reporter_attr = nla_nest_start_noflag(msg,
+ DEVLINK_ATTR_HEALTH_REPORTER);
+ if (!reporter_attr)
+ goto genlmsg_cancel;
+ if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ reporter->ops->name))
+ goto reporter_nest_cancel;
+ if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
+ reporter->health_state))
+ goto reporter_nest_cancel;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
+ reporter->error_count, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
+ reporter->recovery_count, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->ops->recover &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
+ reporter->graceful_period,
+ DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->ops->recover &&
+ nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
+ reporter->auto_recover))
+ goto reporter_nest_cancel;
+ if (reporter->dump_fmsg &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
+ jiffies_to_msecs(reporter->dump_ts),
+ DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->dump_fmsg &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
+ reporter->dump_real_ts, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+
+ nla_nest_end(msg, reporter_attr);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+reporter_nest_cancel:
+ nla_nest_end(msg, reporter_attr);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_recover_notify(struct devlink_health_reporter *reporter,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
return;
- if (reporter->health_state == state)
+ err = devlink_nl_health_reporter_fill(msg, reporter->devlink,
+ reporter, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
return;
+ }
- reporter->health_state = state;
- trace_devlink_health_reporter_state_update(reporter->devlink,
- reporter->ops->name, state);
+ genlmsg_multicast_netns(&devlink_nl_family,
+ devlink_net(reporter->devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
+
+void
+devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter)
+{
+ reporter->recovery_count++;
+ reporter->last_recovery_ts = jiffies;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done);
static int
devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
@@ -4876,9 +4967,9 @@ devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
if (err)
return err;
- reporter->recovery_count++;
+ devlink_health_reporter_recovery_done(reporter);
reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
- reporter->last_recovery_ts = jiffies;
+ devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
return 0;
}
@@ -4945,6 +5036,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
reporter->error_count++;
prev_health_state = reporter->health_state;
reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
+ devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
/* abort if the previous error wasn't recovered */
if (reporter->auto_recover &&
@@ -5027,68 +5119,23 @@ devlink_health_reporter_put(struct devlink_health_reporter *reporter)
refcount_dec(&reporter->refcount);
}
-static int
-devlink_nl_health_reporter_fill(struct sk_buff *msg,
- struct devlink *devlink,
- struct devlink_health_reporter *reporter,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
+void
+devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
+ enum devlink_health_reporter_state state)
{
- struct nlattr *reporter_attr;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto genlmsg_cancel;
-
- reporter_attr = nla_nest_start_noflag(msg,
- DEVLINK_ATTR_HEALTH_REPORTER);
- if (!reporter_attr)
- goto genlmsg_cancel;
- if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
- reporter->ops->name))
- goto reporter_nest_cancel;
- if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
- reporter->health_state))
- goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
- reporter->error_count, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
- reporter->recovery_count, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->ops->recover &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
- reporter->graceful_period,
- DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->ops->recover &&
- nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
- reporter->auto_recover))
- goto reporter_nest_cancel;
- if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
- jiffies_to_msecs(reporter->dump_ts),
- DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
- reporter->dump_real_ts, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
+ if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
+ state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
+ return;
- nla_nest_end(msg, reporter_attr);
- genlmsg_end(msg, hdr);
- return 0;
+ if (reporter->health_state == state)
+ return;
-reporter_nest_cancel:
- nla_nest_end(msg, reporter_attr);
-genlmsg_cancel:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
+ reporter->health_state = state;
+ trace_devlink_health_reporter_state_update(reporter->devlink,
+ reporter->ops->name, state);
+ devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
struct genl_info *info)
@@ -5911,6 +5958,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
@@ -6814,7 +6863,7 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
rcu_read_lock();
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
enabled = false;
if (table)
enabled = table->counters_enabled;
@@ -6838,26 +6887,34 @@ int devlink_dpipe_table_register(struct devlink *devlink,
void *priv, bool counter_control_extern)
{
struct devlink_dpipe_table *table;
-
- if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
- return -EEXIST;
+ int err = 0;
if (WARN_ON(!table_ops->size_get))
return -EINVAL;
+ mutex_lock(&devlink->lock);
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
+ devlink)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
table = kzalloc(sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
+ if (!table) {
+ err = -ENOMEM;
+ goto unlock;
+ }
table->name = table_name;
table->table_ops = table_ops;
table->priv = priv;
table->counter_control_extern = counter_control_extern;
- mutex_lock(&devlink->lock);
list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+unlock:
mutex_unlock(&devlink->lock);
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
@@ -6874,7 +6931,7 @@ void devlink_dpipe_table_unregister(struct devlink *devlink,
mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
goto unlock;
list_del_rcu(&table->list);
@@ -7031,7 +7088,7 @@ int devlink_dpipe_table_resource_set(struct devlink *devlink,
mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table) {
err = -EINVAL;
goto out;
@@ -7674,6 +7731,9 @@ static const struct devlink_trap devlink_trap_generic[] = {
DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION),
DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION),
DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, EXCEPTION),
+ DEVLINK_TRAP(NON_ROUTABLE, DROP),
+ DEVLINK_TRAP(DECAP_ERROR, EXCEPTION),
+ DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP),
};
#define DEVLINK_TRAP_GROUP(_id) \
@@ -7686,6 +7746,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = {
DEVLINK_TRAP_GROUP(L2_DROPS),
DEVLINK_TRAP_GROUP(L3_DROPS),
DEVLINK_TRAP_GROUP(BUFFER_DROPS),
+ DEVLINK_TRAP_GROUP(TUNNEL_DROPS),
};
static int devlink_trap_generic_verify(const struct devlink_trap *trap)
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 536e032d95c8..31700e0c3928 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -802,16 +802,12 @@ net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata)
if (!n_hw_metadata)
return NULL;
- trap_group_name = kmemdup(hw_metadata->trap_group_name,
- strlen(hw_metadata->trap_group_name) + 1,
- GFP_ATOMIC | __GFP_ZERO);
+ trap_group_name = kstrdup(hw_metadata->trap_group_name, GFP_ATOMIC);
if (!trap_group_name)
goto free_hw_metadata;
n_hw_metadata->trap_group_name = trap_group_name;
- trap_name = kmemdup(hw_metadata->trap_name,
- strlen(hw_metadata->trap_name) + 1,
- GFP_ATOMIC | __GFP_ZERO);
+ trap_name = kstrdup(hw_metadata->trap_name, GFP_ATOMIC);
if (!trap_name)
goto free_trap_group;
n_hw_metadata->trap_name = trap_name;
@@ -1004,8 +1000,10 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
{
int cpu;
- if (!monitor_hw)
+ if (!monitor_hw) {
NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
+ return;
+ }
monitor_hw = false;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
deleted file mode 100644
index cd9bc67381b2..000000000000
--- a/net/core/ethtool.c
+++ /dev/null
@@ -1,3116 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/core/ethtool.c - Ethtool ioctl handler
- * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
- *
- * This file is where we call all the ethtool_ops commands to get
- * the information ethtool needs.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/ethtool.h>
-#include <linux/netdevice.h>
-#include <linux/net_tstamp.h>
-#include <linux/phy.h>
-#include <linux/bitops.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/sfp.h>
-#include <linux/slab.h>
-#include <linux/rtnetlink.h>
-#include <linux/sched/signal.h>
-#include <linux/net.h>
-#include <net/devlink.h>
-#include <net/xdp_sock.h>
-#include <net/flow_offload.h>
-
-/*
- * Some useful ethtool_ops methods that're device independent.
- * If we find that all drivers want to do the same thing here,
- * we can turn these into dev_() function calls.
- */
-
-u32 ethtool_op_get_link(struct net_device *dev)
-{
- return netif_carrier_ok(dev) ? 1 : 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_link);
-
-int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
-{
- info->so_timestamping =
- SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_ts_info);
-
-/* Handlers for each ethtool command */
-
-#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32)
-
-static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
- [NETIF_F_SG_BIT] = "tx-scatter-gather",
- [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4",
- [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic",
- [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
- [NETIF_F_HIGHDMA_BIT] = "highdma",
- [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
- [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert",
-
- [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse",
- [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter",
- [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert",
- [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse",
- [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
- [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
- [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
- [NETIF_F_LLTX_BIT] = "tx-lockless",
- [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
- [NETIF_F_GRO_BIT] = "rx-gro",
- [NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
- [NETIF_F_LRO_BIT] = "rx-lro",
-
- [NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
- [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
- [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
- [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
- [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
- [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
- [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
- [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation",
- [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation",
- [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation",
- [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
- [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
- [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
- [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
- [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
- [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
-
- [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
- [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
- [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
- [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
- [NETIF_F_RXHASH_BIT] = "rx-hashing",
- [NETIF_F_RXCSUM_BIT] = "rx-checksum",
- [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy",
- [NETIF_F_LOOPBACK_BIT] = "loopback",
- [NETIF_F_RXFCS_BIT] = "rx-fcs",
- [NETIF_F_RXALL_BIT] = "rx-all",
- [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
- [NETIF_F_HW_TC_BIT] = "hw-tc-offload",
- [NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
- [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
- [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
- [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
- [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload",
- [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload",
-};
-
-static const char
-rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
- [ETH_RSS_HASH_TOP_BIT] = "toeplitz",
- [ETH_RSS_HASH_XOR_BIT] = "xor",
- [ETH_RSS_HASH_CRC32_BIT] = "crc32",
-};
-
-static const char
-tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
- [ETHTOOL_ID_UNSPEC] = "Unspec",
- [ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
- [ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
- [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
-};
-
-static const char
-phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
- [ETHTOOL_ID_UNSPEC] = "Unspec",
- [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
- [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down",
- [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down",
-};
-
-static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_gfeatures cmd = {
- .cmd = ETHTOOL_GFEATURES,
- .size = ETHTOOL_DEV_FEATURE_WORDS,
- };
- struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
- u32 __user *sizeaddr;
- u32 copy_size;
- int i;
-
- /* in case feature bits run out again */
- BUILD_BUG_ON(ETHTOOL_DEV_FEATURE_WORDS * sizeof(u32) > sizeof(netdev_features_t));
-
- for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
- features[i].available = (u32)(dev->hw_features >> (32 * i));
- features[i].requested = (u32)(dev->wanted_features >> (32 * i));
- features[i].active = (u32)(dev->features >> (32 * i));
- features[i].never_changed =
- (u32)(NETIF_F_NEVER_CHANGE >> (32 * i));
- }
-
- sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
- if (get_user(copy_size, sizeaddr))
- return -EFAULT;
-
- if (copy_size > ETHTOOL_DEV_FEATURE_WORDS)
- copy_size = ETHTOOL_DEV_FEATURE_WORDS;
-
- if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
- return -EFAULT;
- useraddr += sizeof(cmd);
- if (copy_to_user(useraddr, features, copy_size * sizeof(*features)))
- return -EFAULT;
-
- return 0;
-}
-
-static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_sfeatures cmd;
- struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
- netdev_features_t wanted = 0, valid = 0;
- int i, ret = 0;
-
- if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
- return -EFAULT;
- useraddr += sizeof(cmd);
-
- if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS)
- return -EINVAL;
-
- if (copy_from_user(features, useraddr, sizeof(features)))
- return -EFAULT;
-
- for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
- valid |= (netdev_features_t)features[i].valid << (32 * i);
- wanted |= (netdev_features_t)features[i].requested << (32 * i);
- }
-
- if (valid & ~NETIF_F_ETHTOOL_BITS)
- return -EINVAL;
-
- if (valid & ~dev->hw_features) {
- valid &= dev->hw_features;
- ret |= ETHTOOL_F_UNSUPPORTED;
- }
-
- dev->wanted_features &= ~valid;
- dev->wanted_features |= wanted & valid;
- __netdev_update_features(dev);
-
- if ((dev->wanted_features ^ dev->features) & valid)
- ret |= ETHTOOL_F_WISH;
-
- return ret;
-}
-
-static int __ethtool_get_sset_count(struct net_device *dev, int sset)
-{
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
- if (sset == ETH_SS_FEATURES)
- return ARRAY_SIZE(netdev_features_strings);
-
- if (sset == ETH_SS_RSS_HASH_FUNCS)
- return ARRAY_SIZE(rss_hash_func_strings);
-
- if (sset == ETH_SS_TUNABLES)
- return ARRAY_SIZE(tunable_strings);
-
- if (sset == ETH_SS_PHY_TUNABLES)
- return ARRAY_SIZE(phy_tunable_strings);
-
- if (sset == ETH_SS_PHY_STATS && dev->phydev &&
- !ops->get_ethtool_phy_stats)
- return phy_ethtool_get_sset_count(dev->phydev);
-
- if (ops->get_sset_count && ops->get_strings)
- return ops->get_sset_count(dev, sset);
- else
- return -EOPNOTSUPP;
-}
-
-static void __ethtool_get_strings(struct net_device *dev,
- u32 stringset, u8 *data)
-{
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
- if (stringset == ETH_SS_FEATURES)
- memcpy(data, netdev_features_strings,
- sizeof(netdev_features_strings));
- else if (stringset == ETH_SS_RSS_HASH_FUNCS)
- memcpy(data, rss_hash_func_strings,
- sizeof(rss_hash_func_strings));
- else if (stringset == ETH_SS_TUNABLES)
- memcpy(data, tunable_strings, sizeof(tunable_strings));
- else if (stringset == ETH_SS_PHY_TUNABLES)
- memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings));
- else if (stringset == ETH_SS_PHY_STATS && dev->phydev &&
- !ops->get_ethtool_phy_stats)
- phy_ethtool_get_strings(dev->phydev, data);
- else
- /* ops->get_strings is valid because checked earlier */
- ops->get_strings(dev, stringset, data);
-}
-
-static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
-{
- /* feature masks of legacy discrete ethtool ops */
-
- switch (eth_cmd) {
- case ETHTOOL_GTXCSUM:
- case ETHTOOL_STXCSUM:
- return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC;
- case ETHTOOL_GRXCSUM:
- case ETHTOOL_SRXCSUM:
- return NETIF_F_RXCSUM;
- case ETHTOOL_GSG:
- case ETHTOOL_SSG:
- return NETIF_F_SG;
- case ETHTOOL_GTSO:
- case ETHTOOL_STSO:
- return NETIF_F_ALL_TSO;
- case ETHTOOL_GGSO:
- case ETHTOOL_SGSO:
- return NETIF_F_GSO;
- case ETHTOOL_GGRO:
- case ETHTOOL_SGRO:
- return NETIF_F_GRO;
- default:
- BUG();
- }
-}
-
-static int ethtool_get_one_feature(struct net_device *dev,
- char __user *useraddr, u32 ethcmd)
-{
- netdev_features_t mask = ethtool_get_feature_mask(ethcmd);
- struct ethtool_value edata = {
- .cmd = ethcmd,
- .data = !!(dev->features & mask),
- };
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_one_feature(struct net_device *dev,
- void __user *useraddr, u32 ethcmd)
-{
- struct ethtool_value edata;
- netdev_features_t mask;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- mask = ethtool_get_feature_mask(ethcmd);
- mask &= dev->hw_features;
- if (!mask)
- return -EOPNOTSUPP;
-
- if (edata.data)
- dev->wanted_features |= mask;
- else
- dev->wanted_features &= ~mask;
-
- __netdev_update_features(dev);
-
- return 0;
-}
-
-#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \
- ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH)
-#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \
- NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \
- NETIF_F_RXHASH)
-
-static u32 __ethtool_get_flags(struct net_device *dev)
-{
- u32 flags = 0;
-
- if (dev->features & NETIF_F_LRO)
- flags |= ETH_FLAG_LRO;
- if (dev->features & NETIF_F_HW_VLAN_CTAG_RX)
- flags |= ETH_FLAG_RXVLAN;
- if (dev->features & NETIF_F_HW_VLAN_CTAG_TX)
- flags |= ETH_FLAG_TXVLAN;
- if (dev->features & NETIF_F_NTUPLE)
- flags |= ETH_FLAG_NTUPLE;
- if (dev->features & NETIF_F_RXHASH)
- flags |= ETH_FLAG_RXHASH;
-
- return flags;
-}
-
-static int __ethtool_set_flags(struct net_device *dev, u32 data)
-{
- netdev_features_t features = 0, changed;
-
- if (data & ~ETH_ALL_FLAGS)
- return -EINVAL;
-
- if (data & ETH_FLAG_LRO)
- features |= NETIF_F_LRO;
- if (data & ETH_FLAG_RXVLAN)
- features |= NETIF_F_HW_VLAN_CTAG_RX;
- if (data & ETH_FLAG_TXVLAN)
- features |= NETIF_F_HW_VLAN_CTAG_TX;
- if (data & ETH_FLAG_NTUPLE)
- features |= NETIF_F_NTUPLE;
- if (data & ETH_FLAG_RXHASH)
- features |= NETIF_F_RXHASH;
-
- /* allow changing only bits set in hw_features */
- changed = (features ^ dev->features) & ETH_ALL_FEATURES;
- if (changed & ~dev->hw_features)
- return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP;
-
- dev->wanted_features =
- (dev->wanted_features & ~changed) | (features & changed);
-
- __netdev_update_features(dev);
-
- return 0;
-}
-
-/* Given two link masks, AND them together and save the result in dst. */
-void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst,
- struct ethtool_link_ksettings *src)
-{
- unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
- unsigned int idx = 0;
-
- for (; idx < size; idx++) {
- dst->link_modes.supported[idx] &=
- src->link_modes.supported[idx];
- dst->link_modes.advertising[idx] &=
- src->link_modes.advertising[idx];
- }
-}
-EXPORT_SYMBOL(ethtool_intersect_link_masks);
-
-void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
- u32 legacy_u32)
-{
- bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
- dst[0] = legacy_u32;
-}
-EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode);
-
-/* return false if src had higher bits set. lower bits always updated. */
-bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
- const unsigned long *src)
-{
- bool retval = true;
-
- /* TODO: following test will soon always be true */
- if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) {
- __ETHTOOL_DECLARE_LINK_MODE_MASK(ext);
-
- bitmap_zero(ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
- bitmap_fill(ext, 32);
- bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
- if (bitmap_intersects(ext, src,
- __ETHTOOL_LINK_MODE_MASK_NBITS)) {
- /* src mask goes beyond bit 31 */
- retval = false;
- }
- }
- *legacy_u32 = src[0];
- return retval;
-}
-EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32);
-
-/* return false if legacy contained non-0 deprecated fields
- * maxtxpkt/maxrxpkt. rest of ksettings always updated
- */
-static bool
-convert_legacy_settings_to_link_ksettings(
- struct ethtool_link_ksettings *link_ksettings,
- const struct ethtool_cmd *legacy_settings)
-{
- bool retval = true;
-
- memset(link_ksettings, 0, sizeof(*link_ksettings));
-
- /* This is used to tell users that driver is still using these
- * deprecated legacy fields, and they should not use
- * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
- */
- if (legacy_settings->maxtxpkt ||
- legacy_settings->maxrxpkt)
- retval = false;
-
- ethtool_convert_legacy_u32_to_link_mode(
- link_ksettings->link_modes.supported,
- legacy_settings->supported);
- ethtool_convert_legacy_u32_to_link_mode(
- link_ksettings->link_modes.advertising,
- legacy_settings->advertising);
- ethtool_convert_legacy_u32_to_link_mode(
- link_ksettings->link_modes.lp_advertising,
- legacy_settings->lp_advertising);
- link_ksettings->base.speed
- = ethtool_cmd_speed(legacy_settings);
- link_ksettings->base.duplex
- = legacy_settings->duplex;
- link_ksettings->base.port
- = legacy_settings->port;
- link_ksettings->base.phy_address
- = legacy_settings->phy_address;
- link_ksettings->base.autoneg
- = legacy_settings->autoneg;
- link_ksettings->base.mdio_support
- = legacy_settings->mdio_support;
- link_ksettings->base.eth_tp_mdix
- = legacy_settings->eth_tp_mdix;
- link_ksettings->base.eth_tp_mdix_ctrl
- = legacy_settings->eth_tp_mdix_ctrl;
- return retval;
-}
-
-/* return false if ksettings link modes had higher bits
- * set. legacy_settings always updated (best effort)
- */
-static bool
-convert_link_ksettings_to_legacy_settings(
- struct ethtool_cmd *legacy_settings,
- const struct ethtool_link_ksettings *link_ksettings)
-{
- bool retval = true;
-
- memset(legacy_settings, 0, sizeof(*legacy_settings));
- /* this also clears the deprecated fields in legacy structure:
- * __u8 transceiver;
- * __u32 maxtxpkt;
- * __u32 maxrxpkt;
- */
-
- retval &= ethtool_convert_link_mode_to_legacy_u32(
- &legacy_settings->supported,
- link_ksettings->link_modes.supported);
- retval &= ethtool_convert_link_mode_to_legacy_u32(
- &legacy_settings->advertising,
- link_ksettings->link_modes.advertising);
- retval &= ethtool_convert_link_mode_to_legacy_u32(
- &legacy_settings->lp_advertising,
- link_ksettings->link_modes.lp_advertising);
- ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed);
- legacy_settings->duplex
- = link_ksettings->base.duplex;
- legacy_settings->port
- = link_ksettings->base.port;
- legacy_settings->phy_address
- = link_ksettings->base.phy_address;
- legacy_settings->autoneg
- = link_ksettings->base.autoneg;
- legacy_settings->mdio_support
- = link_ksettings->base.mdio_support;
- legacy_settings->eth_tp_mdix
- = link_ksettings->base.eth_tp_mdix;
- legacy_settings->eth_tp_mdix_ctrl
- = link_ksettings->base.eth_tp_mdix_ctrl;
- legacy_settings->transceiver
- = link_ksettings->base.transceiver;
- return retval;
-}
-
-/* number of 32-bit words to store the user's link mode bitmaps */
-#define __ETHTOOL_LINK_MODE_MASK_NU32 \
- DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32)
-
-/* layout of the struct passed from/to userland */
-struct ethtool_link_usettings {
- struct ethtool_link_settings base;
- struct {
- __u32 supported[__ETHTOOL_LINK_MODE_MASK_NU32];
- __u32 advertising[__ETHTOOL_LINK_MODE_MASK_NU32];
- __u32 lp_advertising[__ETHTOOL_LINK_MODE_MASK_NU32];
- } link_modes;
-};
-
-/* Internal kernel helper to query a device ethtool_link_settings. */
-int __ethtool_get_link_ksettings(struct net_device *dev,
- struct ethtool_link_ksettings *link_ksettings)
-{
- ASSERT_RTNL();
-
- if (!dev->ethtool_ops->get_link_ksettings)
- return -EOPNOTSUPP;
-
- memset(link_ksettings, 0, sizeof(*link_ksettings));
- return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings);
-}
-EXPORT_SYMBOL(__ethtool_get_link_ksettings);
-
-/* convert ethtool_link_usettings in user space to a kernel internal
- * ethtool_link_ksettings. return 0 on success, errno on error.
- */
-static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to,
- const void __user *from)
-{
- struct ethtool_link_usettings link_usettings;
-
- if (copy_from_user(&link_usettings, from, sizeof(link_usettings)))
- return -EFAULT;
-
- memcpy(&to->base, &link_usettings.base, sizeof(to->base));
- bitmap_from_arr32(to->link_modes.supported,
- link_usettings.link_modes.supported,
- __ETHTOOL_LINK_MODE_MASK_NBITS);
- bitmap_from_arr32(to->link_modes.advertising,
- link_usettings.link_modes.advertising,
- __ETHTOOL_LINK_MODE_MASK_NBITS);
- bitmap_from_arr32(to->link_modes.lp_advertising,
- link_usettings.link_modes.lp_advertising,
- __ETHTOOL_LINK_MODE_MASK_NBITS);
-
- return 0;
-}
-
-/* convert a kernel internal ethtool_link_ksettings to
- * ethtool_link_usettings in user space. return 0 on success, errno on
- * error.
- */
-static int
-store_link_ksettings_for_user(void __user *to,
- const struct ethtool_link_ksettings *from)
-{
- struct ethtool_link_usettings link_usettings;
-
- memcpy(&link_usettings.base, &from->base, sizeof(link_usettings));
- bitmap_to_arr32(link_usettings.link_modes.supported,
- from->link_modes.supported,
- __ETHTOOL_LINK_MODE_MASK_NBITS);
- bitmap_to_arr32(link_usettings.link_modes.advertising,
- from->link_modes.advertising,
- __ETHTOOL_LINK_MODE_MASK_NBITS);
- bitmap_to_arr32(link_usettings.link_modes.lp_advertising,
- from->link_modes.lp_advertising,
- __ETHTOOL_LINK_MODE_MASK_NBITS);
-
- if (copy_to_user(to, &link_usettings, sizeof(link_usettings)))
- return -EFAULT;
-
- return 0;
-}
-
-/* Query device for its ethtool_link_settings. */
-static int ethtool_get_link_ksettings(struct net_device *dev,
- void __user *useraddr)
-{
- int err = 0;
- struct ethtool_link_ksettings link_ksettings;
-
- ASSERT_RTNL();
- if (!dev->ethtool_ops->get_link_ksettings)
- return -EOPNOTSUPP;
-
- /* handle bitmap nbits handshake */
- if (copy_from_user(&link_ksettings.base, useraddr,
- sizeof(link_ksettings.base)))
- return -EFAULT;
-
- if (__ETHTOOL_LINK_MODE_MASK_NU32
- != link_ksettings.base.link_mode_masks_nwords) {
- /* wrong link mode nbits requested */
- memset(&link_ksettings, 0, sizeof(link_ksettings));
- link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS;
- /* send back number of words required as negative val */
- compiletime_assert(__ETHTOOL_LINK_MODE_MASK_NU32 <= S8_MAX,
- "need too many bits for link modes!");
- link_ksettings.base.link_mode_masks_nwords
- = -((s8)__ETHTOOL_LINK_MODE_MASK_NU32);
-
- /* copy the base fields back to user, not the link
- * mode bitmaps
- */
- if (copy_to_user(useraddr, &link_ksettings.base,
- sizeof(link_ksettings.base)))
- return -EFAULT;
-
- return 0;
- }
-
- /* handshake successful: user/kernel agree on
- * link_mode_masks_nwords
- */
-
- memset(&link_ksettings, 0, sizeof(link_ksettings));
- err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings);
- if (err < 0)
- return err;
-
- /* make sure we tell the right values to user */
- link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS;
- link_ksettings.base.link_mode_masks_nwords
- = __ETHTOOL_LINK_MODE_MASK_NU32;
-
- return store_link_ksettings_for_user(useraddr, &link_ksettings);
-}
-
-/* Update device ethtool_link_settings. */
-static int ethtool_set_link_ksettings(struct net_device *dev,
- void __user *useraddr)
-{
- int err;
- struct ethtool_link_ksettings link_ksettings;
-
- ASSERT_RTNL();
-
- if (!dev->ethtool_ops->set_link_ksettings)
- return -EOPNOTSUPP;
-
- /* make sure nbits field has expected value */
- if (copy_from_user(&link_ksettings.base, useraddr,
- sizeof(link_ksettings.base)))
- return -EFAULT;
-
- if (__ETHTOOL_LINK_MODE_MASK_NU32
- != link_ksettings.base.link_mode_masks_nwords)
- return -EINVAL;
-
- /* copy the whole structure, now that we know it has expected
- * format
- */
- err = load_link_ksettings_from_user(&link_ksettings, useraddr);
- if (err)
- return err;
-
- /* re-check nwords field, just in case */
- if (__ETHTOOL_LINK_MODE_MASK_NU32
- != link_ksettings.base.link_mode_masks_nwords)
- return -EINVAL;
-
- return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
-}
-
-/* Query device for its ethtool_cmd settings.
- *
- * Backward compatibility note: for compatibility with legacy ethtool, this is
- * now implemented via get_link_ksettings. When driver reports higher link mode
- * bits, a kernel warning is logged once (with name of 1st driver/device) to
- * recommend user to upgrade ethtool, but the command is successful (only the
- * lower link mode bits reported back to user). Deprecated fields from
- * ethtool_cmd (transceiver/maxrxpkt/maxtxpkt) are always set to zero.
- */
-static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_link_ksettings link_ksettings;
- struct ethtool_cmd cmd;
- int err;
-
- ASSERT_RTNL();
- if (!dev->ethtool_ops->get_link_ksettings)
- return -EOPNOTSUPP;
-
- memset(&link_ksettings, 0, sizeof(link_ksettings));
- err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings);
- if (err < 0)
- return err;
- convert_link_ksettings_to_legacy_settings(&cmd, &link_ksettings);
-
- /* send a sensible cmd tag back to user */
- cmd.cmd = ETHTOOL_GSET;
-
- if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
- return -EFAULT;
-
- return 0;
-}
-
-/* Update device link settings with given ethtool_cmd.
- *
- * Backward compatibility note: for compatibility with legacy ethtool, this is
- * now always implemented via set_link_settings. When user's request updates
- * deprecated ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel
- * warning is logged once (with name of 1st driver/device) to recommend user to
- * upgrade ethtool, and the request is rejected.
- */
-static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_link_ksettings link_ksettings;
- struct ethtool_cmd cmd;
-
- ASSERT_RTNL();
-
- if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
- return -EFAULT;
- if (!dev->ethtool_ops->set_link_ksettings)
- return -EOPNOTSUPP;
-
- if (!convert_legacy_settings_to_link_ksettings(&link_ksettings, &cmd))
- return -EINVAL;
- link_ksettings.base.link_mode_masks_nwords =
- __ETHTOOL_LINK_MODE_MASK_NU32;
- return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
-}
-
-static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
- void __user *useraddr)
-{
- struct ethtool_drvinfo info;
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
- memset(&info, 0, sizeof(info));
- info.cmd = ETHTOOL_GDRVINFO;
- if (ops->get_drvinfo) {
- ops->get_drvinfo(dev, &info);
- } else if (dev->dev.parent && dev->dev.parent->driver) {
- strlcpy(info.bus_info, dev_name(dev->dev.parent),
- sizeof(info.bus_info));
- strlcpy(info.driver, dev->dev.parent->driver->name,
- sizeof(info.driver));
- } else {
- return -EOPNOTSUPP;
- }
-
- /*
- * this method of obtaining string set info is deprecated;
- * Use ETHTOOL_GSSET_INFO instead.
- */
- if (ops->get_sset_count) {
- int rc;
-
- rc = ops->get_sset_count(dev, ETH_SS_TEST);
- if (rc >= 0)
- info.testinfo_len = rc;
- rc = ops->get_sset_count(dev, ETH_SS_STATS);
- if (rc >= 0)
- info.n_stats = rc;
- rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS);
- if (rc >= 0)
- info.n_priv_flags = rc;
- }
- if (ops->get_regs_len) {
- int ret = ops->get_regs_len(dev);
-
- if (ret > 0)
- info.regdump_len = ret;
- }
-
- if (ops->get_eeprom_len)
- info.eedump_len = ops->get_eeprom_len(dev);
-
- if (!info.fw_version[0])
- devlink_compat_running_version(dev, info.fw_version,
- sizeof(info.fw_version));
-
- if (copy_to_user(useraddr, &info, sizeof(info)))
- return -EFAULT;
- return 0;
-}
-
-static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
- void __user *useraddr)
-{
- struct ethtool_sset_info info;
- u64 sset_mask;
- int i, idx = 0, n_bits = 0, ret, rc;
- u32 *info_buf = NULL;
-
- if (copy_from_user(&info, useraddr, sizeof(info)))
- return -EFAULT;
-
- /* store copy of mask, because we zero struct later on */
- sset_mask = info.sset_mask;
- if (!sset_mask)
- return 0;
-
- /* calculate size of return buffer */
- n_bits = hweight64(sset_mask);
-
- memset(&info, 0, sizeof(info));
- info.cmd = ETHTOOL_GSSET_INFO;
-
- info_buf = kcalloc(n_bits, sizeof(u32), GFP_USER);
- if (!info_buf)
- return -ENOMEM;
-
- /*
- * fill return buffer based on input bitmask and successful
- * get_sset_count return
- */
- for (i = 0; i < 64; i++) {
- if (!(sset_mask & (1ULL << i)))
- continue;
-
- rc = __ethtool_get_sset_count(dev, i);
- if (rc >= 0) {
- info.sset_mask |= (1ULL << i);
- info_buf[idx++] = rc;
- }
- }
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &info, sizeof(info)))
- goto out;
-
- useraddr += offsetof(struct ethtool_sset_info, data);
- if (copy_to_user(useraddr, info_buf, idx * sizeof(u32)))
- goto out;
-
- ret = 0;
-
-out:
- kfree(info_buf);
- return ret;
-}
-
-static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
- u32 cmd, void __user *useraddr)
-{
- struct ethtool_rxnfc info;
- size_t info_size = sizeof(info);
- int rc;
-
- if (!dev->ethtool_ops->set_rxnfc)
- return -EOPNOTSUPP;
-
- /* struct ethtool_rxnfc was originally defined for
- * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
- * members. User-space might still be using that
- * definition. */
- if (cmd == ETHTOOL_SRXFH)
- info_size = (offsetof(struct ethtool_rxnfc, data) +
- sizeof(info.data));
-
- if (copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
-
- rc = dev->ethtool_ops->set_rxnfc(dev, &info);
- if (rc)
- return rc;
-
- if (cmd == ETHTOOL_SRXCLSRLINS &&
- copy_to_user(useraddr, &info, info_size))
- return -EFAULT;
-
- return 0;
-}
-
-static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
- u32 cmd, void __user *useraddr)
-{
- struct ethtool_rxnfc info;
- size_t info_size = sizeof(info);
- const struct ethtool_ops *ops = dev->ethtool_ops;
- int ret;
- void *rule_buf = NULL;
-
- if (!ops->get_rxnfc)
- return -EOPNOTSUPP;
-
- /* struct ethtool_rxnfc was originally defined for
- * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
- * members. User-space might still be using that
- * definition. */
- if (cmd == ETHTOOL_GRXFH)
- info_size = (offsetof(struct ethtool_rxnfc, data) +
- sizeof(info.data));
-
- if (copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
-
- /* If FLOW_RSS was requested then user-space must be using the
- * new definition, as FLOW_RSS is newer.
- */
- if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
- info_size = sizeof(info);
- if (copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
- /* Since malicious users may modify the original data,
- * we need to check whether FLOW_RSS is still requested.
- */
- if (!(info.flow_type & FLOW_RSS))
- return -EINVAL;
- }
-
- if (info.cmd != cmd)
- return -EINVAL;
-
- if (info.cmd == ETHTOOL_GRXCLSRLALL) {
- if (info.rule_cnt > 0) {
- if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
- rule_buf = kcalloc(info.rule_cnt, sizeof(u32),
- GFP_USER);
- if (!rule_buf)
- return -ENOMEM;
- }
- }
-
- ret = ops->get_rxnfc(dev, &info, rule_buf);
- if (ret < 0)
- goto err_out;
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &info, info_size))
- goto err_out;
-
- if (rule_buf) {
- useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
- if (copy_to_user(useraddr, rule_buf,
- info.rule_cnt * sizeof(u32)))
- goto err_out;
- }
- ret = 0;
-
-err_out:
- kfree(rule_buf);
-
- return ret;
-}
-
-static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
- struct ethtool_rxnfc *rx_rings,
- u32 size)
-{
- int i;
-
- if (copy_from_user(indir, useraddr, size * sizeof(indir[0])))
- return -EFAULT;
-
- /* Validate ring indices */
- for (i = 0; i < size; i++)
- if (indir[i] >= rx_rings->data)
- return -EINVAL;
-
- return 0;
-}
-
-u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
-
-void netdev_rss_key_fill(void *buffer, size_t len)
-{
- BUG_ON(len > sizeof(netdev_rss_key));
- net_get_random_once(netdev_rss_key, sizeof(netdev_rss_key));
- memcpy(buffer, netdev_rss_key, len);
-}
-EXPORT_SYMBOL(netdev_rss_key_fill);
-
-static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
-{
- u32 dev_size, current_max = 0;
- u32 *indir;
- int ret;
-
- if (!dev->ethtool_ops->get_rxfh_indir_size ||
- !dev->ethtool_ops->get_rxfh)
- return -EOPNOTSUPP;
- dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
- if (dev_size == 0)
- return -EOPNOTSUPP;
-
- indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
- if (!indir)
- return -ENOMEM;
-
- ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
- if (ret)
- goto out;
-
- while (dev_size--)
- current_max = max(current_max, indir[dev_size]);
-
- *max = current_max;
-
-out:
- kfree(indir);
- return ret;
-}
-
-static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
- void __user *useraddr)
-{
- u32 user_size, dev_size;
- u32 *indir;
- int ret;
-
- if (!dev->ethtool_ops->get_rxfh_indir_size ||
- !dev->ethtool_ops->get_rxfh)
- return -EOPNOTSUPP;
- dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
- if (dev_size == 0)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&user_size,
- useraddr + offsetof(struct ethtool_rxfh_indir, size),
- sizeof(user_size)))
- return -EFAULT;
-
- if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size),
- &dev_size, sizeof(dev_size)))
- return -EFAULT;
-
- /* If the user buffer size is 0, this is just a query for the
- * device table size. Otherwise, if it's smaller than the
- * device table size it's an error.
- */
- if (user_size < dev_size)
- return user_size == 0 ? 0 : -EINVAL;
-
- indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
- if (!indir)
- return -ENOMEM;
-
- ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
- if (ret)
- goto out;
-
- if (copy_to_user(useraddr +
- offsetof(struct ethtool_rxfh_indir, ring_index[0]),
- indir, dev_size * sizeof(indir[0])))
- ret = -EFAULT;
-
-out:
- kfree(indir);
- return ret;
-}
-
-static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
- void __user *useraddr)
-{
- struct ethtool_rxnfc rx_rings;
- u32 user_size, dev_size, i;
- u32 *indir;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- int ret;
- u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
-
- if (!ops->get_rxfh_indir_size || !ops->set_rxfh ||
- !ops->get_rxnfc)
- return -EOPNOTSUPP;
-
- dev_size = ops->get_rxfh_indir_size(dev);
- if (dev_size == 0)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&user_size,
- useraddr + offsetof(struct ethtool_rxfh_indir, size),
- sizeof(user_size)))
- return -EFAULT;
-
- if (user_size != 0 && user_size != dev_size)
- return -EINVAL;
-
- indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
- if (!indir)
- return -ENOMEM;
-
- rx_rings.cmd = ETHTOOL_GRXRINGS;
- ret = ops->get_rxnfc(dev, &rx_rings, NULL);
- if (ret)
- goto out;
-
- if (user_size == 0) {
- for (i = 0; i < dev_size; i++)
- indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
- } else {
- ret = ethtool_copy_validate_indir(indir,
- useraddr + ringidx_offset,
- &rx_rings,
- dev_size);
- if (ret)
- goto out;
- }
-
- ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE);
- if (ret)
- goto out;
-
- /* indicate whether rxfh was set to default */
- if (user_size == 0)
- dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
- else
- dev->priv_flags |= IFF_RXFH_CONFIGURED;
-
-out:
- kfree(indir);
- return ret;
-}
-
-static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
- void __user *useraddr)
-{
- int ret;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- u32 user_indir_size, user_key_size;
- u32 dev_indir_size = 0, dev_key_size = 0;
- struct ethtool_rxfh rxfh;
- u32 total_size;
- u32 indir_bytes;
- u32 *indir = NULL;
- u8 dev_hfunc = 0;
- u8 *hkey = NULL;
- u8 *rss_config;
-
- if (!ops->get_rxfh)
- return -EOPNOTSUPP;
-
- if (ops->get_rxfh_indir_size)
- dev_indir_size = ops->get_rxfh_indir_size(dev);
- if (ops->get_rxfh_key_size)
- dev_key_size = ops->get_rxfh_key_size(dev);
-
- if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
- return -EFAULT;
- user_indir_size = rxfh.indir_size;
- user_key_size = rxfh.key_size;
-
- /* Check that reserved fields are 0 for now */
- if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
- return -EINVAL;
- /* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->get_rxfh_context)
- return -EOPNOTSUPP;
-
- rxfh.indir_size = dev_indir_size;
- rxfh.key_size = dev_key_size;
- if (copy_to_user(useraddr, &rxfh, sizeof(rxfh)))
- return -EFAULT;
-
- if ((user_indir_size && (user_indir_size != dev_indir_size)) ||
- (user_key_size && (user_key_size != dev_key_size)))
- return -EINVAL;
-
- indir_bytes = user_indir_size * sizeof(indir[0]);
- total_size = indir_bytes + user_key_size;
- rss_config = kzalloc(total_size, GFP_USER);
- if (!rss_config)
- return -ENOMEM;
-
- if (user_indir_size)
- indir = (u32 *)rss_config;
-
- if (user_key_size)
- hkey = rss_config + indir_bytes;
-
- if (rxfh.rss_context)
- ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey,
- &dev_hfunc,
- rxfh.rss_context);
- else
- ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
- if (ret)
- goto out;
-
- if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc),
- &dev_hfunc, sizeof(rxfh.hfunc))) {
- ret = -EFAULT;
- } else if (copy_to_user(useraddr +
- offsetof(struct ethtool_rxfh, rss_config[0]),
- rss_config, total_size)) {
- ret = -EFAULT;
- }
-out:
- kfree(rss_config);
-
- return ret;
-}
-
-static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
- void __user *useraddr)
-{
- int ret;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- struct ethtool_rxnfc rx_rings;
- struct ethtool_rxfh rxfh;
- u32 dev_indir_size = 0, dev_key_size = 0, i;
- u32 *indir = NULL, indir_bytes = 0;
- u8 *hkey = NULL;
- u8 *rss_config;
- u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
- bool delete = false;
-
- if (!ops->get_rxnfc || !ops->set_rxfh)
- return -EOPNOTSUPP;
-
- if (ops->get_rxfh_indir_size)
- dev_indir_size = ops->get_rxfh_indir_size(dev);
- if (ops->get_rxfh_key_size)
- dev_key_size = ops->get_rxfh_key_size(dev);
-
- if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
- return -EFAULT;
-
- /* Check that reserved fields are 0 for now */
- if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
- return -EINVAL;
- /* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->set_rxfh_context)
- return -EOPNOTSUPP;
-
- /* If either indir, hash key or function is valid, proceed further.
- * Must request at least one change: indir size, hash key or function.
- */
- if ((rxfh.indir_size &&
- rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
- rxfh.indir_size != dev_indir_size) ||
- (rxfh.key_size && (rxfh.key_size != dev_key_size)) ||
- (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
- rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE))
- return -EINVAL;
-
- if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
- indir_bytes = dev_indir_size * sizeof(indir[0]);
-
- rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER);
- if (!rss_config)
- return -ENOMEM;
-
- rx_rings.cmd = ETHTOOL_GRXRINGS;
- ret = ops->get_rxnfc(dev, &rx_rings, NULL);
- if (ret)
- goto out;
-
- /* rxfh.indir_size == 0 means reset the indir table to default (master
- * context) or delete the context (other RSS contexts).
- * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
- */
- if (rxfh.indir_size &&
- rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) {
- indir = (u32 *)rss_config;
- ret = ethtool_copy_validate_indir(indir,
- useraddr + rss_cfg_offset,
- &rx_rings,
- rxfh.indir_size);
- if (ret)
- goto out;
- } else if (rxfh.indir_size == 0) {
- if (rxfh.rss_context == 0) {
- indir = (u32 *)rss_config;
- for (i = 0; i < dev_indir_size; i++)
- indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
- } else {
- delete = true;
- }
- }
-
- if (rxfh.key_size) {
- hkey = rss_config + indir_bytes;
- if (copy_from_user(hkey,
- useraddr + rss_cfg_offset + indir_bytes,
- rxfh.key_size)) {
- ret = -EFAULT;
- goto out;
- }
- }
-
- if (rxfh.rss_context)
- ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
- &rxfh.rss_context, delete);
- else
- ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
- if (ret)
- goto out;
-
- if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
- &rxfh.rss_context, sizeof(rxfh.rss_context)))
- ret = -EFAULT;
-
- if (!rxfh.rss_context) {
- /* indicate whether rxfh was set to default */
- if (rxfh.indir_size == 0)
- dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
- else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
- dev->priv_flags |= IFF_RXFH_CONFIGURED;
- }
-
-out:
- kfree(rss_config);
- return ret;
-}
-
-static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_regs regs;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- void *regbuf;
- int reglen, ret;
-
- if (!ops->get_regs || !ops->get_regs_len)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&regs, useraddr, sizeof(regs)))
- return -EFAULT;
-
- reglen = ops->get_regs_len(dev);
- if (reglen <= 0)
- return reglen;
-
- if (regs.len > reglen)
- regs.len = reglen;
-
- regbuf = vzalloc(reglen);
- if (!regbuf)
- return -ENOMEM;
-
- if (regs.len < reglen)
- reglen = regs.len;
-
- ops->get_regs(dev, &regs, regbuf);
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &regs, sizeof(regs)))
- goto out;
- useraddr += offsetof(struct ethtool_regs, data);
- if (copy_to_user(useraddr, regbuf, reglen))
- goto out;
- ret = 0;
-
- out:
- vfree(regbuf);
- return ret;
-}
-
-static int ethtool_reset(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value reset;
- int ret;
-
- if (!dev->ethtool_ops->reset)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&reset, useraddr, sizeof(reset)))
- return -EFAULT;
-
- ret = dev->ethtool_ops->reset(dev, &reset.data);
- if (ret)
- return ret;
-
- if (copy_to_user(useraddr, &reset, sizeof(reset)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_wolinfo wol;
-
- if (!dev->ethtool_ops->get_wol)
- return -EOPNOTSUPP;
-
- memset(&wol, 0, sizeof(struct ethtool_wolinfo));
- wol.cmd = ETHTOOL_GWOL;
- dev->ethtool_ops->get_wol(dev, &wol);
-
- if (copy_to_user(useraddr, &wol, sizeof(wol)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_wolinfo wol;
- int ret;
-
- if (!dev->ethtool_ops->set_wol)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&wol, useraddr, sizeof(wol)))
- return -EFAULT;
-
- ret = dev->ethtool_ops->set_wol(dev, &wol);
- if (ret)
- return ret;
-
- dev->wol_enabled = !!wol.wolopts;
-
- return 0;
-}
-
-static int ethtool_get_eee(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_eee edata;
- int rc;
-
- if (!dev->ethtool_ops->get_eee)
- return -EOPNOTSUPP;
-
- memset(&edata, 0, sizeof(struct ethtool_eee));
- edata.cmd = ETHTOOL_GEEE;
- rc = dev->ethtool_ops->get_eee(dev, &edata);
-
- if (rc)
- return rc;
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
-
- return 0;
-}
-
-static int ethtool_set_eee(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_eee edata;
-
- if (!dev->ethtool_ops->set_eee)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- return dev->ethtool_ops->set_eee(dev, &edata);
-}
-
-static int ethtool_nway_reset(struct net_device *dev)
-{
- if (!dev->ethtool_ops->nway_reset)
- return -EOPNOTSUPP;
-
- return dev->ethtool_ops->nway_reset(dev);
-}
-
-static int ethtool_get_link(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { .cmd = ETHTOOL_GLINK };
-
- if (!dev->ethtool_ops->get_link)
- return -EOPNOTSUPP;
-
- edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr,
- int (*getter)(struct net_device *,
- struct ethtool_eeprom *, u8 *),
- u32 total_len)
-{
- struct ethtool_eeprom eeprom;
- void __user *userbuf = useraddr + sizeof(eeprom);
- u32 bytes_remaining;
- u8 *data;
- int ret = 0;
-
- if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
- return -EFAULT;
-
- /* Check for wrap and zero */
- if (eeprom.offset + eeprom.len <= eeprom.offset)
- return -EINVAL;
-
- /* Check for exceeding total eeprom len */
- if (eeprom.offset + eeprom.len > total_len)
- return -EINVAL;
-
- data = kmalloc(PAGE_SIZE, GFP_USER);
- if (!data)
- return -ENOMEM;
-
- bytes_remaining = eeprom.len;
- while (bytes_remaining > 0) {
- eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE);
-
- ret = getter(dev, &eeprom, data);
- if (ret)
- break;
- if (copy_to_user(userbuf, data, eeprom.len)) {
- ret = -EFAULT;
- break;
- }
- userbuf += eeprom.len;
- eeprom.offset += eeprom.len;
- bytes_remaining -= eeprom.len;
- }
-
- eeprom.len = userbuf - (useraddr + sizeof(eeprom));
- eeprom.offset -= eeprom.len;
- if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
- ret = -EFAULT;
-
- kfree(data);
- return ret;
-}
-
-static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
-{
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
- if (!ops->get_eeprom || !ops->get_eeprom_len ||
- !ops->get_eeprom_len(dev))
- return -EOPNOTSUPP;
-
- return ethtool_get_any_eeprom(dev, useraddr, ops->get_eeprom,
- ops->get_eeprom_len(dev));
-}
-
-static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_eeprom eeprom;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- void __user *userbuf = useraddr + sizeof(eeprom);
- u32 bytes_remaining;
- u8 *data;
- int ret = 0;
-
- if (!ops->set_eeprom || !ops->get_eeprom_len ||
- !ops->get_eeprom_len(dev))
- return -EOPNOTSUPP;
-
- if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
- return -EFAULT;
-
- /* Check for wrap and zero */
- if (eeprom.offset + eeprom.len <= eeprom.offset)
- return -EINVAL;
-
- /* Check for exceeding total eeprom len */
- if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
- return -EINVAL;
-
- data = kmalloc(PAGE_SIZE, GFP_USER);
- if (!data)
- return -ENOMEM;
-
- bytes_remaining = eeprom.len;
- while (bytes_remaining > 0) {
- eeprom.len = min(bytes_remaining, (u32)PAGE_SIZE);
-
- if (copy_from_user(data, userbuf, eeprom.len)) {
- ret = -EFAULT;
- break;
- }
- ret = ops->set_eeprom(dev, &eeprom, data);
- if (ret)
- break;
- userbuf += eeprom.len;
- eeprom.offset += eeprom.len;
- bytes_remaining -= eeprom.len;
- }
-
- kfree(data);
- return ret;
-}
-
-static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
- void __user *useraddr)
-{
- struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
-
- if (!dev->ethtool_ops->get_coalesce)
- return -EOPNOTSUPP;
-
- dev->ethtool_ops->get_coalesce(dev, &coalesce);
-
- if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
- return -EFAULT;
- return 0;
-}
-
-static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
- void __user *useraddr)
-{
- struct ethtool_coalesce coalesce;
-
- if (!dev->ethtool_ops->set_coalesce)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
- return -EFAULT;
-
- return dev->ethtool_ops->set_coalesce(dev, &coalesce);
-}
-
-static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_ringparam ringparam = { .cmd = ETHTOOL_GRINGPARAM };
-
- if (!dev->ethtool_ops->get_ringparam)
- return -EOPNOTSUPP;
-
- dev->ethtool_ops->get_ringparam(dev, &ringparam);
-
- if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM };
-
- if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
- return -EFAULT;
-
- dev->ethtool_ops->get_ringparam(dev, &max);
-
- /* ensure new ring parameters are within the maximums */
- if (ringparam.rx_pending > max.rx_max_pending ||
- ringparam.rx_mini_pending > max.rx_mini_max_pending ||
- ringparam.rx_jumbo_pending > max.rx_jumbo_max_pending ||
- ringparam.tx_pending > max.tx_max_pending)
- return -EINVAL;
-
- return dev->ethtool_ops->set_ringparam(dev, &ringparam);
-}
-
-static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
- void __user *useraddr)
-{
- struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
-
- if (!dev->ethtool_ops->get_channels)
- return -EOPNOTSUPP;
-
- dev->ethtool_ops->get_channels(dev, &channels);
-
- if (copy_to_user(useraddr, &channels, sizeof(channels)))
- return -EFAULT;
- return 0;
-}
-
-static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
- void __user *useraddr)
-{
- struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS };
- u16 from_channel, to_channel;
- u32 max_rx_in_use = 0;
- unsigned int i;
-
- if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&channels, useraddr, sizeof(channels)))
- return -EFAULT;
-
- dev->ethtool_ops->get_channels(dev, &curr);
-
- /* ensure new counts are within the maximums */
- if (channels.rx_count > curr.max_rx ||
- channels.tx_count > curr.max_tx ||
- channels.combined_count > curr.max_combined ||
- channels.other_count > curr.max_other)
- return -EINVAL;
-
- /* ensure the new Rx count fits within the configured Rx flow
- * indirection table settings */
- if (netif_is_rxfh_configured(dev) &&
- !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) &&
- (channels.combined_count + channels.rx_count) <= max_rx_in_use)
- return -EINVAL;
-
- /* Disabling channels, query zero-copy AF_XDP sockets */
- from_channel = channels.combined_count +
- min(channels.rx_count, channels.tx_count);
- to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count);
- for (i = from_channel; i < to_channel; i++)
- if (xdp_get_umem_from_qid(dev, i))
- return -EINVAL;
-
- return dev->ethtool_ops->set_channels(dev, &channels);
-}
-
-static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_pauseparam pauseparam = { .cmd = ETHTOOL_GPAUSEPARAM };
-
- if (!dev->ethtool_ops->get_pauseparam)
- return -EOPNOTSUPP;
-
- dev->ethtool_ops->get_pauseparam(dev, &pauseparam);
-
- if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_pauseparam pauseparam;
-
- if (!dev->ethtool_ops->set_pauseparam)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
- return -EFAULT;
-
- return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
-}
-
-static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_test test;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- u64 *data;
- int ret, test_len;
-
- if (!ops->self_test || !ops->get_sset_count)
- return -EOPNOTSUPP;
-
- test_len = ops->get_sset_count(dev, ETH_SS_TEST);
- if (test_len < 0)
- return test_len;
- WARN_ON(test_len == 0);
-
- if (copy_from_user(&test, useraddr, sizeof(test)))
- return -EFAULT;
-
- test.len = test_len;
- data = kmalloc_array(test_len, sizeof(u64), GFP_USER);
- if (!data)
- return -ENOMEM;
-
- ops->self_test(dev, &test, data);
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &test, sizeof(test)))
- goto out;
- useraddr += sizeof(test);
- if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
- goto out;
- ret = 0;
-
- out:
- kfree(data);
- return ret;
-}
-
-static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_gstrings gstrings;
- u8 *data;
- int ret;
-
- if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
- return -EFAULT;
-
- ret = __ethtool_get_sset_count(dev, gstrings.string_set);
- if (ret < 0)
- return ret;
- if (ret > S32_MAX / ETH_GSTRING_LEN)
- return -ENOMEM;
- WARN_ON_ONCE(!ret);
-
- gstrings.len = ret;
-
- if (gstrings.len) {
- data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
- if (!data)
- return -ENOMEM;
-
- __ethtool_get_strings(dev, gstrings.string_set, data);
- } else {
- data = NULL;
- }
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
- goto out;
- useraddr += sizeof(gstrings);
- if (gstrings.len &&
- copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
- goto out;
- ret = 0;
-
-out:
- vfree(data);
- return ret;
-}
-
-static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_value id;
- static bool busy;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- int rc;
-
- if (!ops->set_phys_id)
- return -EOPNOTSUPP;
-
- if (busy)
- return -EBUSY;
-
- if (copy_from_user(&id, useraddr, sizeof(id)))
- return -EFAULT;
-
- rc = ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE);
- if (rc < 0)
- return rc;
-
- /* Drop the RTNL lock while waiting, but prevent reentry or
- * removal of the device.
- */
- busy = true;
- dev_hold(dev);
- rtnl_unlock();
-
- if (rc == 0) {
- /* Driver will handle this itself */
- schedule_timeout_interruptible(
- id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT);
- } else {
- /* Driver expects to be called at twice the frequency in rc */
- int n = rc * 2, i, interval = HZ / n;
-
- /* Count down seconds */
- do {
- /* Count down iterations per second */
- i = n;
- do {
- rtnl_lock();
- rc = ops->set_phys_id(dev,
- (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON);
- rtnl_unlock();
- if (rc)
- break;
- schedule_timeout_interruptible(interval);
- } while (!signal_pending(current) && --i != 0);
- } while (!signal_pending(current) &&
- (id.data == 0 || --id.data != 0));
- }
-
- rtnl_lock();
- dev_put(dev);
- busy = false;
-
- (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE);
- return rc;
-}
-
-static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_stats stats;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- u64 *data;
- int ret, n_stats;
-
- if (!ops->get_ethtool_stats || !ops->get_sset_count)
- return -EOPNOTSUPP;
-
- n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
- if (n_stats < 0)
- return n_stats;
- if (n_stats > S32_MAX / sizeof(u64))
- return -ENOMEM;
- WARN_ON_ONCE(!n_stats);
- if (copy_from_user(&stats, useraddr, sizeof(stats)))
- return -EFAULT;
-
- stats.n_stats = n_stats;
-
- if (n_stats) {
- data = vzalloc(array_size(n_stats, sizeof(u64)));
- if (!data)
- return -ENOMEM;
- ops->get_ethtool_stats(dev, &stats, data);
- } else {
- data = NULL;
- }
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &stats, sizeof(stats)))
- goto out;
- useraddr += sizeof(stats);
- if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64)))
- goto out;
- ret = 0;
-
- out:
- vfree(data);
- return ret;
-}
-
-static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
-{
- const struct ethtool_ops *ops = dev->ethtool_ops;
- struct phy_device *phydev = dev->phydev;
- struct ethtool_stats stats;
- u64 *data;
- int ret, n_stats;
-
- if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count))
- return -EOPNOTSUPP;
-
- if (dev->phydev && !ops->get_ethtool_phy_stats)
- n_stats = phy_ethtool_get_sset_count(dev->phydev);
- else
- n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
- if (n_stats < 0)
- return n_stats;
- if (n_stats > S32_MAX / sizeof(u64))
- return -ENOMEM;
- WARN_ON_ONCE(!n_stats);
-
- if (copy_from_user(&stats, useraddr, sizeof(stats)))
- return -EFAULT;
-
- stats.n_stats = n_stats;
-
- if (n_stats) {
- data = vzalloc(array_size(n_stats, sizeof(u64)));
- if (!data)
- return -ENOMEM;
-
- if (dev->phydev && !ops->get_ethtool_phy_stats) {
- ret = phy_ethtool_get_stats(dev->phydev, &stats, data);
- if (ret < 0)
- goto out;
- } else {
- ops->get_ethtool_phy_stats(dev, &stats, data);
- }
- } else {
- data = NULL;
- }
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &stats, sizeof(stats)))
- goto out;
- useraddr += sizeof(stats);
- if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64)))
- goto out;
- ret = 0;
-
- out:
- vfree(data);
- return ret;
-}
-
-static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_perm_addr epaddr;
-
- if (copy_from_user(&epaddr, useraddr, sizeof(epaddr)))
- return -EFAULT;
-
- if (epaddr.size < dev->addr_len)
- return -ETOOSMALL;
- epaddr.size = dev->addr_len;
-
- if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
- return -EFAULT;
- useraddr += sizeof(epaddr);
- if (copy_to_user(useraddr, dev->perm_addr, epaddr.size))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_get_value(struct net_device *dev, char __user *useraddr,
- u32 cmd, u32 (*actor)(struct net_device *))
-{
- struct ethtool_value edata = { .cmd = cmd };
-
- if (!actor)
- return -EOPNOTSUPP;
-
- edata.data = actor(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_value_void(struct net_device *dev, char __user *useraddr,
- void (*actor)(struct net_device *, u32))
-{
- struct ethtool_value edata;
-
- if (!actor)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- actor(dev, edata.data);
- return 0;
-}
-
-static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
- int (*actor)(struct net_device *, u32))
-{
- struct ethtool_value edata;
-
- if (!actor)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- return actor(dev, edata.data);
-}
-
-static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
- char __user *useraddr)
-{
- struct ethtool_flash efl;
-
- if (copy_from_user(&efl, useraddr, sizeof(efl)))
- return -EFAULT;
- efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0;
-
- if (!dev->ethtool_ops->flash_device)
- return devlink_compat_flash_update(dev, efl.data);
-
- return dev->ethtool_ops->flash_device(dev, &efl);
-}
-
-static int ethtool_set_dump(struct net_device *dev,
- void __user *useraddr)
-{
- struct ethtool_dump dump;
-
- if (!dev->ethtool_ops->set_dump)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&dump, useraddr, sizeof(dump)))
- return -EFAULT;
-
- return dev->ethtool_ops->set_dump(dev, &dump);
-}
-
-static int ethtool_get_dump_flag(struct net_device *dev,
- void __user *useraddr)
-{
- int ret;
- struct ethtool_dump dump;
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
- if (!ops->get_dump_flag)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&dump, useraddr, sizeof(dump)))
- return -EFAULT;
-
- ret = ops->get_dump_flag(dev, &dump);
- if (ret)
- return ret;
-
- if (copy_to_user(useraddr, &dump, sizeof(dump)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_get_dump_data(struct net_device *dev,
- void __user *useraddr)
-{
- int ret;
- __u32 len;
- struct ethtool_dump dump, tmp;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- void *data = NULL;
-
- if (!ops->get_dump_data || !ops->get_dump_flag)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&dump, useraddr, sizeof(dump)))
- return -EFAULT;
-
- memset(&tmp, 0, sizeof(tmp));
- tmp.cmd = ETHTOOL_GET_DUMP_FLAG;
- ret = ops->get_dump_flag(dev, &tmp);
- if (ret)
- return ret;
-
- len = min(tmp.len, dump.len);
- if (!len)
- return -EFAULT;
-
- /* Don't ever let the driver think there's more space available
- * than it requested with .get_dump_flag().
- */
- dump.len = len;
-
- /* Always allocate enough space to hold the whole thing so that the
- * driver does not need to check the length and bother with partial
- * dumping.
- */
- data = vzalloc(tmp.len);
- if (!data)
- return -ENOMEM;
- ret = ops->get_dump_data(dev, &dump, data);
- if (ret)
- goto out;
-
- /* There are two sane possibilities:
- * 1. The driver's .get_dump_data() does not touch dump.len.
- * 2. Or it may set dump.len to how much it really writes, which
- * should be tmp.len (or len if it can do a partial dump).
- * In any case respond to userspace with the actual length of data
- * it's receiving.
- */
- WARN_ON(dump.len != len && dump.len != tmp.len);
- dump.len = len;
-
- if (copy_to_user(useraddr, &dump, sizeof(dump))) {
- ret = -EFAULT;
- goto out;
- }
- useraddr += offsetof(struct ethtool_dump, data);
- if (copy_to_user(useraddr, data, len))
- ret = -EFAULT;
-out:
- vfree(data);
- return ret;
-}
-
-static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
-{
- int err = 0;
- struct ethtool_ts_info info;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- struct phy_device *phydev = dev->phydev;
-
- memset(&info, 0, sizeof(info));
- info.cmd = ETHTOOL_GET_TS_INFO;
-
- if (phydev && phydev->drv && phydev->drv->ts_info) {
- err = phydev->drv->ts_info(phydev, &info);
- } else if (ops->get_ts_info) {
- err = ops->get_ts_info(dev, &info);
- } else {
- info.so_timestamping =
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info.phc_index = -1;
- }
-
- if (err)
- return err;
-
- if (copy_to_user(useraddr, &info, sizeof(info)))
- err = -EFAULT;
-
- return err;
-}
-
-static int __ethtool_get_module_info(struct net_device *dev,
- struct ethtool_modinfo *modinfo)
-{
- const struct ethtool_ops *ops = dev->ethtool_ops;
- struct phy_device *phydev = dev->phydev;
-
- if (dev->sfp_bus)
- return sfp_get_module_info(dev->sfp_bus, modinfo);
-
- if (phydev && phydev->drv && phydev->drv->module_info)
- return phydev->drv->module_info(phydev, modinfo);
-
- if (ops->get_module_info)
- return ops->get_module_info(dev, modinfo);
-
- return -EOPNOTSUPP;
-}
-
-static int ethtool_get_module_info(struct net_device *dev,
- void __user *useraddr)
-{
- int ret;
- struct ethtool_modinfo modinfo;
-
- if (copy_from_user(&modinfo, useraddr, sizeof(modinfo)))
- return -EFAULT;
-
- ret = __ethtool_get_module_info(dev, &modinfo);
- if (ret)
- return ret;
-
- if (copy_to_user(useraddr, &modinfo, sizeof(modinfo)))
- return -EFAULT;
-
- return 0;
-}
-
-static int __ethtool_get_module_eeprom(struct net_device *dev,
- struct ethtool_eeprom *ee, u8 *data)
-{
- const struct ethtool_ops *ops = dev->ethtool_ops;
- struct phy_device *phydev = dev->phydev;
-
- if (dev->sfp_bus)
- return sfp_get_module_eeprom(dev->sfp_bus, ee, data);
-
- if (phydev && phydev->drv && phydev->drv->module_eeprom)
- return phydev->drv->module_eeprom(phydev, ee, data);
-
- if (ops->get_module_eeprom)
- return ops->get_module_eeprom(dev, ee, data);
-
- return -EOPNOTSUPP;
-}
-
-static int ethtool_get_module_eeprom(struct net_device *dev,
- void __user *useraddr)
-{
- int ret;
- struct ethtool_modinfo modinfo;
-
- ret = __ethtool_get_module_info(dev, &modinfo);
- if (ret)
- return ret;
-
- return ethtool_get_any_eeprom(dev, useraddr,
- __ethtool_get_module_eeprom,
- modinfo.eeprom_len);
-}
-
-static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
-{
- switch (tuna->id) {
- case ETHTOOL_RX_COPYBREAK:
- case ETHTOOL_TX_COPYBREAK:
- if (tuna->len != sizeof(u32) ||
- tuna->type_id != ETHTOOL_TUNABLE_U32)
- return -EINVAL;
- break;
- case ETHTOOL_PFC_PREVENTION_TOUT:
- if (tuna->len != sizeof(u16) ||
- tuna->type_id != ETHTOOL_TUNABLE_U16)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
-{
- int ret;
- struct ethtool_tunable tuna;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- void *data;
-
- if (!ops->get_tunable)
- return -EOPNOTSUPP;
- if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
- return -EFAULT;
- ret = ethtool_tunable_valid(&tuna);
- if (ret)
- return ret;
- data = kmalloc(tuna.len, GFP_USER);
- if (!data)
- return -ENOMEM;
- ret = ops->get_tunable(dev, &tuna, data);
- if (ret)
- goto out;
- useraddr += sizeof(tuna);
- ret = -EFAULT;
- if (copy_to_user(useraddr, data, tuna.len))
- goto out;
- ret = 0;
-
-out:
- kfree(data);
- return ret;
-}
-
-static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr)
-{
- int ret;
- struct ethtool_tunable tuna;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- void *data;
-
- if (!ops->set_tunable)
- return -EOPNOTSUPP;
- if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
- return -EFAULT;
- ret = ethtool_tunable_valid(&tuna);
- if (ret)
- return ret;
- useraddr += sizeof(tuna);
- data = memdup_user(useraddr, tuna.len);
- if (IS_ERR(data))
- return PTR_ERR(data);
- ret = ops->set_tunable(dev, &tuna, data);
-
- kfree(data);
- return ret;
-}
-
-static noinline_for_stack int
-ethtool_get_per_queue_coalesce(struct net_device *dev,
- void __user *useraddr,
- struct ethtool_per_queue_op *per_queue_opt)
-{
- u32 bit;
- int ret;
- DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE);
-
- if (!dev->ethtool_ops->get_per_queue_coalesce)
- return -EOPNOTSUPP;
-
- useraddr += sizeof(*per_queue_opt);
-
- bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask,
- MAX_NUM_QUEUE);
-
- for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) {
- struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
-
- ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, &coalesce);
- if (ret != 0)
- return ret;
- if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
- return -EFAULT;
- useraddr += sizeof(coalesce);
- }
-
- return 0;
-}
-
-static noinline_for_stack int
-ethtool_set_per_queue_coalesce(struct net_device *dev,
- void __user *useraddr,
- struct ethtool_per_queue_op *per_queue_opt)
-{
- u32 bit;
- int i, ret = 0;
- int n_queue;
- struct ethtool_coalesce *backup = NULL, *tmp = NULL;
- DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE);
-
- if ((!dev->ethtool_ops->set_per_queue_coalesce) ||
- (!dev->ethtool_ops->get_per_queue_coalesce))
- return -EOPNOTSUPP;
-
- useraddr += sizeof(*per_queue_opt);
-
- bitmap_from_arr32(queue_mask, per_queue_opt->queue_mask, MAX_NUM_QUEUE);
- n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE);
- tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL);
- if (!backup)
- return -ENOMEM;
-
- for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) {
- struct ethtool_coalesce coalesce;
-
- ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, tmp);
- if (ret != 0)
- goto roll_back;
-
- tmp++;
-
- if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) {
- ret = -EFAULT;
- goto roll_back;
- }
-
- ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce);
- if (ret != 0)
- goto roll_back;
-
- useraddr += sizeof(coalesce);
- }
-
-roll_back:
- if (ret != 0) {
- tmp = backup;
- for_each_set_bit(i, queue_mask, bit) {
- dev->ethtool_ops->set_per_queue_coalesce(dev, i, tmp);
- tmp++;
- }
- }
- kfree(backup);
-
- return ret;
-}
-
-static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev,
- void __user *useraddr, u32 sub_cmd)
-{
- struct ethtool_per_queue_op per_queue_opt;
-
- if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt)))
- return -EFAULT;
-
- if (per_queue_opt.sub_command != sub_cmd)
- return -EINVAL;
-
- switch (per_queue_opt.sub_command) {
- case ETHTOOL_GCOALESCE:
- return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt);
- case ETHTOOL_SCOALESCE:
- return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt);
- default:
- return -EOPNOTSUPP;
- };
-}
-
-static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna)
-{
- switch (tuna->id) {
- case ETHTOOL_PHY_DOWNSHIFT:
- case ETHTOOL_PHY_FAST_LINK_DOWN:
- if (tuna->len != sizeof(u8) ||
- tuna->type_id != ETHTOOL_TUNABLE_U8)
- return -EINVAL;
- break;
- case ETHTOOL_PHY_EDPD:
- if (tuna->len != sizeof(u16) ||
- tuna->type_id != ETHTOOL_TUNABLE_U16)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
-{
- int ret;
- struct ethtool_tunable tuna;
- struct phy_device *phydev = dev->phydev;
- void *data;
-
- if (!(phydev && phydev->drv && phydev->drv->get_tunable))
- return -EOPNOTSUPP;
-
- if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
- return -EFAULT;
- ret = ethtool_phy_tunable_valid(&tuna);
- if (ret)
- return ret;
- data = kmalloc(tuna.len, GFP_USER);
- if (!data)
- return -ENOMEM;
- mutex_lock(&phydev->lock);
- ret = phydev->drv->get_tunable(phydev, &tuna, data);
- mutex_unlock(&phydev->lock);
- if (ret)
- goto out;
- useraddr += sizeof(tuna);
- ret = -EFAULT;
- if (copy_to_user(useraddr, data, tuna.len))
- goto out;
- ret = 0;
-
-out:
- kfree(data);
- return ret;
-}
-
-static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
-{
- int ret;
- struct ethtool_tunable tuna;
- struct phy_device *phydev = dev->phydev;
- void *data;
-
- if (!(phydev && phydev->drv && phydev->drv->set_tunable))
- return -EOPNOTSUPP;
- if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
- return -EFAULT;
- ret = ethtool_phy_tunable_valid(&tuna);
- if (ret)
- return ret;
- useraddr += sizeof(tuna);
- data = memdup_user(useraddr, tuna.len);
- if (IS_ERR(data))
- return PTR_ERR(data);
- mutex_lock(&phydev->lock);
- ret = phydev->drv->set_tunable(phydev, &tuna, data);
- mutex_unlock(&phydev->lock);
-
- kfree(data);
- return ret;
-}
-
-static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_fecparam fecparam = { .cmd = ETHTOOL_GFECPARAM };
- int rc;
-
- if (!dev->ethtool_ops->get_fecparam)
- return -EOPNOTSUPP;
-
- rc = dev->ethtool_ops->get_fecparam(dev, &fecparam);
- if (rc)
- return rc;
-
- if (copy_to_user(useraddr, &fecparam, sizeof(fecparam)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_fecparam fecparam;
-
- if (!dev->ethtool_ops->set_fecparam)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&fecparam, useraddr, sizeof(fecparam)))
- return -EFAULT;
-
- return dev->ethtool_ops->set_fecparam(dev, &fecparam);
-}
-
-/* The main entry point in this file. Called from net/core/dev_ioctl.c */
-
-int dev_ethtool(struct net *net, struct ifreq *ifr)
-{
- struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
- void __user *useraddr = ifr->ifr_data;
- u32 ethcmd, sub_cmd;
- int rc;
- netdev_features_t old_features;
-
- if (!dev || !netif_device_present(dev))
- return -ENODEV;
-
- if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
- return -EFAULT;
-
- if (ethcmd == ETHTOOL_PERQUEUE) {
- if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd)))
- return -EFAULT;
- } else {
- sub_cmd = ethcmd;
- }
- /* Allow some commands to be done by anyone */
- switch (sub_cmd) {
- case ETHTOOL_GSET:
- case ETHTOOL_GDRVINFO:
- case ETHTOOL_GMSGLVL:
- case ETHTOOL_GLINK:
- case ETHTOOL_GCOALESCE:
- case ETHTOOL_GRINGPARAM:
- case ETHTOOL_GPAUSEPARAM:
- case ETHTOOL_GRXCSUM:
- case ETHTOOL_GTXCSUM:
- case ETHTOOL_GSG:
- case ETHTOOL_GSSET_INFO:
- case ETHTOOL_GSTRINGS:
- case ETHTOOL_GSTATS:
- case ETHTOOL_GPHYSTATS:
- case ETHTOOL_GTSO:
- case ETHTOOL_GPERMADDR:
- case ETHTOOL_GUFO:
- case ETHTOOL_GGSO:
- case ETHTOOL_GGRO:
- case ETHTOOL_GFLAGS:
- case ETHTOOL_GPFLAGS:
- case ETHTOOL_GRXFH:
- case ETHTOOL_GRXRINGS:
- case ETHTOOL_GRXCLSRLCNT:
- case ETHTOOL_GRXCLSRULE:
- case ETHTOOL_GRXCLSRLALL:
- case ETHTOOL_GRXFHINDIR:
- case ETHTOOL_GRSSH:
- case ETHTOOL_GFEATURES:
- case ETHTOOL_GCHANNELS:
- case ETHTOOL_GET_TS_INFO:
- case ETHTOOL_GEEE:
- case ETHTOOL_GTUNABLE:
- case ETHTOOL_PHY_GTUNABLE:
- case ETHTOOL_GLINKSETTINGS:
- case ETHTOOL_GFECPARAM:
- break;
- default:
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
- }
-
- if (dev->ethtool_ops->begin) {
- rc = dev->ethtool_ops->begin(dev);
- if (rc < 0)
- return rc;
- }
- old_features = dev->features;
-
- switch (ethcmd) {
- case ETHTOOL_GSET:
- rc = ethtool_get_settings(dev, useraddr);
- break;
- case ETHTOOL_SSET:
- rc = ethtool_set_settings(dev, useraddr);
- break;
- case ETHTOOL_GDRVINFO:
- rc = ethtool_get_drvinfo(dev, useraddr);
- break;
- case ETHTOOL_GREGS:
- rc = ethtool_get_regs(dev, useraddr);
- break;
- case ETHTOOL_GWOL:
- rc = ethtool_get_wol(dev, useraddr);
- break;
- case ETHTOOL_SWOL:
- rc = ethtool_set_wol(dev, useraddr);
- break;
- case ETHTOOL_GMSGLVL:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- dev->ethtool_ops->get_msglevel);
- break;
- case ETHTOOL_SMSGLVL:
- rc = ethtool_set_value_void(dev, useraddr,
- dev->ethtool_ops->set_msglevel);
- break;
- case ETHTOOL_GEEE:
- rc = ethtool_get_eee(dev, useraddr);
- break;
- case ETHTOOL_SEEE:
- rc = ethtool_set_eee(dev, useraddr);
- break;
- case ETHTOOL_NWAY_RST:
- rc = ethtool_nway_reset(dev);
- break;
- case ETHTOOL_GLINK:
- rc = ethtool_get_link(dev, useraddr);
- break;
- case ETHTOOL_GEEPROM:
- rc = ethtool_get_eeprom(dev, useraddr);
- break;
- case ETHTOOL_SEEPROM:
- rc = ethtool_set_eeprom(dev, useraddr);
- break;
- case ETHTOOL_GCOALESCE:
- rc = ethtool_get_coalesce(dev, useraddr);
- break;
- case ETHTOOL_SCOALESCE:
- rc = ethtool_set_coalesce(dev, useraddr);
- break;
- case ETHTOOL_GRINGPARAM:
- rc = ethtool_get_ringparam(dev, useraddr);
- break;
- case ETHTOOL_SRINGPARAM:
- rc = ethtool_set_ringparam(dev, useraddr);
- break;
- case ETHTOOL_GPAUSEPARAM:
- rc = ethtool_get_pauseparam(dev, useraddr);
- break;
- case ETHTOOL_SPAUSEPARAM:
- rc = ethtool_set_pauseparam(dev, useraddr);
- break;
- case ETHTOOL_TEST:
- rc = ethtool_self_test(dev, useraddr);
- break;
- case ETHTOOL_GSTRINGS:
- rc = ethtool_get_strings(dev, useraddr);
- break;
- case ETHTOOL_PHYS_ID:
- rc = ethtool_phys_id(dev, useraddr);
- break;
- case ETHTOOL_GSTATS:
- rc = ethtool_get_stats(dev, useraddr);
- break;
- case ETHTOOL_GPERMADDR:
- rc = ethtool_get_perm_addr(dev, useraddr);
- break;
- case ETHTOOL_GFLAGS:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- __ethtool_get_flags);
- break;
- case ETHTOOL_SFLAGS:
- rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
- break;
- case ETHTOOL_GPFLAGS:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- dev->ethtool_ops->get_priv_flags);
- break;
- case ETHTOOL_SPFLAGS:
- rc = ethtool_set_value(dev, useraddr,
- dev->ethtool_ops->set_priv_flags);
- break;
- case ETHTOOL_GRXFH:
- case ETHTOOL_GRXRINGS:
- case ETHTOOL_GRXCLSRLCNT:
- case ETHTOOL_GRXCLSRULE:
- case ETHTOOL_GRXCLSRLALL:
- rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
- break;
- case ETHTOOL_SRXFH:
- case ETHTOOL_SRXCLSRLDEL:
- case ETHTOOL_SRXCLSRLINS:
- rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
- break;
- case ETHTOOL_FLASHDEV:
- rc = ethtool_flash_device(dev, useraddr);
- break;
- case ETHTOOL_RESET:
- rc = ethtool_reset(dev, useraddr);
- break;
- case ETHTOOL_GSSET_INFO:
- rc = ethtool_get_sset_info(dev, useraddr);
- break;
- case ETHTOOL_GRXFHINDIR:
- rc = ethtool_get_rxfh_indir(dev, useraddr);
- break;
- case ETHTOOL_SRXFHINDIR:
- rc = ethtool_set_rxfh_indir(dev, useraddr);
- break;
- case ETHTOOL_GRSSH:
- rc = ethtool_get_rxfh(dev, useraddr);
- break;
- case ETHTOOL_SRSSH:
- rc = ethtool_set_rxfh(dev, useraddr);
- break;
- case ETHTOOL_GFEATURES:
- rc = ethtool_get_features(dev, useraddr);
- break;
- case ETHTOOL_SFEATURES:
- rc = ethtool_set_features(dev, useraddr);
- break;
- case ETHTOOL_GTXCSUM:
- case ETHTOOL_GRXCSUM:
- case ETHTOOL_GSG:
- case ETHTOOL_GTSO:
- case ETHTOOL_GGSO:
- case ETHTOOL_GGRO:
- rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
- break;
- case ETHTOOL_STXCSUM:
- case ETHTOOL_SRXCSUM:
- case ETHTOOL_SSG:
- case ETHTOOL_STSO:
- case ETHTOOL_SGSO:
- case ETHTOOL_SGRO:
- rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
- break;
- case ETHTOOL_GCHANNELS:
- rc = ethtool_get_channels(dev, useraddr);
- break;
- case ETHTOOL_SCHANNELS:
- rc = ethtool_set_channels(dev, useraddr);
- break;
- case ETHTOOL_SET_DUMP:
- rc = ethtool_set_dump(dev, useraddr);
- break;
- case ETHTOOL_GET_DUMP_FLAG:
- rc = ethtool_get_dump_flag(dev, useraddr);
- break;
- case ETHTOOL_GET_DUMP_DATA:
- rc = ethtool_get_dump_data(dev, useraddr);
- break;
- case ETHTOOL_GET_TS_INFO:
- rc = ethtool_get_ts_info(dev, useraddr);
- break;
- case ETHTOOL_GMODULEINFO:
- rc = ethtool_get_module_info(dev, useraddr);
- break;
- case ETHTOOL_GMODULEEEPROM:
- rc = ethtool_get_module_eeprom(dev, useraddr);
- break;
- case ETHTOOL_GTUNABLE:
- rc = ethtool_get_tunable(dev, useraddr);
- break;
- case ETHTOOL_STUNABLE:
- rc = ethtool_set_tunable(dev, useraddr);
- break;
- case ETHTOOL_GPHYSTATS:
- rc = ethtool_get_phy_stats(dev, useraddr);
- break;
- case ETHTOOL_PERQUEUE:
- rc = ethtool_set_per_queue(dev, useraddr, sub_cmd);
- break;
- case ETHTOOL_GLINKSETTINGS:
- rc = ethtool_get_link_ksettings(dev, useraddr);
- break;
- case ETHTOOL_SLINKSETTINGS:
- rc = ethtool_set_link_ksettings(dev, useraddr);
- break;
- case ETHTOOL_PHY_GTUNABLE:
- rc = get_phy_tunable(dev, useraddr);
- break;
- case ETHTOOL_PHY_STUNABLE:
- rc = set_phy_tunable(dev, useraddr);
- break;
- case ETHTOOL_GFECPARAM:
- rc = ethtool_get_fecparam(dev, useraddr);
- break;
- case ETHTOOL_SFECPARAM:
- rc = ethtool_set_fecparam(dev, useraddr);
- break;
- default:
- rc = -EOPNOTSUPP;
- }
-
- if (dev->ethtool_ops->complete)
- dev->ethtool_ops->complete(dev);
-
- if (old_features != dev->features)
- netdev_features_change(dev);
-
- return rc;
-}
-
-struct ethtool_rx_flow_key {
- struct flow_dissector_key_basic basic;
- union {
- struct flow_dissector_key_ipv4_addrs ipv4;
- struct flow_dissector_key_ipv6_addrs ipv6;
- };
- struct flow_dissector_key_ports tp;
- struct flow_dissector_key_ip ip;
- struct flow_dissector_key_vlan vlan;
- struct flow_dissector_key_eth_addrs eth_addrs;
-} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
-
-struct ethtool_rx_flow_match {
- struct flow_dissector dissector;
- struct ethtool_rx_flow_key key;
- struct ethtool_rx_flow_key mask;
-};
-
-struct ethtool_rx_flow_rule *
-ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
-{
- const struct ethtool_rx_flow_spec *fs = input->fs;
- static struct in6_addr zero_addr = {};
- struct ethtool_rx_flow_match *match;
- struct ethtool_rx_flow_rule *flow;
- struct flow_action_entry *act;
-
- flow = kzalloc(sizeof(struct ethtool_rx_flow_rule) +
- sizeof(struct ethtool_rx_flow_match), GFP_KERNEL);
- if (!flow)
- return ERR_PTR(-ENOMEM);
-
- /* ethtool_rx supports only one single action per rule. */
- flow->rule = flow_rule_alloc(1);
- if (!flow->rule) {
- kfree(flow);
- return ERR_PTR(-ENOMEM);
- }
-
- match = (struct ethtool_rx_flow_match *)flow->priv;
- flow->rule->match.dissector = &match->dissector;
- flow->rule->match.mask = &match->mask;
- flow->rule->match.key = &match->key;
-
- match->mask.basic.n_proto = htons(0xffff);
-
- switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
- case ETHER_FLOW: {
- const struct ethhdr *ether_spec, *ether_m_spec;
-
- ether_spec = &fs->h_u.ether_spec;
- ether_m_spec = &fs->m_u.ether_spec;
-
- if (!is_zero_ether_addr(ether_m_spec->h_source)) {
- ether_addr_copy(match->key.eth_addrs.src,
- ether_spec->h_source);
- ether_addr_copy(match->mask.eth_addrs.src,
- ether_m_spec->h_source);
- }
- if (!is_zero_ether_addr(ether_m_spec->h_dest)) {
- ether_addr_copy(match->key.eth_addrs.dst,
- ether_spec->h_dest);
- ether_addr_copy(match->mask.eth_addrs.dst,
- ether_m_spec->h_dest);
- }
- if (ether_m_spec->h_proto) {
- match->key.basic.n_proto = ether_spec->h_proto;
- match->mask.basic.n_proto = ether_m_spec->h_proto;
- }
- }
- break;
- case TCP_V4_FLOW:
- case UDP_V4_FLOW: {
- const struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec;
-
- match->key.basic.n_proto = htons(ETH_P_IP);
-
- v4_spec = &fs->h_u.tcp_ip4_spec;
- v4_m_spec = &fs->m_u.tcp_ip4_spec;
-
- if (v4_m_spec->ip4src) {
- match->key.ipv4.src = v4_spec->ip4src;
- match->mask.ipv4.src = v4_m_spec->ip4src;
- }
- if (v4_m_spec->ip4dst) {
- match->key.ipv4.dst = v4_spec->ip4dst;
- match->mask.ipv4.dst = v4_m_spec->ip4dst;
- }
- if (v4_m_spec->ip4src ||
- v4_m_spec->ip4dst) {
- match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS);
- match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] =
- offsetof(struct ethtool_rx_flow_key, ipv4);
- }
- if (v4_m_spec->psrc) {
- match->key.tp.src = v4_spec->psrc;
- match->mask.tp.src = v4_m_spec->psrc;
- }
- if (v4_m_spec->pdst) {
- match->key.tp.dst = v4_spec->pdst;
- match->mask.tp.dst = v4_m_spec->pdst;
- }
- if (v4_m_spec->psrc ||
- v4_m_spec->pdst) {
- match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_PORTS);
- match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
- offsetof(struct ethtool_rx_flow_key, tp);
- }
- if (v4_m_spec->tos) {
- match->key.ip.tos = v4_spec->tos;
- match->mask.ip.tos = v4_m_spec->tos;
- match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IP);
- match->dissector.offset[FLOW_DISSECTOR_KEY_IP] =
- offsetof(struct ethtool_rx_flow_key, ip);
- }
- }
- break;
- case TCP_V6_FLOW:
- case UDP_V6_FLOW: {
- const struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec;
-
- match->key.basic.n_proto = htons(ETH_P_IPV6);
-
- v6_spec = &fs->h_u.tcp_ip6_spec;
- v6_m_spec = &fs->m_u.tcp_ip6_spec;
- if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) {
- memcpy(&match->key.ipv6.src, v6_spec->ip6src,
- sizeof(match->key.ipv6.src));
- memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src,
- sizeof(match->mask.ipv6.src));
- }
- if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) {
- memcpy(&match->key.ipv6.dst, v6_spec->ip6dst,
- sizeof(match->key.ipv6.dst));
- memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst,
- sizeof(match->mask.ipv6.dst));
- }
- if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) ||
- memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) {
- match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
- match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] =
- offsetof(struct ethtool_rx_flow_key, ipv6);
- }
- if (v6_m_spec->psrc) {
- match->key.tp.src = v6_spec->psrc;
- match->mask.tp.src = v6_m_spec->psrc;
- }
- if (v6_m_spec->pdst) {
- match->key.tp.dst = v6_spec->pdst;
- match->mask.tp.dst = v6_m_spec->pdst;
- }
- if (v6_m_spec->psrc ||
- v6_m_spec->pdst) {
- match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_PORTS);
- match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
- offsetof(struct ethtool_rx_flow_key, tp);
- }
- if (v6_m_spec->tclass) {
- match->key.ip.tos = v6_spec->tclass;
- match->mask.ip.tos = v6_m_spec->tclass;
- match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IP);
- match->dissector.offset[FLOW_DISSECTOR_KEY_IP] =
- offsetof(struct ethtool_rx_flow_key, ip);
- }
- }
- break;
- default:
- ethtool_rx_flow_rule_destroy(flow);
- return ERR_PTR(-EINVAL);
- }
-
- switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
- case TCP_V4_FLOW:
- case TCP_V6_FLOW:
- match->key.basic.ip_proto = IPPROTO_TCP;
- break;
- case UDP_V4_FLOW:
- case UDP_V6_FLOW:
- match->key.basic.ip_proto = IPPROTO_UDP;
- break;
- }
- match->mask.basic.ip_proto = 0xff;
-
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC);
- match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] =
- offsetof(struct ethtool_rx_flow_key, basic);
-
- if (fs->flow_type & FLOW_EXT) {
- const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext;
- const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext;
-
- if (ext_m_spec->vlan_etype) {
- match->key.vlan.vlan_tpid = ext_h_spec->vlan_etype;
- match->mask.vlan.vlan_tpid = ext_m_spec->vlan_etype;
- }
-
- if (ext_m_spec->vlan_tci) {
- match->key.vlan.vlan_id =
- ntohs(ext_h_spec->vlan_tci) & 0x0fff;
- match->mask.vlan.vlan_id =
- ntohs(ext_m_spec->vlan_tci) & 0x0fff;
-
- match->key.vlan.vlan_dei =
- !!(ext_h_spec->vlan_tci & htons(0x1000));
- match->mask.vlan.vlan_dei =
- !!(ext_m_spec->vlan_tci & htons(0x1000));
-
- match->key.vlan.vlan_priority =
- (ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13;
- match->mask.vlan.vlan_priority =
- (ntohs(ext_m_spec->vlan_tci) & 0xe000) >> 13;
- }
-
- if (ext_m_spec->vlan_etype ||
- ext_m_spec->vlan_tci) {
- match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_VLAN);
- match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
- offsetof(struct ethtool_rx_flow_key, vlan);
- }
- }
- if (fs->flow_type & FLOW_MAC_EXT) {
- const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext;
- const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext;
-
- memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest,
- ETH_ALEN);
- memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest,
- ETH_ALEN);
-
- match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS);
- match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] =
- offsetof(struct ethtool_rx_flow_key, eth_addrs);
- }
-
- act = &flow->rule->action.entries[0];
- switch (fs->ring_cookie) {
- case RX_CLS_FLOW_DISC:
- act->id = FLOW_ACTION_DROP;
- break;
- case RX_CLS_FLOW_WAKE:
- act->id = FLOW_ACTION_WAKE;
- break;
- default:
- act->id = FLOW_ACTION_QUEUE;
- if (fs->flow_type & FLOW_RSS)
- act->queue.ctx = input->rss_ctx;
-
- act->queue.vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie);
- act->queue.index = ethtool_get_flow_spec_ring(fs->ring_cookie);
- break;
- }
-
- return flow;
-}
-EXPORT_SYMBOL(ethtool_rx_flow_rule_create);
-
-void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *flow)
-{
- kfree(flow->rule);
- kfree(flow);
-}
-EXPORT_SYMBOL(ethtool_rx_flow_rule_destroy);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3e7e15278c46..bd7eba9066f8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -974,7 +974,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh = nlmsg_data(nlh);
frh->family = ops->family;
- frh->table = rule->table;
+ frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT;
if (nla_put_u32(skb, FRA_TABLE, rule->table))
goto nla_put_failure;
if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
diff --git a/net/core/filter.c b/net/core/filter.c
index 538f6a735a19..c180871e606d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1573,7 +1573,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
return -EPERM;
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
- if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
+ if (PTR_ERR(prog) == -EINVAL)
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
if (IS_ERR(prog))
return PTR_ERR(prog);
@@ -3459,119 +3459,30 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
.arg2_type = ARG_ANYTHING,
};
-static int __bpf_tx_xdp(struct net_device *dev,
- struct bpf_map *map,
- struct xdp_buff *xdp,
- u32 index)
-{
- struct xdp_frame *xdpf;
- int err, sent;
-
- if (!dev->netdev_ops->ndo_xdp_xmit) {
- return -EOPNOTSUPP;
- }
-
- err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
- if (unlikely(err))
- return err;
-
- xdpf = convert_to_xdp_frame(xdp);
- if (unlikely(!xdpf))
- return -EOVERFLOW;
-
- sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
- if (sent <= 0)
- return sent;
- return 0;
-}
-
-static noinline int
-xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri)
-{
- struct net_device *fwd;
- u32 index = ri->tgt_index;
- int err;
-
- fwd = dev_get_by_index_rcu(dev_net(dev), index);
- ri->tgt_index = 0;
- if (unlikely(!fwd)) {
- err = -EINVAL;
- goto err;
- }
-
- err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
- if (unlikely(err))
- goto err;
-
- _trace_xdp_redirect(dev, xdp_prog, index);
- return 0;
-err:
- _trace_xdp_redirect_err(dev, xdp_prog, index, err);
- return err;
-}
-
static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
- struct bpf_map *map,
- struct xdp_buff *xdp,
- u32 index)
+ struct bpf_map *map, struct xdp_buff *xdp)
{
- int err;
-
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP:
- case BPF_MAP_TYPE_DEVMAP_HASH: {
- struct bpf_dtab_netdev *dst = fwd;
-
- err = dev_map_enqueue(dst, xdp, dev_rx);
- if (unlikely(err))
- return err;
- break;
- }
- case BPF_MAP_TYPE_CPUMAP: {
- struct bpf_cpu_map_entry *rcpu = fwd;
-
- err = cpu_map_enqueue(rcpu, xdp, dev_rx);
- if (unlikely(err))
- return err;
- break;
- }
- case BPF_MAP_TYPE_XSKMAP: {
- struct xdp_sock *xs = fwd;
-
- err = __xsk_map_redirect(map, xdp, xs);
- return err;
- }
+ case BPF_MAP_TYPE_DEVMAP_HASH:
+ return dev_map_enqueue(fwd, xdp, dev_rx);
+ case BPF_MAP_TYPE_CPUMAP:
+ return cpu_map_enqueue(fwd, xdp, dev_rx);
+ case BPF_MAP_TYPE_XSKMAP:
+ return __xsk_map_redirect(fwd, xdp);
default:
- break;
+ return -EBADRQC;
}
return 0;
}
-void xdp_do_flush_map(void)
+void xdp_do_flush(void)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- struct bpf_map *map = ri->map_to_flush;
-
- ri->map_to_flush = NULL;
- if (map) {
- switch (map->map_type) {
- case BPF_MAP_TYPE_DEVMAP:
- case BPF_MAP_TYPE_DEVMAP_HASH:
- __dev_map_flush(map);
- break;
- case BPF_MAP_TYPE_CPUMAP:
- __cpu_map_flush(map);
- break;
- case BPF_MAP_TYPE_XSKMAP:
- __xsk_map_flush(map);
- break;
- default:
- break;
- }
- }
+ __dev_flush();
+ __cpu_map_flush();
+ __xsk_map_flush();
}
-EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+EXPORT_SYMBOL_GPL(xdp_do_flush);
static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
{
@@ -3606,10 +3517,11 @@ void bpf_clear_redirect_map(struct bpf_map *map)
}
}
-static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog, struct bpf_map *map,
- struct bpf_redirect_info *ri)
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_map *map = READ_ONCE(ri->map);
u32 index = ri->tgt_index;
void *fwd = ri->tgt_value;
int err;
@@ -3618,32 +3530,27 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
- if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
- xdp_do_flush_map();
+ if (unlikely(!map)) {
+ fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ err = dev_xdp_enqueue(fwd, xdp, dev);
+ } else {
+ err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
+ }
- err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
if (unlikely(err))
goto err;
- ri->map_to_flush = map;
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
return 0;
err:
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
return err;
}
-
-int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
-{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- struct bpf_map *map = READ_ONCE(ri->map);
-
- if (likely(map))
- return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri);
-
- return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri);
-}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
static int xdp_do_generic_redirect_map(struct net_device *dev,
@@ -5976,7 +5883,7 @@ bool bpf_helper_changes_pkt_data(void *func)
return false;
}
-static const struct bpf_func_proto *
+const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -6016,6 +5923,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_spin_unlock_proto;
case BPF_FUNC_trace_printk:
return bpf_get_trace_printk_proto();
+ case BPF_FUNC_jiffies64:
+ return &bpf_jiffies64_proto;
default:
return NULL;
}
@@ -7648,21 +7557,21 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock, type):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
- *target_size = 2;
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock, sk_type),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock, sk_type,
+ sizeof_field(struct sock, sk_type),
+ target_size));
break;
case offsetof(struct bpf_sock, protocol):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
- *target_size = 1;
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock, sk_protocol),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock, sk_protocol,
+ sizeof_field(struct sock, sk_protocol),
+ target_size));
break;
case offsetof(struct bpf_sock, src_ip4):
@@ -7944,20 +7853,13 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_addr, type):
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
- struct bpf_sock_addr_kern, struct sock, sk,
- __sk_flags_offset, BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_type);
break;
case offsetof(struct bpf_sock_addr, protocol):
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
- struct bpf_sock_addr_kern, struct sock, sk,
- __sk_flags_offset, BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
- SK_FL_PROTO_SHIFT);
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_protocol);
break;
case offsetof(struct bpf_sock_addr, msg_src_ip4):
@@ -8876,11 +8778,11 @@ sk_reuseport_is_valid_access(int off, int size,
skb, \
SKB_FIELD)
-#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern, \
- struct sock, \
- sk, \
- SK_FIELD, BPF_SIZE, EXTRA_OFF)
+#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
+ struct sock, \
+ sk, \
+ SK_FIELD)
static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
@@ -8904,16 +8806,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_reuseport_md, ip_protocol):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
- SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
- BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
- SK_FL_PROTO_SHIFT);
- /* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
- * aware. No further narrowing or masking is needed.
- */
- *target_size = 1;
+ SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
break;
case offsetof(struct sk_reuseport_md, data_end):
@@ -8941,3 +8834,11 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
#endif /* CONFIG_INET */
+
+DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+
+void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
+{
+ bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp),
+ prev_prog, prog);
+}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2dbbb030fbed..a1670dff0629 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -233,7 +233,7 @@ static bool icmp_has_id(u8 type)
* @skb: sk_buff to extract from
* @key_icmp: struct flow_dissector_key_icmp to fill
* @data: raw buffer pointer to the packet
- * @toff: offset to extract at
+ * @thoff: offset to extract at
* @hlen: packet header length
*/
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
@@ -834,10 +834,10 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
struct flow_dissector *flow_dissector,
void *target_container)
{
+ struct flow_dissector_key_ports *key_ports = NULL;
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
- struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_tags *key_tags;
key_control = skb_flow_dissector_target(flow_dissector,
@@ -876,10 +876,17 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
- if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
+ else if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE))
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE,
+ target_container);
+
+ if (key_ports) {
key_ports->src = flow_keys->sport;
key_ports->dst = flow_keys->dport;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 920784a9b7ff..789a73aa7bd8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3290,6 +3290,7 @@ static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu+1;
return per_cpu_ptr(tbl->stats, cpu);
}
+ (*pos)++;
return NULL;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 39402840025e..757cc1d084e7 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -211,16 +211,10 @@ static int net_eq_idr(int id, void *net, void *peer)
return 0;
}
-/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc
- * is set to true, thus the caller knows that the new id must be notified via
- * rtnl.
- */
-static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
+/* Must be called from RCU-critical section or with nsid_lock held */
+static int __peernet2id(const struct net *net, struct net *peer)
{
int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
- bool alloc_it = *alloc;
-
- *alloc = false;
/* Magic value for id 0. */
if (id == NET_ID_ZERO)
@@ -228,23 +222,9 @@ static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
if (id > 0)
return id;
- if (alloc_it) {
- id = alloc_netid(net, peer, -1);
- *alloc = true;
- return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
- }
-
return NETNSA_NSID_NOT_ASSIGNED;
}
-/* should be called with nsid_lock held */
-static int __peernet2id(struct net *net, struct net *peer)
-{
- bool no = false;
-
- return __peernet2id_alloc(net, peer, &no);
-}
-
static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
struct nlmsghdr *nlh, gfp_t gfp);
/* This function returns the id of a peer netns. If no id is assigned, one will
@@ -252,38 +232,50 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
*/
int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
{
- bool alloc = false, alive = false;
int id;
if (refcount_read(&net->count) == 0)
return NETNSA_NSID_NOT_ASSIGNED;
- spin_lock_bh(&net->nsid_lock);
- /*
- * When peer is obtained from RCU lists, we may race with
+
+ spin_lock(&net->nsid_lock);
+ id = __peernet2id(net, peer);
+ if (id >= 0) {
+ spin_unlock(&net->nsid_lock);
+ return id;
+ }
+
+ /* When peer is obtained from RCU lists, we may race with
* its cleanup. Check whether it's alive, and this guarantees
* we never hash a peer back to net->netns_ids, after it has
* just been idr_remove()'d from there in cleanup_net().
*/
- if (maybe_get_net(peer))
- alive = alloc = true;
- id = __peernet2id_alloc(net, peer, &alloc);
- spin_unlock_bh(&net->nsid_lock);
- if (alloc && id >= 0)
- rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
- if (alive)
- put_net(peer);
+ if (!maybe_get_net(peer)) {
+ spin_unlock(&net->nsid_lock);
+ return NETNSA_NSID_NOT_ASSIGNED;
+ }
+
+ id = alloc_netid(net, peer, -1);
+ spin_unlock(&net->nsid_lock);
+
+ put_net(peer);
+ if (id < 0)
+ return NETNSA_NSID_NOT_ASSIGNED;
+
+ rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
+
return id;
}
EXPORT_SYMBOL_GPL(peernet2id_alloc);
/* This function returns, if assigned, the id of a peer netns. */
-int peernet2id(struct net *net, struct net *peer)
+int peernet2id(const struct net *net, struct net *peer)
{
int id;
- spin_lock_bh(&net->nsid_lock);
+ rcu_read_lock();
id = __peernet2id(net, peer);
- spin_unlock_bh(&net->nsid_lock);
+ rcu_read_unlock();
+
return id;
}
EXPORT_SYMBOL(peernet2id);
@@ -291,12 +283,12 @@ EXPORT_SYMBOL(peernet2id);
/* This function returns true is the peer netns has an id assigned into the
* current netns.
*/
-bool peernet_has_id(struct net *net, struct net *peer)
+bool peernet_has_id(const struct net *net, struct net *peer)
{
return peernet2id(net, peer) >= 0;
}
-struct net *get_net_ns_by_id(struct net *net, int id)
+struct net *get_net_ns_by_id(const struct net *net, int id)
{
struct net *peer;
@@ -528,20 +520,20 @@ static void unhash_nsid(struct net *net, struct net *last)
for_each_net(tmp) {
int id;
- spin_lock_bh(&tmp->nsid_lock);
+ spin_lock(&tmp->nsid_lock);
id = __peernet2id(tmp, net);
if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- spin_unlock_bh(&tmp->nsid_lock);
+ spin_unlock(&tmp->nsid_lock);
if (id >= 0)
rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
GFP_KERNEL);
if (tmp == last)
break;
}
- spin_lock_bh(&net->nsid_lock);
+ spin_lock(&net->nsid_lock);
idr_destroy(&net->netns_ids);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
}
static LLIST_HEAD(cleanup_list);
@@ -754,9 +746,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(peer);
}
- spin_lock_bh(&net->nsid_lock);
+ spin_lock(&net->nsid_lock);
if (__peernet2id(net, peer) >= 0) {
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
err = -EEXIST;
NL_SET_BAD_ATTR(extack, nla);
NL_SET_ERR_MSG(extack,
@@ -765,7 +757,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
}
err = alloc_netid(net, peer, nsid);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
if (err >= 0) {
rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
nlh, GFP_KERNEL);
@@ -950,6 +942,7 @@ struct rtnl_net_dump_cb {
int s_idx;
};
+/* Runs in RCU-critical section. */
static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
@@ -1034,19 +1027,9 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
goto end;
}
- spin_lock_bh(&net_cb.tgt_net->nsid_lock);
- if (net_cb.fillargs.add_ref &&
- !net_eq(net_cb.ref_net, net_cb.tgt_net) &&
- !spin_trylock_bh(&net_cb.ref_net->nsid_lock)) {
- spin_unlock_bh(&net_cb.tgt_net->nsid_lock);
- err = -EAGAIN;
- goto end;
- }
+ rcu_read_lock();
idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
- if (net_cb.fillargs.add_ref &&
- !net_eq(net_cb.ref_net, net_cb.tgt_net))
- spin_unlock_bh(&net_cb.ref_net->nsid_lock);
- spin_unlock_bh(&net_cb.tgt_net->nsid_lock);
+ rcu_read_unlock();
cb->args[0] = net_cb.idx;
end:
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 0642f91c4038..b4c87fe31be2 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -53,30 +53,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css)
kfree(css_cls_state(css));
}
+/*
+ * To avoid freezing of sockets creation for tasks with big number of threads
+ * and opened sockets lets release file_lock every 1000 iterated descriptors.
+ * New sockets will already have been created with new classid.
+ */
+
+struct update_classid_context {
+ u32 classid;
+ unsigned int batch;
+};
+
+#define UPDATE_CLASSID_BATCH 1000
+
static int update_classid_sock(const void *v, struct file *file, unsigned n)
{
int err;
+ struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file, &err);
if (sock) {
spin_lock(&cgroup_sk_update_lock);
- sock_cgroup_set_classid(&sock->sk->sk_cgrp_data,
- (unsigned long)v);
+ sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
spin_unlock(&cgroup_sk_update_lock);
}
+ if (--ctx->batch == 0) {
+ ctx->batch = UPDATE_CLASSID_BATCH;
+ return n + 1;
+ }
return 0;
}
+static void update_classid_task(struct task_struct *p, u32 classid)
+{
+ struct update_classid_context ctx = {
+ .classid = classid,
+ .batch = UPDATE_CLASSID_BATCH
+ };
+ unsigned int fd = 0;
+
+ do {
+ task_lock(p);
+ fd = iterate_fd(p->files, fd, update_classid_sock, &ctx);
+ task_unlock(p);
+ cond_resched();
+ } while (fd);
+}
+
static void cgrp_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
struct task_struct *p;
cgroup_taskset_for_each(p, css, tset) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)css_cls_state(css)->classid);
- task_unlock(p);
+ update_classid_task(p, css_cls_state(css)->classid);
}
}
@@ -98,10 +128,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
css_task_iter_start(css, 0, &it);
while ((p = css_task_iter_next(&it))) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)cs->classid);
- task_unlock(p);
+ update_classid_task(p, cs->classid);
cond_resched();
}
css_task_iter_end(&it);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index a6aefe989043..10d2b255df5e 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -96,40 +96,76 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
}
EXPORT_SYMBOL(page_pool_create);
-/* fast path */
-static struct page *__page_pool_get_cached(struct page_pool *pool)
+static void __page_pool_return_page(struct page_pool *pool, struct page *page);
+
+noinline
+static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
{
struct ptr_ring *r = &pool->ring;
- bool refill = false;
struct page *page;
-
- /* Test for safe-context, caller should provide this guarantee */
- if (likely(in_serving_softirq())) {
- if (likely(pool->alloc.count)) {
- /* Fast-path */
- page = pool->alloc.cache[--pool->alloc.count];
- return page;
- }
- refill = true;
- }
+ int pref_nid; /* preferred NUMA node */
/* Quicker fallback, avoid locks when ring is empty */
if (__ptr_ring_empty(r))
return NULL;
- /* Slow-path: Get page from locked ring queue,
- * refill alloc array if requested.
+ /* Softirq guarantee CPU and thus NUMA node is stable. This,
+ * assumes CPU refilling driver RX-ring will also run RX-NAPI.
*/
+#ifdef CONFIG_NUMA
+ pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
+#else
+ /* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
+ pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
+#endif
+
+ /* Slower-path: Get pages from locked ring queue */
spin_lock(&r->consumer_lock);
- page = __ptr_ring_consume(r);
- if (refill)
- pool->alloc.count = __ptr_ring_consume_batched(r,
- pool->alloc.cache,
- PP_ALLOC_CACHE_REFILL);
+
+ /* Refill alloc array, but only if NUMA match */
+ do {
+ page = __ptr_ring_consume(r);
+ if (unlikely(!page))
+ break;
+
+ if (likely(page_to_nid(page) == pref_nid)) {
+ pool->alloc.cache[pool->alloc.count++] = page;
+ } else {
+ /* NUMA mismatch;
+ * (1) release 1 page to page-allocator and
+ * (2) break out to fallthrough to alloc_pages_node.
+ * This limit stress on page buddy alloactor.
+ */
+ __page_pool_return_page(pool, page);
+ page = NULL;
+ break;
+ }
+ } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
+
+ /* Return last page */
+ if (likely(pool->alloc.count > 0))
+ page = pool->alloc.cache[--pool->alloc.count];
+
spin_unlock(&r->consumer_lock);
return page;
}
+/* fast path */
+static struct page *__page_pool_get_cached(struct page_pool *pool)
+{
+ struct page *page;
+
+ /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
+ if (likely(pool->alloc.count)) {
+ /* Fast-path */
+ page = pool->alloc.cache[--pool->alloc.count];
+ } else {
+ page = page_pool_refill_alloc_cache(pool);
+ }
+
+ return page;
+}
+
static void page_pool_dma_sync_for_device(struct page_pool *pool,
struct page *page,
unsigned int dma_sync_size)
@@ -163,7 +199,11 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
*/
/* Cache was empty, do real allocation */
+#ifdef CONFIG_NUMA
page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
+#else
+ page = alloc_pages(gfp, pool->p.order);
+#endif
if (!page)
return NULL;
@@ -311,13 +351,10 @@ static bool __page_pool_recycle_direct(struct page *page,
/* page is NOT reusable when:
* 1) allocated when system is under some pressure. (page_is_pfmemalloc)
- * 2) belongs to a different NUMA node than pool->p.nid.
- *
- * To update pool->p.nid users must call page_pool_update_nid.
*/
static bool pool_page_reusable(struct page_pool *pool, struct page *page)
{
- return !page_is_pfmemalloc(page) && page_to_nid(page) == pool->p.nid;
+ return !page_is_pfmemalloc(page);
}
void __page_pool_put_page(struct page_pool *pool, struct page *page,
@@ -484,7 +521,15 @@ EXPORT_SYMBOL(page_pool_destroy);
/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid)
{
+ struct page *page;
+
trace_page_pool_update_nid(pool, new_nid);
pool->p.nid = new_nid;
+
+ /* Flush pool alloc cache, as refill will check NUMA node */
+ while (pool->alloc.count) {
+ page = pool->alloc.cache[--pool->alloc.count];
+ __page_pool_return_page(pool, page);
+ }
}
EXPORT_SYMBOL(page_pool_update_nid);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 294bfcf0ce0e..acc849df60b5 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -535,12 +535,12 @@ static int pgctrl_open(struct inode *inode, struct file *file)
return single_open(file, pgctrl_show, PDE_DATA(inode));
}
-static const struct file_operations pktgen_fops = {
- .open = pgctrl_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = pgctrl_write,
- .release = single_release,
+static const struct proc_ops pktgen_proc_ops = {
+ .proc_open = pgctrl_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = pgctrl_write,
+ .proc_release = single_release,
};
static int pktgen_if_show(struct seq_file *seq, void *v)
@@ -1707,12 +1707,12 @@ static int pktgen_if_open(struct inode *inode, struct file *file)
return single_open(file, pktgen_if_show, PDE_DATA(inode));
}
-static const struct file_operations pktgen_if_fops = {
- .open = pktgen_if_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = pktgen_if_write,
- .release = single_release,
+static const struct proc_ops pktgen_if_proc_ops = {
+ .proc_open = pktgen_if_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = pktgen_if_write,
+ .proc_release = single_release,
};
static int pktgen_thread_show(struct seq_file *seq, void *v)
@@ -1844,12 +1844,12 @@ static int pktgen_thread_open(struct inode *inode, struct file *file)
return single_open(file, pktgen_thread_show, PDE_DATA(inode));
}
-static const struct file_operations pktgen_thread_fops = {
- .open = pktgen_thread_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = pktgen_thread_write,
- .release = single_release,
+static const struct proc_ops pktgen_thread_proc_ops = {
+ .proc_open = pktgen_thread_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = pktgen_thread_write,
+ .proc_release = single_release,
};
/* Think find or remove for NN */
@@ -1926,7 +1926,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
pkt_dev->entry = proc_create_data(dev->name, 0600,
pn->proc_dir,
- &pktgen_if_fops,
+ &pktgen_if_proc_ops,
pkt_dev);
if (!pkt_dev->entry)
pr_err("can't move proc entry for '%s'\n",
@@ -3638,7 +3638,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->clone_skb = pg_clone_skb_d;
pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir,
- &pktgen_if_fops, pkt_dev);
+ &pktgen_if_proc_ops, pkt_dev);
if (!pkt_dev->entry) {
pr_err("cannot create %s/%s procfs entry\n",
PG_PROC_DIR, ifname);
@@ -3708,7 +3708,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
t->tsk = p;
pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir,
- &pktgen_thread_fops, t);
+ &pktgen_thread_proc_ops, t);
if (!pe) {
pr_err("cannot create %s/%s procfs entry\n",
PG_PROC_DIR, t->tsk->comm);
@@ -3793,7 +3793,7 @@ static int __net_init pg_net_init(struct net *net)
pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR);
return -ENODEV;
}
- pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops);
+ pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_proc_ops);
if (pe == NULL) {
pr_err("cannot create %s procfs entry\n", PGCTRL);
ret = -EINVAL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02916f43bf63..e1152f4ffe33 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1041,6 +1041,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_MIN_MTU */
+ nla_total_size(4) /* IFLA_MAX_MTU */
+ rtnl_prop_list_size(dev)
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */
+ 0;
}
@@ -1241,6 +1242,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
return 0;
memset(&vf_vlan_info, 0, sizeof(vf_vlan_info));
+ memset(&node_guid, 0, sizeof(node_guid));
+ memset(&port_guid, 0, sizeof(port_guid));
vf_mac.vf =
vf_vlan.vf =
@@ -1289,8 +1292,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
sizeof(vf_trust), &vf_trust))
goto nla_put_vf_failure;
- memset(&node_guid, 0, sizeof(node_guid));
- memset(&port_guid, 0, sizeof(port_guid));
if (dev->netdev_ops->ndo_get_vf_guid &&
!dev->netdev_ops->ndo_get_vf_guid(dev, vfs_num, &node_guid,
&port_guid)) {
@@ -1757,6 +1758,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0)
goto nla_put_failure;
+ if (memchr_inv(dev->perm_addr, '\0', dev->addr_len) &&
+ nla_put(skb, IFLA_PERM_ADDRESS, dev->addr_len, dev->perm_addr))
+ goto nla_put_failure;
rcu_read_lock();
if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
@@ -1822,6 +1826,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROP_LIST] = { .type = NLA_NESTED },
[IFLA_ALT_IFNAME] = { .type = NLA_STRING,
.len = ALTIFNAMSIZ - 1 },
+ [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -3048,8 +3053,17 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
dev->rtnl_link_ops = ops;
dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
- if (tb[IFLA_MTU])
- dev->mtu = nla_get_u32(tb[IFLA_MTU]);
+ if (tb[IFLA_MTU]) {
+ u32 mtu = nla_get_u32(tb[IFLA_MTU]);
+ int err;
+
+ err = dev_validate_mtu(dev, mtu, extack);
+ if (err) {
+ free_netdev(dev);
+ return ERR_PTR(err);
+ }
+ dev->mtu = mtu;
+ }
if (tb[IFLA_ADDRESS]) {
memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
nla_len(tb[IFLA_ADDRESS]));
@@ -3490,27 +3504,25 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
if (err)
return err;
- alt_ifname = nla_data(attr);
+ alt_ifname = nla_strdup(attr, GFP_KERNEL);
+ if (!alt_ifname)
+ return -ENOMEM;
+
if (cmd == RTM_NEWLINKPROP) {
- alt_ifname = kstrdup(alt_ifname, GFP_KERNEL);
- if (!alt_ifname)
- return -ENOMEM;
err = netdev_name_node_alt_create(dev, alt_ifname);
- if (err) {
- kfree(alt_ifname);
- return err;
- }
+ if (!err)
+ alt_ifname = NULL;
} else if (cmd == RTM_DELLINKPROP) {
err = netdev_name_node_alt_destroy(dev, alt_ifname);
- if (err)
- return err;
} else {
- WARN_ON(1);
- return 0;
+ WARN_ON_ONCE(1);
+ err = -EINVAL;
}
- *changed = true;
- return 0;
+ kfree(alt_ifname);
+ if (!err)
+ *changed = true;
+ return err;
}
static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 973a71f4bc89..e1101a4f90a6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,6 +68,7 @@
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/mpls.h>
+#include <net/mptcp.h>
#include <linux/uaccess.h>
#include <trace/events/skb.h>
@@ -466,7 +467,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -526,7 +526,6 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (nc->page.pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -3638,6 +3637,97 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
return head_frag;
}
+struct sk_buff *skb_segment_list(struct sk_buff *skb,
+ netdev_features_t features,
+ unsigned int offset)
+{
+ struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
+ unsigned int tnl_hlen = skb_tnl_header_len(skb);
+ unsigned int delta_truesize = 0;
+ unsigned int delta_len = 0;
+ struct sk_buff *tail = NULL;
+ struct sk_buff *nskb;
+
+ skb_push(skb, -skb_network_offset(skb) + offset);
+
+ skb_shinfo(skb)->frag_list = NULL;
+
+ do {
+ nskb = list_skb;
+ list_skb = list_skb->next;
+
+ if (!tail)
+ skb->next = nskb;
+ else
+ tail->next = nskb;
+
+ tail = nskb;
+
+ delta_len += nskb->len;
+ delta_truesize += nskb->truesize;
+
+ skb_push(nskb, -skb_network_offset(nskb) + offset);
+
+ __copy_skb_header(nskb, skb);
+
+ skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
+ skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+ nskb->data - tnl_hlen,
+ offset + tnl_hlen);
+
+ if (skb_needs_linearize(nskb, features) &&
+ __skb_linearize(nskb))
+ goto err_linearize;
+
+ } while (list_skb);
+
+ skb->truesize = skb->truesize - delta_truesize;
+ skb->data_len = skb->data_len - delta_len;
+ skb->len = skb->len - delta_len;
+
+ skb_gso_reset(skb);
+
+ skb->prev = tail;
+
+ if (skb_needs_linearize(skb, features) &&
+ __skb_linearize(skb))
+ goto err_linearize;
+
+ skb_get(skb);
+
+ return skb;
+
+err_linearize:
+ kfree_skb_list(skb->next);
+ skb->next = NULL;
+ return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL_GPL(skb_segment_list);
+
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+{
+ if (unlikely(p->len + skb->len >= 65536))
+ return -E2BIG;
+
+ if (NAPI_GRO_CB(p)->last == p)
+ skb_shinfo(p)->frag_list = skb;
+ else
+ NAPI_GRO_CB(p)->last->next = skb;
+
+ skb_pull(skb, skb_gro_offset(skb));
+
+ NAPI_GRO_CB(p)->last = skb;
+ NAPI_GRO_CB(p)->count++;
+ p->data_len += skb->len;
+ p->truesize += skb->truesize;
+ p->len += skb->len;
+
+ NAPI_GRO_CB(skb)->same_flow = 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(skb_gro_receive_list);
+
/**
* skb_segment - Perform protocol segmentation on skb.
* @head_skb: buffer to segment
@@ -4109,6 +4199,9 @@ static const u8 skb_ext_type_len[] = {
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
#endif
+#if IS_ENABLED(CONFIG_MPTCP)
+ [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
+#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
@@ -4123,6 +4216,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
skb_ext_type_len[TC_SKB_EXT] +
#endif
+#if IS_ENABLED(CONFIG_MPTCP)
+ skb_ext_type_len[SKB_EXT_MPTCP] +
+#endif
0;
}
@@ -4707,9 +4803,9 @@ static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
typeof(IPPROTO_IP) proto,
unsigned int off)
{
- switch (proto) {
- int err;
+ int err;
+ switch (proto) {
case IPPROTO_TCP:
err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
off + MAX_TCP_HDR_LEN);
@@ -5472,12 +5568,15 @@ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
}
/**
- * skb_mpls_push() - push a new MPLS header after the mac header
+ * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of
+ * the packet
*
* @skb: buffer
* @mpls_lse: MPLS label stack entry to push
* @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
* @mac_len: length of the MAC header
+ * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is
+ * ethernet
*
* Expects skb->data at mac header.
*
@@ -5501,7 +5600,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
return err;
if (!skb->inner_protocol) {
- skb_set_inner_network_header(skb, mac_len);
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
skb_set_inner_protocol(skb, skb->protocol);
}
@@ -5510,6 +5609,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
mac_len);
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
+ skb_reset_mac_len(skb);
lse = mpls_hdr(skb);
lse->label_stack_entry = mpls_lse;
@@ -5529,7 +5629,7 @@ EXPORT_SYMBOL_GPL(skb_mpls_push);
* @skb: buffer
* @next_proto: ethertype of header after popped MPLS header
* @mac_len: length of the MAC header
- * @ethernet: flag to indicate if ethernet header is present in packet
+ * @ethernet: flag to indicate if the packet is ethernet
*
* Expects skb->data at mac header.
*
@@ -5976,7 +6076,14 @@ static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
}
-static struct skb_ext *skb_ext_alloc(void)
+/**
+ * __skb_ext_alloc - allocate a new skb extensions storage
+ *
+ * Returns the newly allocated pointer. The pointer can later attached to a
+ * skb via __skb_ext_set().
+ * Note: caller must handle the skb_ext as an opaque data.
+ */
+struct skb_ext *__skb_ext_alloc(void)
{
struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
@@ -6017,6 +6124,30 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
}
/**
+ * __skb_ext_set - attach the specified extension storage to this skb
+ * @skb: buffer
+ * @id: extension id
+ * @ext: extension storage previously allocated via __skb_ext_alloc()
+ *
+ * Existing extensions, if any, are cleared.
+ *
+ * Returns the pointer to the extension.
+ */
+void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
+ struct skb_ext *ext)
+{
+ unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext);
+
+ skb_ext_put(skb);
+ newlen = newoff + skb_ext_type_len[id];
+ ext->chunks = newlen;
+ ext->offset[id] = newoff;
+ skb->extensions = ext;
+ skb->active_extensions = 1 << id;
+ return skb_ext_get_ptr(ext, id);
+}
+
+/**
* skb_ext_add - allocate space for given extension, COW if needed
* @skb: buffer
* @id: extension to allocate space for
@@ -6049,7 +6180,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
} else {
newoff = SKB_EXT_CHUNKSIZEOF(*new);
- new = skb_ext_alloc();
+ new = __skb_ext_alloc();
if (!new)
return NULL;
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 3866d7e20c07..ded2d5227678 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -594,8 +594,6 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
- sock_owned_by_me(sk);
-
sk_psock_cork_free(psock);
sk_psock_zap_ingress(psock);
diff --git a/net/core/sock.c b/net/core/sock.c
index 8459ad579f73..8f71684305c3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1830,7 +1830,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
- mem_cgroup_sk_alloc(newsk);
+
+ /* sk->sk_memcg will be populated at accept() time */
+ newsk->sk_memcg = NULL;
+
cgroup_sk_alloc(&newsk->sk_cgrp_data);
rcu_read_lock();
@@ -2786,7 +2789,7 @@ static void sock_def_error_report(struct sock *sk)
rcu_read_unlock();
}
-static void sock_def_readable(struct sock *sk)
+void sock_def_readable(struct sock *sk)
{
struct socket_wq *wq;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 8998e356f423..085cef5857bb 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -234,7 +234,6 @@ static void sock_map_free(struct bpf_map *map)
int i;
synchronize_rcu();
- rcu_read_lock();
raw_spin_lock_bh(&stab->lock);
for (i = 0; i < stab->map.max_entries; i++) {
struct sock **psk = &stab->sks[i];
@@ -243,13 +242,15 @@ static void sock_map_free(struct bpf_map *map)
sk = xchg(psk, NULL);
if (sk) {
lock_sock(sk);
+ rcu_read_lock();
sock_map_unref(sk, psk);
+ rcu_read_unlock();
release_sock(sk);
}
}
raw_spin_unlock_bh(&stab->lock);
- rcu_read_unlock();
+ /* wait for psock readers accessing its map link */
synchronize_rcu();
bpf_map_area_free(stab->sks);
@@ -416,14 +417,16 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
ret = -EINVAL;
goto out;
}
- if (!sock_map_sk_is_suitable(sk) ||
- sk->sk_state != TCP_ESTABLISHED) {
+ if (!sock_map_sk_is_suitable(sk)) {
ret = -EOPNOTSUPP;
goto out;
}
sock_map_sk_acquire(sk);
- ret = sock_map_update_common(map, idx, sk, flags);
+ if (sk->sk_state != TCP_ESTABLISHED)
+ ret = -EOPNOTSUPP;
+ else
+ ret = sock_map_update_common(map, idx, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
@@ -739,14 +742,16 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key,
ret = -EINVAL;
goto out;
}
- if (!sock_map_sk_is_suitable(sk) ||
- sk->sk_state != TCP_ESTABLISHED) {
+ if (!sock_map_sk_is_suitable(sk)) {
ret = -EOPNOTSUPP;
goto out;
}
sock_map_sk_acquire(sk);
- ret = sock_hash_update_common(map, key, sk, flags);
+ if (sk->sk_state != TCP_ESTABLISHED)
+ ret = -EOPNOTSUPP;
+ else
+ ret = sock_hash_update_common(map, key, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
@@ -859,19 +864,22 @@ static void sock_hash_free(struct bpf_map *map)
int i;
synchronize_rcu();
- rcu_read_lock();
for (i = 0; i < htab->buckets_num; i++) {
bucket = sock_hash_select_bucket(htab, i);
raw_spin_lock_bh(&bucket->lock);
hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
hlist_del_rcu(&elem->node);
lock_sock(elem->sk);
+ rcu_read_lock();
sock_map_unref(elem->sk, elem);
+ rcu_read_unlock();
release_sock(elem->sk);
}
raw_spin_unlock_bh(&bucket->lock);
}
- rcu_read_unlock();
+
+ /* wait for psock readers accessing its map link */
+ synchronize_rcu();
bpf_map_area_free(htab->buckets);
kfree(htab);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index f19f179538b9..91e9f2223c39 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -107,7 +107,6 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
if (!more_reuse)
return NULL;
- more_reuse->max_socks = more_socks_size;
more_reuse->num_socks = reuse->num_socks;
more_reuse->prog = reuse->prog;
more_reuse->reuseport_id = reuse->reuseport_id;
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 7911235706a9..04840697fe79 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -13,7 +13,7 @@
static unsigned int classify(const struct sk_buff *skb)
{
if (likely(skb->dev && skb->dev->phydev &&
- skb->dev->phydev->drv))
+ skb->dev->phydev->mii_ts))
return ptp_classify_raw(skb);
else
return PTP_CLASS_NONE;
@@ -21,7 +21,7 @@ static unsigned int classify(const struct sk_buff *skb)
void skb_clone_tx_timestamp(struct sk_buff *skb)
{
- struct phy_device *phydev;
+ struct mii_timestamper *mii_ts;
struct sk_buff *clone;
unsigned int type;
@@ -32,22 +32,22 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
if (type == PTP_CLASS_NONE)
return;
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->txtstamp)) {
+ mii_ts = skb->dev->phydev->mii_ts;
+ if (likely(mii_ts->txtstamp)) {
clone = skb_clone_sk(skb);
if (!clone)
return;
- phydev->drv->txtstamp(phydev, clone, type);
+ mii_ts->txtstamp(mii_ts, clone, type);
}
}
EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
bool skb_defer_rx_timestamp(struct sk_buff *skb)
{
- struct phy_device *phydev;
+ struct mii_timestamper *mii_ts;
unsigned int type;
- if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv)
+ if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts)
return false;
if (skb_headroom(skb) < ETH_HLEN)
@@ -62,9 +62,9 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
if (type == PTP_CLASS_NONE)
return false;
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->rxtstamp))
- return phydev->drv->rxtstamp(phydev, skb, type);
+ mii_ts = skb->dev->phydev->mii_ts;
+ if (likely(mii_ts->rxtstamp))
+ return mii_ts->rxtstamp(mii_ts, skb, type);
return false;
}
diff --git a/net/core/utils.c b/net/core/utils.c
index 6b6e51db9f3b..1f31a39236d5 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -438,6 +438,23 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(inet_proto_csum_replace4);
+/**
+ * inet_proto_csum_replace16 - update layer 4 header checksum field
+ * @sum: Layer 4 header checksum field
+ * @skb: sk_buff for the packet
+ * @from: old IPv6 address
+ * @to: new IPv6 address
+ * @pseudohdr: True if layer 4 header checksum includes pseudoheader
+ *
+ * Update layer 4 header as per the update in IPv6 src/dst address.
+ *
+ * There is no need to update skb->csum in this function, because update in two
+ * fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other
+ * for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to
+ * update skb->csum, because update in 3 fields a.) IPv4 src/dst address,
+ * b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as
+ * L4 Header checksum for skb->csum calculation.
+ */
void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
bool pseudohdr)
@@ -449,9 +466,6 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_PARTIAL) {
*sum = csum_fold(csum_partial(diff, sizeof(diff),
~csum_unfold(*sum)));
- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
- skb->csum = ~csum_partial(diff, sizeof(diff),
- ~skb->csum);
} else if (pseudohdr)
*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
csum_unfold(*sum)));