aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c206
-rw-r--r--net/core/devlink.c594
-rw-r--r--net/core/dst.c14
-rw-r--r--net/core/ethtool.c14
-rw-r--r--net/core/filter.c63
-rw-r--r--net/core/flow_dissector.c69
-rw-r--r--net/core/gen_stats.c9
-rw-r--r--net/core/neighbour.c1
-rw-r--r--net/core/net-procfs.c4
-rw-r--r--net/core/net-sysfs.c6
-rw-r--r--net/core/net_namespace.c8
-rw-r--r--net/core/pktgen.c15
-rw-r--r--net/core/rtnetlink.c322
-rw-r--r--net/core/skbuff.c14
-rw-r--r--net/core/sock.c56
-rw-r--r--net/core/sock_reuseport.c4
-rw-r--r--net/core/xdp.c73
18 files changed, 1213 insertions, 261 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 1fd0a9c88b1b..6dbbba8c57ae 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
- fib_notifier.o
+ fib_notifier.o xdp.o
obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 0e0ba36eeac9..94435cd09072 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1554,6 +1554,23 @@ void dev_disable_lro(struct net_device *dev)
}
EXPORT_SYMBOL(dev_disable_lro);
+/**
+ * dev_disable_gro_hw - disable HW Generic Receive Offload on a device
+ * @dev: device
+ *
+ * Disable HW Generic Receive Offload (GRO_HW) on a net device. Must be
+ * called under RTNL. This is needed if Generic XDP is installed on
+ * the device.
+ */
+static void dev_disable_gro_hw(struct net_device *dev)
+{
+ dev->wanted_features &= ~NETIF_F_GRO_HW;
+ netdev_update_features(dev);
+
+ if (unlikely(dev->features & NETIF_F_GRO_HW))
+ netdev_WARN(dev, "failed to disable GRO_HW!\n");
+}
+
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
@@ -2815,7 +2832,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
segs = skb_mac_gso_segment(skb, features);
- if (unlikely(skb_needs_check(skb, tx_path)))
+ if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
skb_warn_bad_offload(skb);
return segs;
@@ -3054,7 +3071,7 @@ int skb_csum_hwoffload_help(struct sk_buff *skb,
}
EXPORT_SYMBOL(skb_csum_hwoffload_help);
-static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
{
netdev_features_t features;
@@ -3078,9 +3095,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
__skb_linearize(skb))
goto out_kfree_skb;
- if (validate_xmit_xfrm(skb, features))
- goto out_kfree_skb;
-
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
* checksumming here.
@@ -3097,6 +3111,8 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
}
}
+ skb = validate_xmit_xfrm(skb, features, again);
+
return skb;
out_kfree_skb:
@@ -3106,7 +3122,7 @@ out_null:
return NULL;
}
-struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again)
{
struct sk_buff *next, *head = NULL, *tail;
@@ -3117,7 +3133,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
/* in case skb wont be segmented, point to itself */
skb->prev = skb;
- skb = validate_xmit_skb(skb, dev);
+ skb = validate_xmit_skb(skb, dev, again);
if (!skb)
continue;
@@ -3174,6 +3190,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
int rc;
qdisc_calculate_pkt_len(skb, q);
+
+ if (q->flags & TCQ_F_NOLOCK) {
+ if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+ __qdisc_drop(skb, &to_free);
+ rc = NET_XMIT_DROP;
+ } else {
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ __qdisc_run(q);
+ }
+
+ if (unlikely(to_free))
+ kfree_skb_list(to_free);
+ return rc;
+ }
+
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
@@ -3204,9 +3235,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
contended = false;
}
__qdisc_run(q);
- } else
- qdisc_run_end(q);
+ }
+ qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
@@ -3216,6 +3247,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
contended = false;
}
__qdisc_run(q);
+ qdisc_run_end(q);
}
}
spin_unlock(root_lock);
@@ -3388,8 +3420,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
else
queue_index = __netdev_pick_tx(dev, skb);
- if (!accel_priv)
- queue_index = netdev_cap_txqueue(dev, queue_index);
+ queue_index = netdev_cap_txqueue(dev, queue_index);
}
skb_set_queue_mapping(skb, queue_index);
@@ -3428,6 +3459,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
struct netdev_queue *txq;
struct Qdisc *q;
int rc = -ENOMEM;
+ bool again = false;
skb_reset_mac_header(skb);
@@ -3489,7 +3521,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
XMIT_RECURSION_LIMIT))
goto recursion_alert;
- skb = validate_xmit_skb(skb, dev);
+ skb = validate_xmit_skb(skb, dev, &again);
if (!skb)
goto out;
@@ -3885,9 +3917,33 @@ drop:
return NET_RX_DROP;
}
+static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct netdev_rx_queue *rxqueue;
+
+ rxqueue = dev->_rx;
+
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+
+ if (unlikely(index >= dev->real_num_rx_queues)) {
+ WARN_ONCE(dev->real_num_rx_queues > 1,
+ "%s received packet on queue %u, but number "
+ "of RX queues is %u\n",
+ dev->name, index, dev->real_num_rx_queues);
+
+ return rxqueue; /* Return first rxqueue */
+ }
+ rxqueue += index;
+ }
+ return rxqueue;
+}
+
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
+ struct netdev_rx_queue *rxqueue;
u32 metalen, act = XDP_DROP;
struct xdp_buff xdp;
void *orig_data;
@@ -3931,6 +3987,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
xdp.data_hard_start = skb->data - skb_headroom(skb);
orig_data = xdp.data;
+ rxqueue = netif_get_rxqueue(skb);
+ xdp.rxq = &rxqueue->xdp_rxq;
+
act = bpf_prog_run_xdp(xdp_prog, &xdp);
off = xdp.data - orig_data;
@@ -4155,21 +4214,26 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
while (head) {
struct Qdisc *q = head;
- spinlock_t *root_lock;
+ spinlock_t *root_lock = NULL;
head = head->next_sched;
- root_lock = qdisc_lock(q);
- spin_lock(root_lock);
+ if (!(q->flags & TCQ_F_NOLOCK)) {
+ root_lock = qdisc_lock(q);
+ spin_lock(root_lock);
+ }
/* We need to make sure head->next_sched is read
* before clearing __QDISC_STATE_SCHED
*/
smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED, &q->state);
qdisc_run(q);
- spin_unlock(root_lock);
+ if (root_lock)
+ spin_unlock(root_lock);
}
}
+
+ xfrm_dev_backlog(sd);
}
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
@@ -4557,6 +4621,7 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
} else if (new && !old) {
static_key_slow_inc(&generic_xdp_needed);
dev_disable_lro(dev);
+ dev_disable_gro_hw(dev);
}
break;
@@ -7085,17 +7150,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
-u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
+void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
+ struct netdev_bpf *xdp)
{
- struct netdev_bpf xdp;
-
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
+ memset(xdp, 0, sizeof(*xdp));
+ xdp->command = XDP_QUERY_PROG;
/* Query must always succeed. */
- WARN_ON(bpf_op(dev, &xdp) < 0);
- if (prog_id)
- *prog_id = xdp.prog_id;
+ WARN_ON(bpf_op(dev, xdp) < 0);
+}
+
+static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
+{
+ struct netdev_bpf xdp;
+
+ __dev_xdp_query(dev, bpf_op, &xdp);
return xdp.prog_attached;
}
@@ -7118,6 +7187,27 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
return bpf_op(dev, &xdp);
}
+static void dev_xdp_uninstall(struct net_device *dev)
+{
+ struct netdev_bpf xdp;
+ bpf_op_t ndo_bpf;
+
+ /* Remove generic XDP */
+ WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
+
+ /* Remove from the driver */
+ ndo_bpf = dev->netdev_ops->ndo_bpf;
+ if (!ndo_bpf)
+ return;
+
+ __dev_xdp_query(dev, ndo_bpf, &xdp);
+ if (xdp.prog_attached == XDP_ATTACHED_NONE)
+ return;
+
+ /* Program removal should always succeed */
+ WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL));
+}
+
/**
* dev_change_xdp_fd - set or clear a bpf program for a device rx path
* @dev: device
@@ -7146,10 +7236,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
bpf_chk = generic_xdp_install;
if (fd >= 0) {
- if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
+ if (bpf_chk && __dev_xdp_attached(dev, bpf_chk))
return -EEXIST;
if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- __dev_xdp_attached(dev, bpf_op, NULL))
+ __dev_xdp_attached(dev, bpf_op))
return -EBUSY;
prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
@@ -7248,6 +7338,7 @@ static void rollback_registered_many(struct list_head *head)
/* Shutdown queueing discipline. */
dev_shutdown(dev);
+ dev_xdp_uninstall(dev);
/* Notify protocols, that we are about to destroy
* this device. They should clean all the things.
@@ -7391,6 +7482,18 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~dev->gso_partial_features;
}
+ if (!(features & NETIF_F_RXCSUM)) {
+ /* NETIF_F_GRO_HW implies doing RXCSUM since every packet
+ * successfully merged by hardware must also have the
+ * checksum verified by hardware. If the user does not
+ * want to enable RXCSUM, logically, we should disable GRO_HW.
+ */
+ if (features & NETIF_F_GRO_HW) {
+ netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
return features;
}
@@ -7524,12 +7627,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);
-#ifdef CONFIG_SYSFS
static int netif_alloc_rx_queues(struct net_device *dev)
{
unsigned int i, count = dev->num_rx_queues;
struct netdev_rx_queue *rx;
size_t sz = count * sizeof(*rx);
+ int err = 0;
BUG_ON(count < 1);
@@ -7539,11 +7642,38 @@ static int netif_alloc_rx_queues(struct net_device *dev)
dev->_rx = rx;
- for (i = 0; i < count; i++)
+ for (i = 0; i < count; i++) {
rx[i].dev = dev;
+
+ /* XDP RX-queue setup */
+ err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
+ if (err < 0)
+ goto err_rxq_info;
+ }
return 0;
+
+err_rxq_info:
+ /* Rollback successful reg's and free other resources */
+ while (i--)
+ xdp_rxq_info_unreg(&rx[i].xdp_rxq);
+ kvfree(dev->_rx);
+ dev->_rx = NULL;
+ return err;
+}
+
+static void netif_free_rx_queues(struct net_device *dev)
+{
+ unsigned int i, count = dev->num_rx_queues;
+
+ /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */
+ if (!dev->_rx)
+ return;
+
+ for (i = 0; i < count; i++)
+ xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq);
+
+ kvfree(dev->_rx);
}
-#endif
static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue, void *_unused)
@@ -8104,12 +8234,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
return NULL;
}
-#ifdef CONFIG_SYSFS
if (rxqs < 1) {
pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
return NULL;
}
-#endif
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
@@ -8166,12 +8294,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (netif_alloc_netdev_queues(dev))
goto free_all;
-#ifdef CONFIG_SYSFS
dev->num_rx_queues = rxqs;
dev->real_num_rx_queues = rxqs;
if (netif_alloc_rx_queues(dev))
goto free_all;
-#endif
strcpy(dev->name, name);
dev->name_assign_type = name_assign_type;
@@ -8207,13 +8333,10 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
- struct bpf_prog *prog;
might_sleep();
netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
- kvfree(dev->_rx);
-#endif
+ netif_free_rx_queues(dev);
kfree(rcu_dereference_protected(dev->ingress_queue, 1));
@@ -8226,12 +8349,6 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
- prog = rcu_dereference_protected(dev->xdp_prog, 1);
- if (prog) {
- bpf_prog_put(prog);
- static_key_slow_dec(&generic_xdp_needed);
- }
-
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
@@ -8819,6 +8936,9 @@ static int __init net_dev_init(void)
skb_queue_head_init(&sd->input_pkt_queue);
skb_queue_head_init(&sd->process_queue);
+#ifdef CONFIG_XFRM_OFFLOAD
+ skb_queue_head_init(&sd->xfrm_backlog);
+#endif
INIT_LIST_HEAD(&sd->poll_list);
sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 7d430c1d9c3e..dd7d6dd07bfb 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -92,12 +92,6 @@ static LIST_HEAD(devlink_list);
*/
static DEFINE_MUTEX(devlink_mutex);
-/* devlink_port_mutex
- *
- * Shared lock to guard lists of ports in all devlink devices.
- */
-static DEFINE_MUTEX(devlink_port_mutex);
-
static struct net *devlink_net(const struct devlink *devlink)
{
return read_pnet(&devlink->_net);
@@ -335,15 +329,18 @@ devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0)
#define DEVLINK_NL_FLAG_NEED_PORT BIT(1)
#define DEVLINK_NL_FLAG_NEED_SB BIT(2)
-#define DEVLINK_NL_FLAG_LOCK_PORTS BIT(3)
- /* port is not needed but we need to ensure they don't
- * change in the middle of command
- */
+
+/* The per devlink instance lock is taken by default in the pre-doit
+ * operation, yet several commands do not require this. The global
+ * devlink lock is taken and protects from disruption by user-calls.
+ */
+#define DEVLINK_NL_FLAG_NO_LOCK BIT(3)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink;
+ int err;
mutex_lock(&devlink_mutex);
devlink = devlink_get_from_info(info);
@@ -351,44 +348,47 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
mutex_unlock(&devlink_mutex);
return PTR_ERR(devlink);
}
+ if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ mutex_lock(&devlink->lock);
if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
info->user_ptr[0] = devlink;
} else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
struct devlink_port *devlink_port;
- mutex_lock(&devlink_port_mutex);
devlink_port = devlink_port_get_from_info(devlink, info);
if (IS_ERR(devlink_port)) {
- mutex_unlock(&devlink_port_mutex);
- mutex_unlock(&devlink_mutex);
- return PTR_ERR(devlink_port);
+ err = PTR_ERR(devlink_port);
+ goto unlock;
}
info->user_ptr[0] = devlink_port;
}
- if (ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS) {
- mutex_lock(&devlink_port_mutex);
- }
if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) {
struct devlink_sb *devlink_sb;
devlink_sb = devlink_sb_get_from_info(devlink, info);
if (IS_ERR(devlink_sb)) {
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT)
- mutex_unlock(&devlink_port_mutex);
- mutex_unlock(&devlink_mutex);
- return PTR_ERR(devlink_sb);
+ err = PTR_ERR(devlink_sb);
+ goto unlock;
}
info->user_ptr[1] = devlink_sb;
}
return 0;
+
+unlock:
+ if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ mutex_unlock(&devlink->lock);
+ mutex_unlock(&devlink_mutex);
+ return err;
}
static void devlink_nl_post_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT ||
- ops->internal_flags & DEVLINK_NL_FLAG_LOCK_PORTS)
- mutex_unlock(&devlink_port_mutex);
+ struct devlink *devlink;
+
+ devlink = devlink_get_from_info(info);
+ if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ mutex_unlock(&devlink->lock);
mutex_unlock(&devlink_mutex);
}
@@ -614,10 +614,10 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
int err;
mutex_lock(&devlink_mutex);
- mutex_lock(&devlink_port_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
continue;
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_port, &devlink->port_list, list) {
if (idx < start) {
idx++;
@@ -628,13 +628,15 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
- if (err)
+ if (err) {
+ mutex_unlock(&devlink->lock);
goto out;
+ }
idx++;
}
+ mutex_unlock(&devlink->lock);
}
out:
- mutex_unlock(&devlink_port_mutex);
mutex_unlock(&devlink_mutex);
cb->args[0] = idx;
@@ -801,6 +803,7 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
continue;
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
if (idx < start) {
idx++;
@@ -811,10 +814,13 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
- if (err)
+ if (err) {
+ mutex_unlock(&devlink->lock);
goto out;
+ }
idx++;
}
+ mutex_unlock(&devlink->lock);
}
out:
mutex_unlock(&devlink_mutex);
@@ -935,14 +941,18 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops || !devlink->ops->sb_pool_get)
continue;
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
devlink_sb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq);
- if (err && err != -EOPNOTSUPP)
+ if (err && err != -EOPNOTSUPP) {
+ mutex_unlock(&devlink->lock);
goto out;
+ }
}
+ mutex_unlock(&devlink->lock);
}
out:
mutex_unlock(&devlink_mutex);
@@ -1123,22 +1133,24 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
int err;
mutex_lock(&devlink_mutex);
- mutex_lock(&devlink_port_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops || !devlink->ops->sb_port_pool_get)
continue;
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_port_pool_get_dumpit(msg, start, &idx,
devlink, devlink_sb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq);
- if (err && err != -EOPNOTSUPP)
+ if (err && err != -EOPNOTSUPP) {
+ mutex_unlock(&devlink->lock);
goto out;
+ }
}
+ mutex_unlock(&devlink->lock);
}
out:
- mutex_unlock(&devlink_port_mutex);
mutex_unlock(&devlink_mutex);
cb->args[0] = idx;
@@ -1347,23 +1359,26 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
int err;
mutex_lock(&devlink_mutex);
- mutex_lock(&devlink_port_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops || !devlink->ops->sb_tc_pool_bind_get)
continue;
+
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx,
devlink,
devlink_sb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq);
- if (err && err != -EOPNOTSUPP)
+ if (err && err != -EOPNOTSUPP) {
+ mutex_unlock(&devlink->lock);
goto out;
+ }
}
+ mutex_unlock(&devlink->lock);
}
out:
- mutex_unlock(&devlink_port_mutex);
mutex_unlock(&devlink_mutex);
cb->args[0] = idx;
@@ -1679,6 +1694,12 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
table->counters_enabled))
goto nla_put_failure;
+ if (table->resource_valid) {
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
+ table->resource_id, DEVLINK_ATTR_PAD);
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
+ table->resource_units, DEVLINK_ATTR_PAD);
+ }
if (devlink_dpipe_matches_put(table, skb))
goto nla_put_failure;
@@ -2273,6 +2294,272 @@ static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
counters_enable);
}
+struct devlink_resource *
+devlink_resource_find(struct devlink *devlink,
+ struct devlink_resource *resource, u64 resource_id)
+{
+ struct list_head *resource_list;
+
+ if (resource)
+ resource_list = &resource->resource_list;
+ else
+ resource_list = &devlink->resource_list;
+
+ list_for_each_entry(resource, resource_list, list) {
+ struct devlink_resource *child_resource;
+
+ if (resource->id == resource_id)
+ return resource;
+
+ child_resource = devlink_resource_find(devlink, resource,
+ resource_id);
+ if (child_resource)
+ return child_resource;
+ }
+ return NULL;
+}
+
+void devlink_resource_validate_children(struct devlink_resource *resource)
+{
+ struct devlink_resource *child_resource;
+ bool size_valid = true;
+ u64 parts_size = 0;
+
+ if (list_empty(&resource->resource_list))
+ goto out;
+
+ list_for_each_entry(child_resource, &resource->resource_list, list)
+ parts_size += child_resource->size_new;
+
+ if (parts_size > resource->size)
+ size_valid = false;
+out:
+ resource->size_valid = size_valid;
+}
+
+static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_resource *resource;
+ u64 resource_id;
+ u64 size;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_RESOURCE_ID] ||
+ !info->attrs[DEVLINK_ATTR_RESOURCE_SIZE])
+ return -EINVAL;
+ resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (!resource)
+ return -EINVAL;
+
+ if (!resource->resource_ops->size_validate)
+ return -EINVAL;
+
+ size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
+ err = resource->resource_ops->size_validate(devlink, size,
+ info->extack);
+ if (err)
+ return err;
+
+ resource->size_new = size;
+ devlink_resource_validate_children(resource);
+ if (resource->parent)
+ devlink_resource_validate_children(resource->parent);
+ return 0;
+}
+
+static void
+devlink_resource_size_params_put(struct devlink_resource *resource,
+ struct sk_buff *skb)
+{
+ struct devlink_resource_size_params *size_params;
+
+ size_params = resource->size_params;
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
+ size_params->size_granularity, DEVLINK_ATTR_PAD);
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
+ size_params->size_max, DEVLINK_ATTR_PAD);
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
+ size_params->size_min, DEVLINK_ATTR_PAD);
+ nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit);
+}
+
+static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
+ struct devlink_resource *resource)
+{
+ struct devlink_resource *child_resource;
+ struct nlattr *child_resource_attr;
+ struct nlattr *resource_attr;
+
+ resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE);
+ if (!resource_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size,
+ DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (resource->size != resource->size_new)
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
+ resource->size_new, DEVLINK_ATTR_PAD);
+ if (resource->resource_ops && resource->resource_ops->occ_get)
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
+ resource->resource_ops->occ_get(devlink),
+ DEVLINK_ATTR_PAD);
+ devlink_resource_size_params_put(resource, skb);
+ if (list_empty(&resource->resource_list))
+ goto out;
+
+ if (nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_SIZE_VALID,
+ resource->size_valid))
+ goto nla_put_failure;
+
+ child_resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST);
+ if (!child_resource_attr)
+ goto nla_put_failure;
+
+ list_for_each_entry(child_resource, &resource->resource_list, list) {
+ if (devlink_resource_put(devlink, skb, child_resource))
+ goto resource_put_failure;
+ }
+
+ nla_nest_end(skb, child_resource_attr);
+out:
+ nla_nest_end(skb, resource_attr);
+ return 0;
+
+resource_put_failure:
+ nla_nest_cancel(skb, child_resource_attr);
+nla_put_failure:
+ nla_nest_cancel(skb, resource_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_resource_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_resource *resource;
+ struct nlattr *resources_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ bool incomplete;
+ void *hdr;
+ int i;
+ int err;
+
+ resource = list_first_entry(&devlink->resource_list,
+ struct devlink_resource, list);
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr) {
+ nlmsg_free(skb);
+ return -EMSGSIZE;
+ }
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+
+ resources_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST);
+ if (!resources_attr)
+ goto nla_put_failure;
+
+ incomplete = false;
+ i = 0;
+ list_for_each_entry_from(resource, &devlink->resource_list, list) {
+ err = devlink_resource_put(devlink, skb, resource);
+ if (err) {
+ if (!i)
+ goto err_resource_put;
+ incomplete = true;
+ break;
+ }
+ i++;
+ }
+ nla_nest_end(skb, resources_attr);
+ genlmsg_end(skb, hdr);
+ if (incomplete)
+ goto start_again;
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_resource_put:
+err_skb_send_alloc:
+ genlmsg_cancel(skb, hdr);
+ nlmsg_free(skb);
+ return err;
+}
+
+static int devlink_nl_cmd_resource_dump(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (list_empty(&devlink->resource_list))
+ return -EOPNOTSUPP;
+
+ return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
+}
+
+static int
+devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info)
+{
+ struct list_head *resource_list;
+ int err = 0;
+
+ if (resource)
+ resource_list = &resource->resource_list;
+ else
+ resource_list = &devlink->resource_list;
+
+ list_for_each_entry(resource, resource_list, list) {
+ if (!resource->size_valid)
+ return -EINVAL;
+ err = devlink_resources_validate(devlink, resource, info);
+ if (err)
+ return err;
+ }
+ return err;
+}
+
+static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ int err;
+
+ if (!devlink->ops->reload)
+ return -EOPNOTSUPP;
+
+ err = devlink_resources_validate(devlink, NULL, info);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
+ return err;
+ }
+ return devlink->ops->reload(devlink);
+}
+
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -2291,6 +2578,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64},
+ [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64},
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -2322,14 +2611,16 @@ static const struct genl_ops devlink_nl_ops[] = {
.doit = devlink_nl_cmd_port_split_doit,
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PORT_UNSPLIT,
.doit = devlink_nl_cmd_port_unsplit_doit,
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_SB_GET,
@@ -2397,8 +2688,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB |
- DEVLINK_NL_FLAG_LOCK_PORTS,
+ DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
@@ -2406,8 +2696,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.policy = devlink_nl_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB |
- DEVLINK_NL_FLAG_LOCK_PORTS,
+ DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_ESWITCH_GET,
@@ -2451,6 +2740,28 @@ static const struct genl_ops devlink_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
+ {
+ .cmd = DEVLINK_CMD_RESOURCE_SET,
+ .doit = devlink_nl_cmd_resource_set,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_RESOURCE_DUMP,
+ .doit = devlink_nl_cmd_resource_dump,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_RELOAD,
+ .doit = devlink_nl_cmd_reload,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -2488,6 +2799,8 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
+ INIT_LIST_HEAD(&devlink->resource_list);
+ mutex_init(&devlink->lock);
return devlink;
}
EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -2550,16 +2863,16 @@ int devlink_port_register(struct devlink *devlink,
struct devlink_port *devlink_port,
unsigned int port_index)
{
- mutex_lock(&devlink_port_mutex);
+ mutex_lock(&devlink->lock);
if (devlink_port_index_exists(devlink, port_index)) {
- mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink->lock);
return -EEXIST;
}
devlink_port->devlink = devlink;
devlink_port->index = port_index;
devlink_port->registered = true;
list_add_tail(&devlink_port->list, &devlink->port_list);
- mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink->lock);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
return 0;
}
@@ -2572,10 +2885,12 @@ EXPORT_SYMBOL_GPL(devlink_port_register);
*/
void devlink_port_unregister(struct devlink_port *devlink_port)
{
+ struct devlink *devlink = devlink_port->devlink;
+
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
- mutex_lock(&devlink_port_mutex);
+ mutex_lock(&devlink->lock);
list_del(&devlink_port->list);
- mutex_unlock(&devlink_port_mutex);
+ mutex_unlock(&devlink->lock);
}
EXPORT_SYMBOL_GPL(devlink_port_unregister);
@@ -2651,7 +2966,7 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
struct devlink_sb *devlink_sb;
int err = 0;
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
if (devlink_sb_index_exists(devlink, sb_index)) {
err = -EEXIST;
goto unlock;
@@ -2670,7 +2985,7 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
devlink_sb->egress_tc_count = egress_tc_count;
list_add_tail(&devlink_sb->list, &devlink->sb_list);
unlock:
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
return err;
}
EXPORT_SYMBOL_GPL(devlink_sb_register);
@@ -2679,11 +2994,11 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
{
struct devlink_sb *devlink_sb;
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
WARN_ON(!devlink_sb);
list_del(&devlink_sb->list);
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
kfree(devlink_sb);
}
EXPORT_SYMBOL_GPL(devlink_sb_unregister);
@@ -2699,9 +3014,9 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister);
int devlink_dpipe_headers_register(struct devlink *devlink,
struct devlink_dpipe_headers *dpipe_headers)
{
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
devlink->dpipe_headers = dpipe_headers;
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
return 0;
}
EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
@@ -2715,9 +3030,9 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
*/
void devlink_dpipe_headers_unregister(struct devlink *devlink)
{
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
devlink->dpipe_headers = NULL;
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
}
EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister);
@@ -2783,9 +3098,9 @@ int devlink_dpipe_table_register(struct devlink *devlink,
table->priv = priv;
table->counter_control_extern = counter_control_extern;
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
return 0;
}
EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
@@ -2801,20 +3116,181 @@ void devlink_dpipe_table_unregister(struct devlink *devlink,
{
struct devlink_dpipe_table *table;
- mutex_lock(&devlink_mutex);
+ mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
table_name);
if (!table)
goto unlock;
list_del_rcu(&table->list);
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
kfree_rcu(table, rcu);
return;
unlock:
- mutex_unlock(&devlink_mutex);
+ mutex_unlock(&devlink->lock);
}
EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
+/**
+ * devlink_resource_register - devlink resource register
+ *
+ * @devlink: devlink
+ * @resource_name: resource's name
+ * @top_hierarchy: top hierarchy
+ * @reload_required: reload is required for new configuration to
+ * apply
+ * @resource_size: resource's size
+ * @resource_id: resource's id
+ * @parent_reosurce_id: resource's parent id
+ * @size params: size parameters
+ * @resource_ops: resource ops
+ */
+int devlink_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ bool top_hierarchy,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ struct devlink_resource_size_params *size_params,
+ const struct devlink_resource_ops *resource_ops)
+{
+ struct devlink_resource *resource;
+ struct list_head *resource_list;
+ int err = 0;
+
+ mutex_lock(&devlink->lock);
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (resource) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ resource = kzalloc(sizeof(*resource), GFP_KERNEL);
+ if (!resource) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (top_hierarchy) {
+ resource_list = &devlink->resource_list;
+ } else {
+ struct devlink_resource *parent_resource;
+
+ parent_resource = devlink_resource_find(devlink, NULL,
+ parent_resource_id);
+ if (parent_resource) {
+ resource_list = &parent_resource->resource_list;
+ resource->parent = parent_resource;
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ resource->name = resource_name;
+ resource->size = resource_size;
+ resource->size_new = resource_size;
+ resource->id = resource_id;
+ resource->resource_ops = resource_ops;
+ resource->size_valid = true;
+ resource->size_params = size_params;
+ INIT_LIST_HEAD(&resource->resource_list);
+ list_add_tail(&resource->list, resource_list);
+out:
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_resource_register);
+
+/**
+ * devlink_resources_unregister - free all resources
+ *
+ * @devlink: devlink
+ * @resource: resource
+ */
+void devlink_resources_unregister(struct devlink *devlink,
+ struct devlink_resource *resource)
+{
+ struct devlink_resource *tmp, *child_resource;
+ struct list_head *resource_list;
+
+ if (resource)
+ resource_list = &resource->resource_list;
+ else
+ resource_list = &devlink->resource_list;
+
+ if (!resource)
+ mutex_lock(&devlink->lock);
+
+ list_for_each_entry_safe(child_resource, tmp, resource_list, list) {
+ devlink_resources_unregister(devlink, child_resource);
+ list_del(&child_resource->list);
+ kfree(child_resource);
+ }
+
+ if (!resource)
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_resources_unregister);
+
+/**
+ * devlink_resource_size_get - get and update size
+ *
+ * @devlink: devlink
+ * @resource_id: the requested resource id
+ * @p_resource_size: ptr to update
+ */
+int devlink_resource_size_get(struct devlink *devlink,
+ u64 resource_id,
+ u64 *p_resource_size)
+{
+ struct devlink_resource *resource;
+ int err = 0;
+
+ mutex_lock(&devlink->lock);
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (!resource) {
+ err = -EINVAL;
+ goto out;
+ }
+ *p_resource_size = resource->size_new;
+ resource->size = resource->size_new;
+out:
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_resource_size_get);
+
+/**
+ * devlink_dpipe_table_resource_set - set the resource id
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @resource_id: resource id
+ * @resource_units: number of resource's units consumed per table's entry
+ */
+int devlink_dpipe_table_resource_set(struct devlink *devlink,
+ const char *table_name, u64 resource_id,
+ u64 resource_units)
+{
+ struct devlink_dpipe_table *table;
+ int err = 0;
+
+ mutex_lock(&devlink->lock);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table) {
+ err = -EINVAL;
+ goto out;
+ }
+ table->resource_id = resource_id;
+ table->resource_units = resource_units;
+ table->resource_valid = true;
+out:
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set);
+
static int __init devlink_module_init(void)
{
return genl_register_family(&devlink_nl_family);
diff --git a/net/core/dst.c b/net/core/dst.c
index 662a2d4a3d19..007aa0b08291 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -21,6 +21,7 @@
#include <linux/sched.h>
#include <linux/prefetch.h>
#include <net/lwtunnel.h>
+#include <net/xfrm.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
@@ -62,15 +63,12 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
struct net_device *dev, int initial_ref, int initial_obsolete,
unsigned short flags)
{
- dst->child = NULL;
dst->dev = dev;
if (dev)
dev_hold(dev);
dst->ops = ops;
dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
- dst->path = dst;
- dst->from = NULL;
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
@@ -88,7 +86,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
- dst->next = NULL;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
}
@@ -116,12 +113,17 @@ EXPORT_SYMBOL(dst_alloc);
struct dst_entry *dst_destroy(struct dst_entry * dst)
{
- struct dst_entry *child;
+ struct dst_entry *child = NULL;
smp_rmb();
- child = dst->child;
+#ifdef CONFIG_XFRM
+ if (dst->xfrm) {
+ struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
+ child = xdst->child;
+ }
+#endif
if (!(dst->flags & DST_NOCOUNT))
dst_entries_add(dst->ops, -1);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 8225416911ae..107b122c8969 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -73,6 +73,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_LLTX_BIT] = "tx-lockless",
[NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
[NETIF_F_GRO_BIT] = "rx-gro",
+ [NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
[NETIF_F_LRO_BIT] = "rx-lro",
[NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
@@ -1692,14 +1693,23 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
{
- struct ethtool_ringparam ringparam;
+ struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM };
- if (!dev->ethtool_ops->set_ringparam)
+ if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam)
return -EOPNOTSUPP;
if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
return -EFAULT;
+ dev->ethtool_ops->get_ringparam(dev, &max);
+
+ /* ensure new ring parameters are within the maximums */
+ if (ringparam.rx_pending > max.rx_max_pending ||
+ ringparam.rx_mini_pending > max.rx_mini_max_pending ||
+ ringparam.rx_jumbo_pending > max.rx_jumbo_max_pending ||
+ ringparam.tx_pending > max.tx_max_pending)
+ return -EINVAL;
+
return dev->ethtool_ops->set_ringparam(dev, &ringparam);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index d339ef170df6..db2ee8c7e1bd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2682,8 +2682,9 @@ static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
return 0;
}
-int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
+static int xdp_do_generic_redirect_map(struct net_device *dev,
+ struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
unsigned long map_owner = ri->map_owner;
@@ -3011,6 +3012,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
if (flags & BPF_F_DONT_FRAGMENT)
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
+ if (flags & BPF_F_ZERO_CSUM_TX)
+ info->key.tun_flags &= ~TUNNEL_CSUM;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
@@ -3024,8 +3027,6 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
IPV6_FLOWLABEL_MASK;
} else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
- if (flags & BPF_F_ZERO_CSUM_TX)
- info->key.tun_flags &= ~TUNNEL_CSUM;
}
return 0;
@@ -4301,6 +4302,24 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data_end));
break;
+ case offsetof(struct xdp_md, ingress_ifindex):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, rxq));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct xdp_rxq_info, dev));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct net_device, ifindex));
+ break;
+ case offsetof(struct xdp_md, rx_queue_index):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, rxq));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct xdp_rxq_info,
+ queue_index));
+ break;
}
return insn - insn_buf;
@@ -4435,6 +4454,42 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
offsetof(struct sock_common, skc_num));
break;
+
+ case offsetof(struct bpf_sock_ops, is_fullsock):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern,
+ is_fullsock),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ is_fullsock));
+ break;
+
+/* Helper macro for adding read access to tcp_sock fields. */
+#define SOCK_OPS_GET_TCP32(FIELD_NAME) \
+ do { \
+ BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD_NAME) != 4); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, \
+ is_fullsock), \
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ is_fullsock)); \
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, sk),\
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sock_ops_kern, sk));\
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, \
+ offsetof(struct tcp_sock, FIELD_NAME)); \
+ } while (0)
+
+ case offsetof(struct bpf_sock_ops, snd_cwnd):
+ SOCK_OPS_GET_TCP32(snd_cwnd);
+ break;
+
+ case offsetof(struct bpf_sock_ops, srtt_us):
+ SOCK_OPS_GET_TCP32(srtt_us);
+ break;
}
return insn - insn_buf;
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 15ce30063765..02db7b122a73 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -24,6 +24,7 @@
#include <linux/tcp.h>
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
+#include <uapi/linux/batadv_packet.h>
static void dissector_set_key(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
@@ -133,10 +134,10 @@ skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
ctrl->addr_type = type;
}
-static void
-__skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
- struct flow_dissector *flow_dissector,
- void *target_container)
+void
+skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
{
struct ip_tunnel_info *info;
struct ip_tunnel_key *key;
@@ -212,6 +213,7 @@ __skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
tp->dst = key->tp_dst;
}
}
+EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
@@ -436,6 +438,57 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
return FLOW_DISSECT_RET_PROTO_AGAIN;
}
+/**
+ * __skb_flow_dissect_batadv() - dissect batman-adv header
+ * @skb: sk_buff to with the batman-adv header
+ * @key_control: flow dissectors control key
+ * @data: raw buffer pointer to the packet, if NULL use skb->data
+ * @p_proto: pointer used to update the protocol to process next
+ * @p_nhoff: pointer used to update inner network header offset
+ * @hlen: packet header length
+ * @flags: any combination of FLOW_DISSECTOR_F_*
+ *
+ * ETH_P_BATMAN packets are tried to be dissected. Only
+ * &struct batadv_unicast packets are actually processed because they contain an
+ * inner ethernet header and are usually followed by actual network header. This
+ * allows the flow dissector to continue processing the packet.
+ *
+ * Return: FLOW_DISSECT_RET_PROTO_AGAIN when &struct batadv_unicast was found,
+ * FLOW_DISSECT_RET_OUT_GOOD when dissector should stop after encapsulation,
+ * otherwise FLOW_DISSECT_RET_OUT_BAD
+ */
+static enum flow_dissect_ret
+__skb_flow_dissect_batadv(const struct sk_buff *skb,
+ struct flow_dissector_key_control *key_control,
+ void *data, __be16 *p_proto, int *p_nhoff, int hlen,
+ unsigned int flags)
+{
+ struct {
+ struct batadv_unicast_packet batadv_unicast;
+ struct ethhdr eth;
+ } *hdr, _hdr;
+
+ hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), data, hlen,
+ &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (hdr->batadv_unicast.version != BATADV_COMPAT_VERSION)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (hdr->batadv_unicast.packet_type != BATADV_UNICAST)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ *p_proto = hdr->eth.h_proto;
+ *p_nhoff += sizeof(*hdr);
+
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ return FLOW_DISSECT_RET_PROTO_AGAIN;
+}
+
static void
__skb_flow_dissect_tcp(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
@@ -576,9 +629,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
- __skb_flow_dissect_tunnel_info(skb, flow_dissector,
- target_container);
-
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
@@ -817,6 +867,11 @@ proto_again:
nhoff, hlen);
break;
+ case htons(ETH_P_BATMAN):
+ fdret = __skb_flow_dissect_batadv(skb, key_control, data,
+ &proto, &nhoff, hlen, flags);
+ break;
+
default:
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 87f28557b329..b2b2323bdc84 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -252,10 +252,10 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
}
}
-static void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
- const struct gnet_stats_queue __percpu *cpu,
- const struct gnet_stats_queue *q,
- __u32 qlen)
+void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu,
+ const struct gnet_stats_queue *q,
+ __u32 qlen)
{
if (cpu) {
__gnet_stats_copy_queue_cpu(qstats, cpu);
@@ -269,6 +269,7 @@ static void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
qstats->qlen = qlen;
}
+EXPORT_SYMBOL(__gnet_stats_copy_queue);
/**
* gnet_stats_copy_queue - copy queue statistics into statistics TLV
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 7f831711b6e0..7b7a14abba28 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2862,7 +2862,6 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file)
};
static const struct file_operations neigh_stat_seq_fops = {
- .owner = THIS_MODULE,
.open = neigh_stat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 615ccab55f38..e010bb800d7b 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -182,7 +182,6 @@ static int dev_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations dev_seq_fops = {
- .owner = THIS_MODULE,
.open = dev_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -202,7 +201,6 @@ static int softnet_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations softnet_seq_fops = {
- .owner = THIS_MODULE,
.open = softnet_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -306,7 +304,6 @@ static int ptype_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations ptype_seq_fops = {
- .owner = THIS_MODULE,
.open = ptype_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -387,7 +384,6 @@ static int dev_mc_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations dev_mc_seq_fops = {
- .owner = THIS_MODULE,
.open = dev_mc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 799b75268291..7bf8b85ade16 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -961,7 +961,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
while (--i >= new_num) {
struct kobject *kobj = &dev->_rx[i].kobj;
- if (!atomic_read(&dev_net(dev)->count))
+ if (!refcount_read(&dev_net(dev)->count))
kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
@@ -1367,7 +1367,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
while (--i >= new_num) {
struct netdev_queue *queue = dev->_tx + i;
- if (!atomic_read(&dev_net(dev)->count))
+ if (!refcount_read(&dev_net(dev)->count))
queue->kobj.uevent_suppress = 1;
#ifdef CONFIG_BQL
sysfs_remove_group(&queue->kobj, &dql_group);
@@ -1558,7 +1558,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
{
struct device *dev = &ndev->dev;
- if (!atomic_read(&dev_net(ndev)->count))
+ if (!refcount_read(&dev_net(ndev)->count))
dev_set_uevent_suppress(dev, 1);
kobject_get(&dev->kobj);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 60a71be75aea..2213d45fcafd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -35,7 +35,7 @@ LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
struct net init_net = {
- .count = ATOMIC_INIT(1),
+ .count = REFCOUNT_INIT(1),
.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);
@@ -224,10 +224,10 @@ int peernet2id_alloc(struct net *net, struct net *peer)
bool alloc;
int id;
- if (atomic_read(&net->count) == 0)
+ if (refcount_read(&net->count) == 0)
return NETNSA_NSID_NOT_ASSIGNED;
spin_lock_bh(&net->nsid_lock);
- alloc = atomic_read(&peer->count) == 0 ? false : true;
+ alloc = refcount_read(&peer->count) == 0 ? false : true;
id = __peernet2id_alloc(net, peer, &alloc);
spin_unlock_bh(&net->nsid_lock);
if (alloc && id >= 0)
@@ -284,7 +284,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
int error = 0;
LIST_HEAD(net_exit_list);
- atomic_set(&net->count, 1);
+ refcount_set(&net->count, 1);
refcount_set(&net->passive, 1);
net->dev_base_seq = 1;
net->user_ns = user_ns;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f95a15086225..4fcfcb14e7c6 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -399,7 +399,7 @@ struct pktgen_dev {
__u8 ipsmode; /* IPSEC mode (config) */
__u8 ipsproto; /* IPSEC type (config) */
__u32 spi;
- struct dst_entry dst;
+ struct xfrm_dst xdst;
struct dst_ops dstops;
#endif
char result[512];
@@ -523,7 +523,6 @@ static int pgctrl_open(struct inode *inode, struct file *file)
}
static const struct file_operations pktgen_fops = {
- .owner = THIS_MODULE,
.open = pgctrl_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1804,7 +1803,6 @@ static int pktgen_if_open(struct inode *inode, struct file *file)
}
static const struct file_operations pktgen_if_fops = {
- .owner = THIS_MODULE,
.open = pktgen_if_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1942,7 +1940,6 @@ static int pktgen_thread_open(struct inode *inode, struct file *file)
}
static const struct file_operations pktgen_thread_fops = {
- .owner = THIS_MODULE,
.open = pktgen_thread_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2609,7 +2606,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
* supports both transport/tunnel mode + ESP/AH type.
*/
if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0))
- skb->_skb_refdst = (unsigned long)&pkt_dev->dst | SKB_DST_NOREF;
+ skb->_skb_refdst = (unsigned long)&pkt_dev->xdst.u.dst | SKB_DST_NOREF;
rcu_read_lock_bh();
err = x->outer_mode->output(x, skb);
@@ -3742,10 +3739,10 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
* performance under such circumstance.
*/
pkt_dev->dstops.family = AF_INET;
- pkt_dev->dst.dev = pkt_dev->odev;
- dst_init_metrics(&pkt_dev->dst, pktgen_dst_metrics, false);
- pkt_dev->dst.child = &pkt_dev->dst;
- pkt_dev->dst.ops = &pkt_dev->dstops;
+ pkt_dev->xdst.u.dst.dev = pkt_dev->odev;
+ dst_init_metrics(&pkt_dev->xdst.u.dst, pktgen_dst_metrics, false);
+ pkt_dev->xdst.child = &pkt_dev->xdst.u.dst;
+ pkt_dev->xdst.u.dst.ops = &pkt_dev->dstops;
#endif
return add_dev_to_thread(t, pkt_dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 778d7f03404a..16d644a4f974 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -62,7 +62,9 @@
struct rtnl_link {
rtnl_doit_func doit;
rtnl_dumpit_func dumpit;
+ struct module *owner;
unsigned int flags;
+ struct rcu_head rcu;
};
static DEFINE_MUTEX(rtnl_mutex);
@@ -127,8 +129,7 @@ bool lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
-static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
-static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1];
+static struct rtnl_link *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
{
@@ -144,72 +145,127 @@ static inline int rtm_msgindex(int msgtype)
return msgindex;
}
-/**
- * __rtnl_register - Register a rtnetlink message type
- * @protocol: Protocol family or PF_UNSPEC
- * @msgtype: rtnetlink message type
- * @doit: Function pointer called for each request message
- * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
- *
- * Registers the specified function pointers (at least one of them has
- * to be non-NULL) to be called whenever a request message for the
- * specified protocol family and message type is received.
- *
- * The special protocol family PF_UNSPEC may be used to define fallback
- * function pointers for the case when no entry for the specific protocol
- * family exists.
- *
- * Returns 0 on success or a negative error code.
- */
-int __rtnl_register(int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- unsigned int flags)
+static struct rtnl_link *rtnl_get_link(int protocol, int msgtype)
+{
+ struct rtnl_link **tab;
+
+ if (protocol >= ARRAY_SIZE(rtnl_msg_handlers))
+ protocol = PF_UNSPEC;
+
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[protocol]);
+ if (!tab)
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]);
+
+ return tab[msgtype];
+}
+
+static int rtnl_register_internal(struct module *owner,
+ int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ unsigned int flags)
{
- struct rtnl_link *tab;
+ struct rtnl_link *link, *old;
+ struct rtnl_link __rcu **tab;
int msgindex;
+ int ret = -ENOBUFS;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
- tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]);
+ rtnl_lock();
+ tab = rtnl_msg_handlers[protocol];
if (tab == NULL) {
- tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
- if (tab == NULL)
- return -ENOBUFS;
+ tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL);
+ if (!tab)
+ goto unlock;
+ /* ensures we see the 0 stores */
rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
}
+ old = rtnl_dereference(tab[msgindex]);
+ if (old) {
+ link = kmemdup(old, sizeof(*old), GFP_KERNEL);
+ if (!link)
+ goto unlock;
+ } else {
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link)
+ goto unlock;
+ }
+
+ WARN_ON(link->owner && link->owner != owner);
+ link->owner = owner;
+
+ WARN_ON(doit && link->doit && link->doit != doit);
if (doit)
- tab[msgindex].doit = doit;
+ link->doit = doit;
+ WARN_ON(dumpit && link->dumpit && link->dumpit != dumpit);
if (dumpit)
- tab[msgindex].dumpit = dumpit;
- tab[msgindex].flags |= flags;
+ link->dumpit = dumpit;
- return 0;
+ link->flags |= flags;
+
+ /* publish protocol:msgtype */
+ rcu_assign_pointer(tab[msgindex], link);
+ ret = 0;
+ if (old)
+ kfree_rcu(old, rcu);
+unlock:
+ rtnl_unlock();
+ return ret;
+}
+
+/**
+ * rtnl_register_module - Register a rtnetlink message type
+ *
+ * @owner: module registering the hook (THIS_MODULE)
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
+ *
+ * Like rtnl_register, but for use by removable modules.
+ */
+int rtnl_register_module(struct module *owner,
+ int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ unsigned int flags)
+{
+ return rtnl_register_internal(owner, protocol, msgtype,
+ doit, dumpit, flags);
}
-EXPORT_SYMBOL_GPL(__rtnl_register);
+EXPORT_SYMBOL_GPL(rtnl_register_module);
/**
* rtnl_register - Register a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
+ *
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
*
- * Identical to __rtnl_register() but panics on failure. This is useful
- * as failure of this function is very unlikely, it can only happen due
- * to lack of memory when allocating the chain to store all message
- * handlers for a protocol. Meant for use in init functions where lack
- * of memory implies no sense in continuing.
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
*/
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit,
unsigned int flags)
{
- if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0)
- panic("Unable to register rtnetlink message handler, "
- "protocol = %d, message type = %d\n",
- protocol, msgtype);
+ int err;
+
+ err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit,
+ flags);
+ if (err)
+ pr_err("Unable to register rtnetlink message handler, "
+ "protocol = %d, message type = %d\n", protocol, msgtype);
}
-EXPORT_SYMBOL_GPL(rtnl_register);
/**
* rtnl_unregister - Unregister a rtnetlink message type
@@ -220,24 +276,25 @@ EXPORT_SYMBOL_GPL(rtnl_register);
*/
int rtnl_unregister(int protocol, int msgtype)
{
- struct rtnl_link *handlers;
+ struct rtnl_link **tab, *link;
int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
- if (!handlers) {
+ tab = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ if (!tab) {
rtnl_unlock();
return -ENOENT;
}
- handlers[msgindex].doit = NULL;
- handlers[msgindex].dumpit = NULL;
- handlers[msgindex].flags = 0;
+ link = tab[msgindex];
+ rcu_assign_pointer(tab[msgindex], NULL);
rtnl_unlock();
+ kfree_rcu(link, rcu);
+
return 0;
}
EXPORT_SYMBOL_GPL(rtnl_unregister);
@@ -251,20 +308,27 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
*/
void rtnl_unregister_all(int protocol)
{
- struct rtnl_link *handlers;
+ struct rtnl_link **tab, *link;
+ int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ tab = rtnl_msg_handlers[protocol];
RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
+ for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) {
+ link = tab[msgindex];
+ if (!link)
+ continue;
+
+ rcu_assign_pointer(tab[msgindex], NULL);
+ kfree_rcu(link, rcu);
+ }
rtnl_unlock();
synchronize_net();
- while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1)
- schedule();
- kfree(handlers);
+ kfree(tab);
}
EXPORT_SYMBOL_GPL(rtnl_unregister_all);
@@ -840,6 +904,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_MULTICAST */
nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_RX_DROPPED */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_TX_DROPPED */
+ nla_total_size_64bit(sizeof(__u64)) +
nla_total_size(sizeof(struct ifla_vf_trust)));
return size;
} else
@@ -1194,7 +1262,11 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
- vf_stats.multicast, IFLA_VF_STATS_PAD)) {
+ vf_stats.multicast, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED,
+ vf_stats.rx_dropped, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED,
+ vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) {
nla_nest_cancel(skb, vfstats);
goto nla_put_vf_failure;
}
@@ -1261,6 +1333,7 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
{
const struct net_device_ops *ops = dev->netdev_ops;
const struct bpf_prog *generic_xdp_prog;
+ struct netdev_bpf xdp;
ASSERT_RTNL();
@@ -1273,7 +1346,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
if (!ops->ndo_bpf)
return XDP_ATTACHED_NONE;
- return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id);
+ __dev_xdp_query(dev, ops->ndo_bpf, &xdp);
+ *prog_id = xdp.prog_id;
+
+ return xdp.prog_attached;
}
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1569,6 +1645,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROMISCUITY] = { .type = NLA_U32 },
[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
+ [IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 },
+ [IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 },
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
@@ -2219,6 +2297,34 @@ static int do_setlink(const struct sk_buff *skb,
}
}
+ if (tb[IFLA_GSO_MAX_SIZE]) {
+ u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]);
+
+ if (max_size > GSO_MAX_SIZE) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (dev->gso_max_size ^ max_size) {
+ netif_set_gso_max_size(dev, max_size);
+ status |= DO_SETLINK_MODIFIED;
+ }
+ }
+
+ if (tb[IFLA_GSO_MAX_SEGS]) {
+ u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
+
+ if (max_segs > GSO_MAX_SEGS) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (dev->gso_max_segs ^ max_segs) {
+ dev->gso_max_segs = max_segs;
+ status |= DO_SETLINK_MODIFIED;
+ }
+ }
+
if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
@@ -2583,6 +2689,10 @@ struct net_device *rtnl_create_link(struct net *net,
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
if (tb[IFLA_GROUP])
dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+ if (tb[IFLA_GSO_MAX_SIZE])
+ netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE]));
+ if (tb[IFLA_GSO_MAX_SEGS])
+ dev->gso_max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
return dev;
}
@@ -2973,18 +3083,26 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = 1;
for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
+ struct rtnl_link **tab;
int type = cb->nlh->nlmsg_type-RTM_BASE;
- struct rtnl_link *handlers;
+ struct rtnl_link *link;
rtnl_dumpit_func dumpit;
if (idx < s_idx || idx == PF_PACKET)
continue;
- handlers = rtnl_dereference(rtnl_msg_handlers[idx]);
- if (!handlers)
+ if (type < 0 || type >= RTM_NR_MSGTYPES)
continue;
- dumpit = READ_ONCE(handlers[type].dumpit);
+ tab = rcu_dereference_rtnl(rtnl_msg_handlers[idx]);
+ if (!tab)
+ continue;
+
+ link = tab[type];
+ if (!link)
+ continue;
+
+ dumpit = link->dumpit;
if (!dumpit)
continue;
@@ -4314,7 +4432,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct rtnl_link *handlers;
+ struct rtnl_link *link;
+ struct module *owner;
int err = -EOPNOTSUPP;
rtnl_doit_func doit;
unsigned int flags;
@@ -4338,79 +4457,85 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- if (family >= ARRAY_SIZE(rtnl_msg_handlers))
- family = PF_UNSPEC;
-
rcu_read_lock();
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
- if (!handlers) {
- family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
- }
-
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
u16 min_dump_alloc = 0;
- dumpit = READ_ONCE(handlers[type].dumpit);
- if (!dumpit) {
+ link = rtnl_get_link(family, type);
+ if (!link || !link->dumpit) {
family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]);
- if (!handlers)
- goto err_unlock;
-
- dumpit = READ_ONCE(handlers[type].dumpit);
- if (!dumpit)
+ link = rtnl_get_link(family, type);
+ if (!link || !link->dumpit)
goto err_unlock;
}
-
- refcount_inc(&rtnl_msg_handlers_ref[family]);
+ owner = link->owner;
+ dumpit = link->dumpit;
if (type == RTM_GETLINK - RTM_BASE)
min_dump_alloc = rtnl_calcit(skb, nlh);
+ err = 0;
+ /* need to do this before rcu_read_unlock() */
+ if (!try_module_get(owner))
+ err = -EPROTONOSUPPORT;
+
rcu_read_unlock();
rtnl = net->rtnl;
- {
+ if (err == 0) {
struct netlink_dump_control c = {
.dump = dumpit,
.min_dump_alloc = min_dump_alloc,
+ .module = owner,
};
err = netlink_dump_start(rtnl, skb, nlh, &c);
+ /* netlink_dump_start() will keep a reference on
+ * module if dump is still in progress.
+ */
+ module_put(owner);
}
- refcount_dec(&rtnl_msg_handlers_ref[family]);
return err;
}
- doit = READ_ONCE(handlers[type].doit);
- if (!doit) {
+ link = rtnl_get_link(family, type);
+ if (!link || !link->doit) {
family = PF_UNSPEC;
- handlers = rcu_dereference(rtnl_msg_handlers[family]);
+ link = rtnl_get_link(PF_UNSPEC, type);
+ if (!link || !link->doit)
+ goto out_unlock;
+ }
+
+ owner = link->owner;
+ if (!try_module_get(owner)) {
+ err = -EPROTONOSUPPORT;
+ goto out_unlock;
}
- flags = READ_ONCE(handlers[type].flags);
+ flags = link->flags;
if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
- refcount_inc(&rtnl_msg_handlers_ref[family]);
- doit = READ_ONCE(handlers[type].doit);
+ doit = link->doit;
rcu_read_unlock();
if (doit)
err = doit(skb, nlh, extack);
- refcount_dec(&rtnl_msg_handlers_ref[family]);
+ module_put(owner);
return err;
}
-
rcu_read_unlock();
rtnl_lock();
- handlers = rtnl_dereference(rtnl_msg_handlers[family]);
- if (handlers) {
- doit = READ_ONCE(handlers[type].doit);
- if (doit)
- err = doit(skb, nlh, extack);
- }
+ link = rtnl_get_link(family, type);
+ if (link && link->doit)
+ err = link->doit(skb, nlh, extack);
rtnl_unlock();
+
+ module_put(owner);
+
+ return err;
+
+out_unlock:
+ rcu_read_unlock();
return err;
err_unlock:
@@ -4498,11 +4623,6 @@ static struct pernet_operations rtnetlink_net_ops = {
void __init rtnetlink_init(void)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++)
- refcount_set(&rtnl_msg_handlers_ref[i], 1);
-
if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 08f574081315..01e8285aea73 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3656,6 +3656,10 @@ normal:
skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
SKBTX_SHARED_FRAG;
+ if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+ goto err;
+
while (pos < offset + len) {
if (i >= nfrags) {
BUG_ON(skb_headlen(list_skb));
@@ -3667,6 +3671,11 @@ normal:
BUG_ON(!nfrags);
+ if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, frag_skb,
+ GFP_ATOMIC))
+ goto err;
+
list_skb = list_skb->next;
}
@@ -3678,11 +3687,6 @@ normal:
goto err;
}
- if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
- goto err;
- if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
- goto err;
-
*nskb_frag = *frag;
__skb_frag_ref(nskb_frag);
size = skb_frag_size(nskb_frag);
diff --git a/net/core/sock.c b/net/core/sock.c
index c0b5b2f17412..abf4cbff99b2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -145,6 +145,8 @@
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
+static void sock_inuse_add(struct net *net, int val);
+
/**
* sk_ns_capable - General socket capability test
* @sk: Socket to use a capability on or through
@@ -1531,8 +1533,11 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sk->sk_kern_sock = kern;
sock_lock_init(sk);
sk->sk_net_refcnt = kern ? 0 : 1;
- if (likely(sk->sk_net_refcnt))
+ if (likely(sk->sk_net_refcnt)) {
get_net(net);
+ sock_inuse_add(net, 1);
+ }
+
sock_net_set(sk, net);
refcount_set(&sk->sk_wmem_alloc, 1);
@@ -1595,6 +1600,9 @@ void sk_destruct(struct sock *sk)
static void __sk_free(struct sock *sk)
{
+ if (likely(sk->sk_net_refcnt))
+ sock_inuse_add(sock_net(sk), -1);
+
if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
sock_diag_broadcast_destroy(sk);
else
@@ -1716,6 +1724,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
+ if (likely(newsk->sk_net_refcnt))
+ sock_inuse_add(sock_net(newsk), 1);
/*
* Before updating sk_refcnt, we must commit prior changes to memory
@@ -3045,7 +3055,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
- __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
+ __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
@@ -3055,21 +3065,50 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot)
int res = 0;
for_each_possible_cpu(cpu)
- res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
+ res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+static void sock_inuse_add(struct net *net, int val)
+{
+ this_cpu_add(*net->core.sock_inuse, val);
+}
+
+int sock_inuse_get(struct net *net)
+{
+ int cpu, res = 0;
+
+ for_each_possible_cpu(cpu)
+ res += *per_cpu_ptr(net->core.sock_inuse, cpu);
+
+ return res;
+}
+
+EXPORT_SYMBOL_GPL(sock_inuse_get);
+
static int __net_init sock_inuse_init_net(struct net *net)
{
- net->core.inuse = alloc_percpu(struct prot_inuse);
- return net->core.inuse ? 0 : -ENOMEM;
+ net->core.prot_inuse = alloc_percpu(struct prot_inuse);
+ if (net->core.prot_inuse == NULL)
+ return -ENOMEM;
+
+ net->core.sock_inuse = alloc_percpu(int);
+ if (net->core.sock_inuse == NULL)
+ goto out;
+
+ return 0;
+
+out:
+ free_percpu(net->core.prot_inuse);
+ return -ENOMEM;
}
static void __net_exit sock_inuse_exit_net(struct net *net)
{
- free_percpu(net->core.inuse);
+ free_percpu(net->core.prot_inuse);
+ free_percpu(net->core.sock_inuse);
}
static struct pernet_operations net_inuse_ops = {
@@ -3112,6 +3151,10 @@ static inline void assign_proto_idx(struct proto *prot)
static inline void release_proto_idx(struct proto *prot)
{
}
+
+static void sock_inuse_add(struct net *net, int val)
+{
+}
#endif
static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
@@ -3319,7 +3362,6 @@ static int proto_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations proto_seq_fops = {
- .owner = THIS_MODULE,
.open = proto_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 5eeb1d20cc38..c5bb52bc73a1 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -235,7 +235,9 @@ struct sock *reuseport_select_sock(struct sock *sk,
if (prog && skb)
sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
- else
+
+ /* no bpf or invalid bpf result: fall back to hash usage */
+ if (!sk2)
sk2 = reuse->socks[reciprocal_scale(hash, socks)];
}
diff --git a/net/core/xdp.c b/net/core/xdp.c
new file mode 100644
index 000000000000..097a0f74e004
--- /dev/null
+++ b/net/core/xdp.c
@@ -0,0 +1,73 @@
+/* net/core/xdp.c
+ *
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * Released under terms in GPL version 2. See COPYING.
+ */
+#include <linux/types.h>
+#include <linux/mm.h>
+
+#include <net/xdp.h>
+
+#define REG_STATE_NEW 0x0
+#define REG_STATE_REGISTERED 0x1
+#define REG_STATE_UNREGISTERED 0x2
+#define REG_STATE_UNUSED 0x3
+
+void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
+{
+ /* Simplify driver cleanup code paths, allow unreg "unused" */
+ if (xdp_rxq->reg_state == REG_STATE_UNUSED)
+ return;
+
+ WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
+
+ xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
+ xdp_rxq->dev = NULL;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
+
+static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
+{
+ memset(xdp_rxq, 0, sizeof(*xdp_rxq));
+}
+
+/* Returns 0 on success, negative on failure */
+int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index)
+{
+ if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
+ WARN(1, "Driver promised not to register this");
+ return -EINVAL;
+ }
+
+ if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
+ WARN(1, "Missing unregister, handled but fix driver");
+ xdp_rxq_info_unreg(xdp_rxq);
+ }
+
+ if (!dev) {
+ WARN(1, "Missing net_device from driver");
+ return -ENODEV;
+ }
+
+ /* State either UNREGISTERED or NEW */
+ xdp_rxq_info_init(xdp_rxq);
+ xdp_rxq->dev = dev;
+ xdp_rxq->queue_index = queue_index;
+
+ xdp_rxq->reg_state = REG_STATE_REGISTERED;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
+
+void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
+{
+ xdp_rxq->reg_state = REG_STATE_UNUSED;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);
+
+bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
+{
+ return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);