Diffstat (limited to 'net')
-rw-r--r--  net/8021q/vlan.c | 6
-rw-r--r--  net/8021q/vlan.h | 2
-rw-r--r--  net/8021q/vlan_netlink.c | 2
-rw-r--r--  net/batman-adv/hard-interface.c | 2
-rw-r--r--  net/batman-adv/soft-interface.c | 5
-rw-r--r--  net/bpf/test_run.c | 3
-rw-r--r--  net/bridge/br_device.c | 5
-rw-r--r--  net/bridge/br_if.c | 15
-rw-r--r--  net/bridge/br_input.c | 1
-rw-r--r--  net/bridge/br_ioctl.c | 5
-rw-r--r--  net/bridge/br_netlink.c | 14
-rw-r--r--  net/bridge/br_private.h | 13
-rw-r--r--  net/bridge/br_sysfs_if.c | 18
-rw-r--r--  net/core/datagram.c | 2
-rw-r--r--  net/core/dev.c | 190
-rw-r--r--  net/core/filter.c | 91
-rw-r--r--  net/core/flow_dissector.c | 100
-rw-r--r--  net/core/lwt_bpf.c | 2
-rw-r--r--  net/core/neighbour.c | 18
-rw-r--r--  net/core/net-sysfs.c | 17
-rw-r--r--  net/core/rtnetlink.c | 194
-rw-r--r--  net/core/skbuff.c | 42
-rw-r--r--  net/dsa/Makefile | 2
-rw-r--r--  net/dsa/dsa.c | 34
-rw-r--r--  net/dsa/dsa2.c | 33
-rw-r--r--  net/dsa/dsa_priv.h | 39
-rw-r--r--  net/dsa/legacy.c | 25
-rw-r--r--  net/dsa/master.c | 117
-rw-r--r--  net/dsa/port.c | 42
-rw-r--r--  net/dsa/slave.c | 285
-rw-r--r--  net/dsa/tag_brcm.c | 9
-rw-r--r--  net/dsa/tag_dsa.c | 18
-rw-r--r--  net/dsa/tag_edsa.c | 18
-rw-r--r--  net/dsa/tag_ksz.c | 9
-rw-r--r--  net/dsa/tag_lan9303.c | 20
-rw-r--r--  net/dsa/tag_mtk.c | 16
-rw-r--r--  net/dsa/tag_qca.c | 17
-rw-r--r--  net/dsa/tag_trailer.c | 9
-rw-r--r--  net/ipv4/af_inet.c | 9
-rw-r--r--  net/ipv4/devinet.c | 30
-rw-r--r--  net/ipv4/fib_frontend.c | 22
-rw-r--r--  net/ipv4/fib_semantics.c | 15
-rw-r--r--  net/ipv4/inet_connection_sock.c | 36
-rw-r--r--  net/ipv4/inetpeer.c | 11
-rw-r--r--  net/ipv4/ip_gre.c | 22
-rw-r--r--  net/ipv4/ip_tunnel.c | 12
-rw-r--r--  net/ipv4/ip_vti.c | 7
-rw-r--r--  net/ipv4/ipip.c | 7
-rw-r--r--  net/ipv4/ipmr.c | 261
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 60
-rw-r--r--  net/ipv4/tcp.c | 6
-rw-r--r--  net/ipv4/tcp_fastopen.c | 105
-rw-r--r--  net/ipv4/tcp_input.c | 33
-rw-r--r--  net/ipv4/tcp_ipv4.c | 10
-rw-r--r--  net/ipv4/tcp_metrics.c | 14
-rw-r--r--  net/ipv4/tcp_vegas.c | 2
-rw-r--r--  net/ipv4/udp.c | 3
-rw-r--r--  net/ipv6/addrconf.c | 18
-rw-r--r--  net/ipv6/addrlabel.c | 81
-rw-r--r--  net/ipv6/exthdrs_core.c | 2
-rw-r--r--  net/ipv6/ip6_gre.c | 8
-rw-r--r--  net/ipv6/ip6_tunnel.c | 20
-rw-r--r--  net/ipv6/ip6_vti.c | 23
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 12
-rw-r--r--  net/ipv6/sit.c | 9
-rw-r--r--  net/kcm/kcmsock.c | 2
-rw-r--r--  net/mac802154/llsec.c | 14
-rw-r--r--  net/nfc/core.c | 5
-rw-r--r--  net/nfc/hci/core.c | 5
-rw-r--r--  net/nfc/hci/llc_shdlc.c | 15
-rw-r--r--  net/nfc/llcp_core.c | 10
-rw-r--r--  net/openvswitch/vport-netdev.c | 3
-rw-r--r--  net/packet/af_packet.c | 4
-rw-r--r--  net/sched/act_bpf.c | 4
-rw-r--r--  net/sched/cls_basic.c | 38
-rw-r--r--  net/sched/cls_bpf.c | 61
-rw-r--r--  net/sched/cls_flower.c | 25
-rw-r--r--  net/sched/cls_u32.c | 108
-rw-r--r--  net/sched/sch_api.c | 3
-rw-r--r--  net/sched/sch_generic.c | 10
-rw-r--r--  net/sched/sch_htb.c | 5
-rw-r--r--  net/sched/sch_netem.c | 14
-rw-r--r--  net/sctp/Makefile | 3
-rw-r--r--  net/sctp/chunk.c | 6
-rw-r--r--  net/sctp/outqueue.c | 63
-rw-r--r--  net/sctp/sm_sideeffect.c | 3
-rw-r--r--  net/sctp/socket.c | 179
-rw-r--r--  net/sctp/stream.c | 196
-rw-r--r--  net/sctp/stream_sched.c | 275
-rw-r--r--  net/sctp/stream_sched_prio.c | 347
-rw-r--r--  net/sctp/stream_sched_rr.c | 201
-rw-r--r--  net/smc/smc_cdc.c | 7
-rw-r--r--  net/smc/smc_cdc.h | 3
-rw-r--r--  net/smc/smc_tx.c | 6
-rw-r--r--  net/vmw_vsock/vmci_transport.c | 1
95 files changed, 2791 insertions(+), 1103 deletions(-)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9649579b5b9f..71c3e045505b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -138,7 +138,7 @@ int vlan_check_real_dev(struct net_device *real_dev,
return 0;
}
-int register_vlan_dev(struct net_device *dev)
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
@@ -174,7 +174,7 @@ int register_vlan_dev(struct net_device *dev)
if (err < 0)
goto out_uninit_mvrp;
- err = netdev_upper_dev_link(real_dev, dev);
+ err = netdev_upper_dev_link(real_dev, dev, extack);
if (err)
goto out_unregister_netdev;
@@ -270,7 +270,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
vlan->flags = VLAN_FLAG_REORDER_HDR;
new_dev->rtnl_link_ops = &vlan_link_ops;
- err = register_vlan_dev(new_dev);
+ err = register_vlan_dev(new_dev, NULL);
if (err < 0)
goto out_free_newdev;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index df8bd65dd370..94f8eed9f9b3 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -107,7 +107,7 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
int vlan_check_real_dev(struct net_device *real_dev,
__be16 protocol, u16 vlan_id);
void vlan_setup(struct net_device *dev);
-int register_vlan_dev(struct net_device *dev);
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
bool vlan_dev_inherit_address(struct net_device *dev,
struct net_device *real_dev);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 5e831de3103e..6e7c5a6a7930 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -160,7 +160,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
if (err < 0)
return err;
- return register_vlan_dev(dev);
+ return register_vlan_dev(dev, extack);
}
static inline size_t vlan_qos_map_size(unsigned int n)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index e348f76ea8c1..f7b413b9297e 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -738,7 +738,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
bat_priv = netdev_priv(hard_iface->soft_iface);
ret = netdev_master_upper_dev_link(hard_iface->net_dev,
- soft_iface, NULL, NULL);
+ soft_iface, NULL, NULL, NULL);
if (ret)
goto err_dev;
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 10f7edfb176e..e7d5fbb6ad53 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -69,7 +69,7 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
int result;
/* TODO: We must check if we can release all references to non-payload
- * data using skb_header_release in our skbs to allow skb_cow_header to
+ * data using __skb_header_release in our skbs to allow skb_cow_header to
* work optimally. This means that those skbs are not allowed to read
* or write any data which is before the current position of skb->data
* after that call and thus allow other skbs with the same data buffer
@@ -867,7 +867,8 @@ free_bat_counters:
* Return: 0 if successful or error otherwise.
*/
static int batadv_softif_slave_add(struct net_device *dev,
- struct net_device *slave_dev)
+ struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
{
struct batadv_hard_iface *hard_iface;
struct net *net = dev_net(dev);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 6be41a44d688..a86e6687026e 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -133,7 +133,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
if (is_l2)
__skb_push(skb, ETH_HLEN);
if (is_direct_pkt_access)
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
retval = bpf_test_run(prog, skb, repeat, &duration);
if (!is_l2)
__skb_push(skb, ETH_HLEN);
@@ -162,6 +162,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
xdp.data_hard_start = data;
xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;
+ xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + size;
retval = bpf_test_run(prog, &xdp, repeat, &duration);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f6b6a92f1c48..7acb77c9bd65 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -320,12 +320,13 @@ void br_netpoll_disable(struct net_bridge_port *p)
#endif
-static int br_add_slave(struct net_device *dev, struct net_device *slave_dev)
+static int br_add_slave(struct net_device *dev, struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
{
struct net_bridge *br = netdev_priv(dev);
- return br_add_if(br, slave_dev);
+ return br_add_if(br, slave_dev, extack);
}
static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3aef22931ab..59a74a414e20 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -480,7 +480,8 @@ netdev_features_t br_features_recompute(struct net_bridge *br,
}
/* called with RTNL */
-int br_add_if(struct net_bridge *br, struct net_device *dev)
+int br_add_if(struct net_bridge *br, struct net_device *dev,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_port *p;
int err = 0;
@@ -500,16 +501,22 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
return -EINVAL;
/* No bridging of bridges */
- if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
+ if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
+ NL_SET_ERR_MSG(extack,
+ "Can not enslave a bridge to a bridge");
return -ELOOP;
+ }
/* Device is already being bridged */
if (br_port_exists(dev))
return -EBUSY;
/* No bridging devices that dislike that (e.g. wireless) */
- if (dev->priv_flags & IFF_DONT_BRIDGE)
+ if (dev->priv_flags & IFF_DONT_BRIDGE) {
+ NL_SET_ERR_MSG(extack,
+ "Device does not allow enslaving to a bridge");
return -EOPNOTSUPP;
+ }
p = new_nbp(br, dev);
if (IS_ERR(p))
@@ -540,7 +547,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
dev->priv_flags |= IFF_BRIDGE_PORT;
- err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL);
+ err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
if (err)
goto err5;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 7637f58c1226..7cb613776b31 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -289,6 +289,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
*
* Others reserved for future standardization
*/
+ fwd_mask |= p->group_fwd_mask;
switch (dest[5]) {
case 0x00: /* Bridge Group Address */
/* If STP is turned off,
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 7970f8540cbb..8f29103935a3 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -98,10 +98,13 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
return -EINVAL;
if (isadd)
- ret = br_add_if(br, dev);
+ ret = br_add_if(br, dev, NULL);
else
ret = br_del_if(br, dev);
+ if (!ret)
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_MASTER, GFP_KERNEL);
+
return ret;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 3bc890716c89..dea88a255d26 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -152,6 +152,7 @@ static inline size_t br_port_info_size(void)
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
#endif
+ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
+ 0;
}
@@ -208,7 +209,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
p->topology_change_ack) ||
nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) ||
nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags &
- BR_VLAN_TUNNEL)))
+ BR_VLAN_TUNNEL)) ||
+ nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask))
return -EMSGSIZE;
timerval = br_timer_value(&p->message_age_timer);
@@ -637,6 +639,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
[IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
[IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
+ [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
};
/* Change the state of the port and notify spanning tree */
@@ -773,6 +776,15 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
return err;
}
#endif
+
+ if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
+ u16 fwd_mask = nla_get_u16(tb[IFLA_BRPORT_GROUP_FWD_MASK]);
+
+ if (fwd_mask & BR_GROUPFWD_MACPAUSE)
+ return -EINVAL;
+ p->group_fwd_mask = fwd_mask;
+ }
+
br_port_flags_change(p, old_flags ^ p->flags);
return 0;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index e870cfc85b14..ab4df24f7bba 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -36,7 +36,14 @@
/* Control of forwarding link local multicast */
#define BR_GROUPFWD_DEFAULT 0
/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */
-#define BR_GROUPFWD_RESTRICTED 0x0007u
+enum {
+ BR_GROUPFWD_STP = BIT(0),
+ BR_GROUPFWD_MACPAUSE = BIT(1),
+ BR_GROUPFWD_LACP = BIT(2),
+};
+
+#define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \
+ BR_GROUPFWD_LACP)
/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
#define BR_GROUPFWD_8021AD 0xB801u
@@ -268,6 +275,7 @@ struct net_bridge_port {
#ifdef CONFIG_NET_SWITCHDEV
int offload_fwd_mark;
#endif
+ u16 group_fwd_mask;
};
#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
@@ -558,7 +566,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
void br_port_carrier_check(struct net_bridge_port *p);
int br_add_bridge(struct net *net, const char *name);
int br_del_bridge(struct net *net, const char *name);
-int br_add_if(struct net_bridge *br, struct net_device *dev);
+int br_add_if(struct net_bridge *br, struct net_device *dev,
+ struct netlink_ext_ack *extack);
int br_del_if(struct net_bridge *br, struct net_device *dev);
int br_min_mtu(const struct net_bridge *br);
netdev_features_t br_features_recompute(struct net_bridge *br,
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 5d5d413a6cf8..9110d5e56085 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -165,6 +165,23 @@ static int store_flush(struct net_bridge_port *p, unsigned long v)
}
static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf)
+{
+ return sprintf(buf, "%#x\n", p->group_fwd_mask);
+}
+
+static int store_group_fwd_mask(struct net_bridge_port *p,
+ unsigned long v)
+{
+ if (v & BR_GROUPFWD_MACPAUSE)
+ return -EINVAL;
+ p->group_fwd_mask = v;
+
+ return 0;
+}
+static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
+ store_group_fwd_mask);
+
BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
@@ -223,6 +240,7 @@ static const struct brport_attribute *brport_attrs[] = {
&brport_attr_proxyarp_wifi,
&brport_attr_multicast_flood,
&brport_attr_broadcast_flood,
+ &brport_attr_group_fwd_mask,
NULL
};
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f7fb7e3f2acf..0b7b4c22719e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -188,7 +188,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
}
if (!skb->len) {
skb = skb_set_peeked(skb);
- if (unlikely(IS_ERR(skb))) {
+ if (IS_ERR(skb)) {
*err = PTR_ERR(skb);
return NULL;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 588b473194a8..fcddccb6be41 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
+#include <linux/net_namespace.h>
#include "net-sysfs.h"
@@ -162,7 +163,6 @@ static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info);
static struct napi_struct *napi_by_id(unsigned int napi_id);
@@ -188,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id);
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+static DEFINE_MUTEX(ifalias_mutex);
+
/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);
@@ -1265,29 +1267,53 @@ rollback:
*/
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
- char *new_ifalias;
-
- ASSERT_RTNL();
+ struct dev_ifalias *new_alias = NULL;
if (len >= IFALIASZ)
return -EINVAL;
- if (!len) {
- kfree(dev->ifalias);
- dev->ifalias = NULL;
- return 0;
+ if (len) {
+ new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
+ if (!new_alias)
+ return -ENOMEM;
+
+ memcpy(new_alias->ifalias, alias, len);
+ new_alias->ifalias[len] = 0;
}
- new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
- if (!new_ifalias)
- return -ENOMEM;
- dev->ifalias = new_ifalias;
- memcpy(dev->ifalias, alias, len);
- dev->ifalias[len] = 0;
+ mutex_lock(&ifalias_mutex);
+ rcu_swap_protected(dev->ifalias, new_alias,
+ mutex_is_locked(&ifalias_mutex));
+ mutex_unlock(&ifalias_mutex);
+
+ if (new_alias)
+ kfree_rcu(new_alias, rcuhead);
return len;
}
+/**
+ * dev_get_alias - get ifalias of a device
+ * @dev: device
+ * @name: buffer to store name of ifalias
+ * @len: size of buffer
+ *
+ * get ifalias for a device. Caller must make sure dev cannot go
+ * away, e.g. rcu read lock or own a reference count to device.
+ */
+int dev_get_alias(const struct net_device *dev, char *name, size_t len)
+{
+ const struct dev_ifalias *alias;
+ int ret = 0;
+
+ rcu_read_lock();
+ alias = rcu_dereference(dev->ifalias);
+ if (alias)
+ ret = snprintf(name, len, "%s", alias->ifalias);
+ rcu_read_unlock();
+
+ return ret;
+}
/**
* netdev_features_change - device changes features
@@ -1312,10 +1338,11 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info.dev = dev,
+ };
- change_info.flags_changed = 0;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGE,
&change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
@@ -1536,9 +1563,10 @@ EXPORT_SYMBOL(dev_disable_lro);
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- netdev_notifier_info_init(&info, dev);
return nb->notifier_call(nb, val, &info);
}
@@ -1663,11 +1691,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
*/
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info)
{
ASSERT_RTNL();
- netdev_notifier_info_init(info, dev);
return raw_notifier_call_chain(&netdev_chain, val, info);
}
@@ -1682,9 +1708,11 @@ static int call_netdevice_notifiers_info(unsigned long val,
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- return call_netdevice_notifiers_info(val, dev, &info);
+ return call_netdevice_notifiers_info(val, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -3864,8 +3892,8 @@ drop:
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
+ u32 metalen, act = XDP_DROP;
struct xdp_buff xdp;
- u32 act = XDP_DROP;
void *orig_data;
int hlen, off;
u32 mac_len;
@@ -3876,8 +3904,25 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (skb_cloned(skb))
return XDP_PASS;
- if (skb_linearize(skb))
- goto do_drop;
+ /* XDP packets must be linear and must have sufficient headroom
+ * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+ * native XDP provides, thus we need to do it here as well.
+ */
+ if (skb_is_nonlinear(skb) ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ int troom = skb->tail + skb->data_len - skb->end;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ if (pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+ goto do_drop;
+ if (troom > 0 && __skb_linearize(skb))
+ goto do_drop;
+ }
/* The XDP program wants to see the packet starting at the MAC
* header.
@@ -3885,6 +3930,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
mac_len = skb->data - skb_mac_header(skb);
hlen = skb_headlen(skb) + mac_len;
xdp.data = skb->data - mac_len;
+ xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + hlen;
xdp.data_hard_start = skb->data - skb_headroom(skb);
orig_data = xdp.data;
@@ -3902,10 +3948,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
case XDP_REDIRECT:
case XDP_TX:
__skb_push(skb, mac_len);
- /* fall through */
+ break;
case XDP_PASS:
+ metalen = xdp.data - xdp.data_meta;
+ if (metalen)
+ skb_metadata_set(skb, metalen);
break;
-
default:
bpf_warn_invalid_xdp_action(act);
/* fall through */
@@ -4695,6 +4743,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= p->vlan_tci ^ skb->vlan_tci;
diffs |= skb_metadata_dst_cmp(p, skb);
+ diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
skb_mac_header(skb));
@@ -6228,9 +6277,19 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
- void *upper_priv, void *upper_info)
-{
- struct netdev_notifier_changeupper_info changeupper_info;
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ .extack = extack,
+ },
+ .upper_dev = upper_dev,
+ .master = master,
+ .linking = true,
+ .upper_info = upper_info,
+ };
int ret = 0;
ASSERT_RTNL();
@@ -6248,12 +6307,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- changeupper_info.upper_dev = upper_dev;
- changeupper_info.master = master;
- changeupper_info.linking = true;
- changeupper_info.upper_info = upper_info;
-
- ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6264,7 +6318,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
return ret;
- ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6289,9 +6343,11 @@ rollback:
* returns zero.
*/
int netdev_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ struct netlink_ext_ack *extack)
{
- return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, false,
+ NULL, NULL, extack);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -6310,10 +6366,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
*/
int netdev_master_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev,
- void *upper_priv, void *upper_info)
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
{
return __netdev_upper_dev_link(dev, upper_dev, true,
- upper_priv, upper_info);
+ upper_priv, upper_info, extack);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
@@ -6328,20 +6385,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- struct netdev_notifier_changeupper_info changeupper_info;
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ },
+ .upper_dev = upper_dev,
+ .linking = false,
+ };
ASSERT_RTNL();
- changeupper_info.upper_dev = upper_dev;
changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
- changeupper_info.linking = false;
- call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -6357,11 +6418,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink);
void netdev_bonding_info_change(struct net_device *dev,
struct netdev_bonding_info *bonding_info)
{
- struct netdev_notifier_bonding_info info;
+ struct netdev_notifier_bonding_info info = {
+ .info.dev = dev,
+ };
memcpy(&info.bonding_info, bonding_info,
sizeof(struct netdev_bonding_info));
- call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+ call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
&info.info);
}
EXPORT_SYMBOL(netdev_bonding_info_change);
@@ -6487,11 +6550,13 @@ EXPORT_SYMBOL(dev_get_nest_level);
void netdev_lower_state_changed(struct net_device *lower_dev,
void *lower_state_info)
{
- struct netdev_notifier_changelowerstate_info changelowerstate_info;
+ struct netdev_notifier_changelowerstate_info changelowerstate_info = {
+ .info.dev = lower_dev,
+ };
ASSERT_RTNL();
changelowerstate_info.lower_state_info = lower_state_info;
- call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
&changelowerstate_info.info);
}
EXPORT_SYMBOL(netdev_lower_state_changed);
@@ -6782,11 +6847,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
if (dev->flags & IFF_UP &&
(changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info = {
+ .dev = dev,
+ },
+ .flags_changed = changes,
+ };
- change_info.flags_changed = changes;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
- &change_info.info);
+ call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
}
}
@@ -7157,7 +7225,7 @@ static void rollback_registered_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL);
+ GFP_KERNEL, NULL);
/*
* Flush the unicast and multicast chains
@@ -7994,7 +8062,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
- size_t alloc_size;
+ unsigned int alloc_size;
struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -8244,7 +8312,7 @@ EXPORT_SYMBOL(unregister_netdev);
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
- int err;
+ int err, new_nsid;
ASSERT_RTNL();
@@ -8300,7 +8368,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
+ new_nsid = peernet2id_alloc(dev_net(dev), net);
+ else
+ new_nsid = peernet2id(dev_net(dev), net);
+ rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
/*
* Flush the unicast and multicast chains
diff --git a/net/core/filter.c b/net/core/filter.c
index 74b8c91fb5f4..b7e8caa1e790 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1406,7 +1406,7 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
{
int err = __bpf_try_make_writable(skb, write_len);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return err;
}
@@ -1966,7 +1966,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
bpf_pull_mac_rcsum(skb);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -1988,7 +1988,7 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
ret = skb_vlan_pop(skb);
bpf_pull_mac_rcsum(skb);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2182,7 +2182,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
* need to be verified first.
*/
ret = bpf_skb_proto_xlat(skb, proto);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2307,7 +2307,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
bpf_skb_net_grow(skb, len_diff_abs);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2398,7 +2398,7 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
skb_gso_reset(skb);
}
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2438,7 +2438,7 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
skb_reset_mac_header(skb);
}
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return 0;
}
@@ -2451,14 +2451,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
+static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
+{
+ return xdp_data_meta_unsupported(xdp) ? 0 :
+ xdp->data - xdp->data_meta;
+}
+
BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
{
+ unsigned long metalen = xdp_get_metalen(xdp);
+ void *data_start = xdp->data_hard_start + metalen;
void *data = xdp->data + offset;
- if (unlikely(data < xdp->data_hard_start ||
+ if (unlikely(data < data_start ||
data > xdp->data_end - ETH_HLEN))
return -EINVAL;
+ if (metalen)
+ memmove(xdp->data_meta + offset,
+ xdp->data_meta, metalen);
+ xdp->data_meta += offset;
xdp->data = data;
return 0;
@@ -2472,6 +2484,33 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
+{
+ void *meta = xdp->data_meta + offset;
+ unsigned long metalen = xdp->data - meta;
+
+ if (xdp_data_meta_unsupported(xdp))
+ return -ENOTSUPP;
+ if (unlikely(meta < xdp->data_hard_start ||
+ meta > xdp->data))
+ return -EINVAL;
+ if (unlikely((metalen & (sizeof(__u32) - 1)) ||
+ (metalen > 32)))
+ return -EACCES;
+
+ xdp->data_meta = meta;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
+ .func = bpf_xdp_adjust_meta,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
static int __bpf_tx_xdp(struct net_device *dev,
struct bpf_map *map,
struct xdp_buff *xdp,
@@ -2696,7 +2735,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
- func == bpf_xdp_adjust_head)
+ func == bpf_xdp_adjust_head ||
+ func == bpf_xdp_adjust_meta)
return true;
return false;
@@ -3292,6 +3332,8 @@ xdp_func_proto(enum bpf_func_id func_id)
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_xdp_adjust_head:
return &bpf_xdp_adjust_head_proto;
+ case BPF_FUNC_xdp_adjust_meta:
+ return &bpf_xdp_adjust_meta_proto;
case BPF_FUNC_redirect:
return &bpf_xdp_redirect_proto;
case BPF_FUNC_redirect_map:
@@ -3422,6 +3464,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
if (size != size_default)
return false;
@@ -3448,6 +3491,7 @@ static bool sk_filter_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
@@ -3472,6 +3516,7 @@ static bool lwt_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
return false;
}
@@ -3590,6 +3635,9 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
info->reg_type = PTR_TO_PACKET;
break;
+ case bpf_ctx_range(struct __sk_buff, data_meta):
+ info->reg_type = PTR_TO_PACKET_META;
+ break;
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
@@ -3623,6 +3671,9 @@ static bool xdp_is_valid_access(int off, int size,
case offsetof(struct xdp_md, data):
info->reg_type = PTR_TO_PACKET;
break;
+ case offsetof(struct xdp_md, data_meta):
+ info->reg_type = PTR_TO_PACKET_META;
+ break;
case offsetof(struct xdp_md, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
@@ -3681,6 +3732,12 @@ static bool sk_skb_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, tc_classid):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
+ return false;
+ }
+
if (type == BPF_WRITE) {
switch (off) {
case bpf_ctx_range(struct __sk_buff, mark):
@@ -3693,8 +3750,6 @@ static bool sk_skb_is_valid_access(int off, int size,
}
switch (off) {
- case bpf_ctx_range(struct __sk_buff, tc_classid):
- return false;
case bpf_ctx_range(struct __sk_buff, data):
info->reg_type = PTR_TO_PACKET;
break;
@@ -3851,6 +3906,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
offsetof(struct sk_buff, data));
break;
+ case offsetof(struct __sk_buff, data_meta):
+ off = si->off;
+ off -= offsetof(struct __sk_buff, data_meta);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct bpf_skb_data_end, data_meta);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
+ break;
+
case offsetof(struct __sk_buff, data_end):
off = si->off;
off -= offsetof(struct __sk_buff, data_end);
@@ -4099,6 +4163,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data));
break;
+ case offsetof(struct xdp_md, data_meta):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, data_meta));
+ break;
case offsetof(struct xdp_md, data_end):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
si->dst_reg, si->src_reg,
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 0a977373d003..1f5caafb4492 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -5,6 +5,7 @@
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/dsa.h>
+#include <net/dst_metadata.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/gre.h>
@@ -115,6 +116,102 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+static void
+skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct flow_dissector_key_control *ctrl;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL))
+ return;
+
+ ctrl = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ target_container);
+ ctrl->addr_type = type;
+}
+
+static void
+__skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct ip_tunnel_info *info;
+ struct ip_tunnel_key *key;
+
+ /* A quick check to see if there might be something to do. */
+ if (!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return;
+
+ info = skb_tunnel_info(skb);
+ if (!info)
+ return;
+
+ key = &info->key;
+
+ switch (ip_tunnel_info_af(info)) {
+ case AF_INET:
+ skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ flow_dissector,
+ target_container);
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+ struct flow_dissector_key_ipv4_addrs *ipv4;
+
+ ipv4 = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ target_container);
+ ipv4->src = key->u.ipv4.src;
+ ipv4->dst = key->u.ipv4.dst;
+ }
+ break;
+ case AF_INET6:
+ skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ flow_dissector,
+ target_container);
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
+ struct flow_dissector_key_ipv6_addrs *ipv6;
+
+ ipv6 = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ target_container);
+ ipv6->src = key->u.ipv6.src;
+ ipv6->dst = key->u.ipv6.dst;
+ }
+ break;
+ }
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *keyid;
+
+ keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ target_container);
+ keyid->keyid = tunnel_id_to_key32(key->tun_id);
+ }
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+ struct flow_dissector_key_ports *tp;
+
+ tp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ target_container);
+ tp->src = key->tp_src;
+ tp->dst = key->tp_dst;
+ }
+}
+
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
@@ -478,6 +575,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
+ __skb_flow_dissect_tunnel_info(skb, flow_dissector,
+ target_container);
+
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 1307731ddfe4..e7e626fb87bb 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -51,7 +51,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
*/
preempt_disable();
rcu_read_lock();
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(lwt->prog, skb);
rcu_read_unlock();
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 16a1a4c4eb57..6ea3a1a7f36a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -457,7 +457,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
const void *pkey)
{
struct neighbour *n;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val;
struct neigh_hash_table *nht;
@@ -488,7 +488,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
u32 hash_val;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
int error;
struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
struct neigh_hash_table *nht;
@@ -572,7 +572,7 @@ out_neigh_release:
}
EXPORT_SYMBOL(__neigh_create);
-static u32 pneigh_hash(const void *pkey, int key_len)
+static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
u32 hash_val = *(u32 *)(pkey + key_len - 4);
hash_val ^= (hash_val >> 16);
@@ -585,7 +585,7 @@ static u32 pneigh_hash(const void *pkey, int key_len)
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
struct net *net,
const void *pkey,
- int key_len,
+ unsigned int key_len,
struct net_device *dev)
{
while (n) {
@@ -601,7 +601,7 @@ static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
struct net *net, const void *pkey, struct net_device *dev)
{
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
@@ -614,7 +614,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
struct net_device *dev, int creat)
{
struct pneigh_entry *n;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
read_lock_bh(&tbl->lock);
@@ -659,7 +659,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
struct net_device *dev)
{
struct pneigh_entry *n, **np;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
write_lock_bh(&tbl->lock);
@@ -1662,7 +1662,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(dst_attr) < tbl->key_len)
+ if (nla_len(dst_attr) < (int)tbl->key_len)
goto out;
if (ndm->ndm_flags & NTF_PROXY) {
@@ -1730,7 +1730,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(tb[NDA_DST]) < tbl->key_len)
+ if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
goto out;
dst = nla_data(tb[NDA_DST]);
lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 927a6dcbad96..51d5836d8fb9 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -391,10 +391,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
if (len > 0 && buf[len - 1] == '\n')
--count;
- if (!rtnl_trylock())
- return restart_syscall();
ret = dev_set_alias(netdev, buf, count);
- rtnl_unlock();
return ret < 0 ? ret : len;
}
@@ -403,13 +400,12 @@ static ssize_t ifalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct net_device *netdev = to_net_dev(dev);
+ char tmp[IFALIASZ];
ssize_t ret = 0;
- if (!rtnl_trylock())
- return restart_syscall();
- if (netdev->ifalias)
- ret = sprintf(buf, "%s\n", netdev->ifalias);
- rtnl_unlock();
+ ret = dev_get_alias(netdev, tmp, sizeof(tmp));
+ if (ret > 0)
+ ret = sprintf(buf, "%s\n", tmp);
return ret;
}
static DEVICE_ATTR_RW(ifalias);
@@ -1488,7 +1484,10 @@ static void netdev_release(struct device *d)
BUG_ON(dev->reg_state != NETREG_RELEASED);
- kfree(dev->ifalias);
+ /* no need to wait for rcu grace period:
+ * device is dead and about to be freed.
+ */
+ kfree(rcu_access_pointer(dev->ifalias));
netdev_freemem(dev);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d4bcdcc68e92..e84d108cfee4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -476,25 +476,13 @@ void rtnl_af_register(struct rtnl_af_ops *ops)
EXPORT_SYMBOL_GPL(rtnl_af_register);
/**
- * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
- * @ops: struct rtnl_af_ops * to unregister
- *
- * The caller must hold the rtnl_mutex.
- */
-void __rtnl_af_unregister(struct rtnl_af_ops *ops)
-{
- list_del(&ops->list);
-}
-EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
-
-/**
* rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
* @ops: struct rtnl_af_ops * to unregister
*/
void rtnl_af_unregister(struct rtnl_af_ops *ops)
{
rtnl_lock();
- __rtnl_af_unregister(ops);
+ list_del(&ops->list);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(rtnl_af_unregister);
@@ -522,11 +510,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
static bool rtnl_have_link_slave_info(const struct net_device *dev)
{
struct net_device *master_dev;
+ bool ret = false;
- master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
+ rcu_read_lock();
+
+ master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
if (master_dev && master_dev->rtnl_link_ops)
- return true;
- return false;
+ ret = true;
+ rcu_read_unlock();
+ return ret;
}
static int rtnl_link_slave_info_fill(struct sk_buff *skb,
@@ -923,6 +915,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
+ rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(4) /* IFLA_EVENT */
+ + nla_total_size(4) /* IFLA_NEW_NETNSID */
+ nla_total_size(1); /* IFLA_PROTO_DOWN */
}
@@ -1211,6 +1204,36 @@ nla_put_vfinfo_failure:
return -EMSGSIZE;
}
+static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
+ struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ struct nlattr *vfinfo;
+ int i, num_vfs;
+
+ if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0))
+ return 0;
+
+ num_vfs = dev_num_vf(dev->dev.parent);
+ if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs))
+ return -EMSGSIZE;
+
+ if (!dev->netdev_ops->ndo_get_vf_config)
+ return 0;
+
+ vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+ if (!vfinfo)
+ return -EMSGSIZE;
+
+ for (i = 0; i < num_vfs; i++) {
+ if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
+ return -EMSGSIZE;
+ }
+
+ nla_nest_end(skb, vfinfo);
+ return 0;
+}
+
static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
{
struct rtnl_link_ifmap map;
@@ -1307,16 +1330,67 @@ static u32 rtnl_get_event(unsigned long event)
return rtnl_event_type;
}
+static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
+{
+ const struct net_device *upper_dev;
+ int ret = 0;
+
+ rcu_read_lock();
+
+ upper_dev = netdev_master_upper_dev_get_rcu(dev);
+ if (upper_dev)
+ ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
+
+ rcu_read_unlock();
+ return ret;
+}
+
+static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev)
+{
+ int ifindex = dev_get_iflink(dev);
+
+ if (dev->ifindex == ifindex)
+ return 0;
+
+ return nla_put_u32(skb, IFLA_LINK, ifindex);
+}
+
+static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ char buf[IFALIASZ];
+ int ret;
+
+ ret = dev_get_alias(dev, buf, sizeof(buf));
+ return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0;
+}
+
+static int rtnl_fill_link_netnsid(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) {
+ struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+
+ if (!net_eq(dev_net(dev), link_net)) {
+ int id = peernet2id_alloc(dev_net(dev), link_net);
+
+ if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+ return -EMSGSIZE;
+ }
+ }
+
+ return 0;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask,
- u32 event)
+ u32 event, int *new_nsid)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
struct nlattr *af_spec;
struct rtnl_af_ops *af_ops;
- struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1345,15 +1419,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
- (dev->ifindex != dev_get_iflink(dev) &&
- nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
- (upper_dev &&
- nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
+ nla_put_iflink(skb, dev) ||
+ put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
(dev->qdisc &&
nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
- (dev->ifalias &&
- nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
+ nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_changes)) ||
nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
@@ -1385,27 +1456,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_fill_stats(skb, dev))
goto nla_put_failure;
- if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
- nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
+ if (rtnl_fill_vf(skb, dev, ext_filter_mask))
goto nla_put_failure;
- if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
- ext_filter_mask & RTEXT_FILTER_VF) {
- int i;
- struct nlattr *vfinfo;
- int num_vfs = dev_num_vf(dev->dev.parent);
-
- vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
- if (!vfinfo)
- goto nla_put_failure;
- for (i = 0; i < num_vfs; i++) {
- if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
- goto nla_put_failure;
- }
-
- nla_nest_end(skb, vfinfo);
- }
-
if (rtnl_port_fill(skb, dev, ext_filter_mask))
goto nla_put_failure;
@@ -1417,17 +1470,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
- if (dev->rtnl_link_ops &&
- dev->rtnl_link_ops->get_link_net) {
- struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
-
- if (!net_eq(dev_net(dev), link_net)) {
- int id = peernet2id_alloc(dev_net(dev), link_net);
+ if (rtnl_fill_link_netnsid(skb, dev))
+ goto nla_put_failure;
- if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
- goto nla_put_failure;
- }
- }
+ if (new_nsid &&
+ nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
+ goto nla_put_failure;
if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
goto nla_put_failure;
@@ -1658,7 +1706,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
flags,
- ext_filter_mask, 0);
+ ext_filter_mask, 0, NULL);
if (err < 0) {
if (likely(skb->len))
@@ -1909,7 +1957,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
return err;
}
-static int do_set_master(struct net_device *dev, int ifindex)
+static int do_set_master(struct net_device *dev, int ifindex,
+ struct netlink_ext_ack *extack)
{
struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops;
@@ -1934,7 +1983,7 @@ static int do_set_master(struct net_device *dev, int ifindex)
return -EINVAL;
ops = upper_dev->netdev_ops;
if (ops->ndo_add_slave) {
- err = ops->ndo_add_slave(upper_dev, dev);
+ err = ops->ndo_add_slave(upper_dev, dev, extack);
if (err)
return err;
} else {
@@ -2067,7 +2116,7 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
if (err)
goto errout;
status |= DO_SETLINK_MODIFIED;
@@ -2576,12 +2625,6 @@ replay:
return err;
slave_data = slave_attr;
}
- if (m_ops->slave_validate) {
- err = m_ops->slave_validate(tb, slave_data,
- extack);
- if (err < 0)
- return err;
- }
}
if (dev) {
@@ -2711,7 +2754,8 @@ replay:
goto out_unregister;
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
+ extack);
if (err)
goto out_unregister;
}
@@ -2771,7 +2815,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENOBUFS;
err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
+ nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0, NULL);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
@@ -2856,7 +2900,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned int change,
- u32 event, gfp_t flags)
+ u32 event, gfp_t flags, int *new_nsid)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -2867,7 +2911,8 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
if (skb == NULL)
goto errout;
- err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
+ err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event,
+ new_nsid);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2890,14 +2935,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
static void rtmsg_ifinfo_event(int type, struct net_device *dev,
unsigned int change, u32 event,
- gfp_t flags)
+ gfp_t flags, int *new_nsid)
{
struct sk_buff *skb;
if (dev->reg_state != NETREG_REGISTERED)
return;
- skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
+ skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags);
}
@@ -2905,10 +2950,17 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
gfp_t flags)
{
- rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags);
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL);
}
EXPORT_SYMBOL(rtmsg_ifinfo);
+void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+ gfp_t flags, int *new_nsid)
+{
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
+ new_nsid);
+}
+
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
u8 *addr, u16 vid, u32 pid, u32 seq,
@@ -4287,7 +4339,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_RESEND_IGMP:
case NETDEV_CHANGEINFODATA:
rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
- GFP_KERNEL);
+ GFP_KERNEL, NULL);
break;
default:
break;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 16982de649b9..822a90e56aea 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1509,6 +1509,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
+ skb_metadata_clear(skb);
+
/* It is not generally safe to change skb->truesize.
* For the moment, we really care of rx path, or
* when skb is orphaned (not attached to a socket).
@@ -2848,12 +2850,15 @@ EXPORT_SYMBOL(skb_queue_purge);
*/
void skb_rbtree_purge(struct rb_root *root)
{
- struct sk_buff *skb, *next;
+ struct rb_node *p = rb_first(root);
- rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
- kfree_skb(skb);
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
- *root = RB_ROOT;
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ kfree_skb(skb);
+ }
}
/**
@@ -4762,6 +4767,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
bool *fragstolen, int *delta_truesize)
{
+ struct skb_shared_info *to_shinfo, *from_shinfo;
int i, delta, len = from->len;
*fragstolen = false;
@@ -4776,7 +4782,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return true;
}
- if (skb_has_frag_list(to) || skb_has_frag_list(from))
+ to_shinfo = skb_shinfo(to);
+ from_shinfo = skb_shinfo(from);
+ if (to_shinfo->frag_list || from_shinfo->frag_list)
return false;
if (skb_zcopy(to) || skb_zcopy(from))
return false;
@@ -4785,8 +4793,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
struct page *page;
unsigned int offset;
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags >= MAX_SKB_FRAGS)
return false;
if (skb_head_is_locked(from))
@@ -4797,12 +4805,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
- skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
+ skb_fill_page_desc(to, to_shinfo->nr_frags,
page, offset, skb_headlen(from));
*fragstolen = true;
} else {
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags > MAX_SKB_FRAGS)
return false;
delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
@@ -4810,19 +4818,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
WARN_ON_ONCE(delta < len);
- memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
- skb_shinfo(from)->frags,
- skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
- skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
+ memcpy(to_shinfo->frags + to_shinfo->nr_frags,
+ from_shinfo->frags,
+ from_shinfo->nr_frags * sizeof(skb_frag_t));
+ to_shinfo->nr_frags += from_shinfo->nr_frags;
if (!skb_cloned(from))
- skb_shinfo(from)->nr_frags = 0;
+ from_shinfo->nr_frags = 0;
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
- skb_frag_ref(from, i);
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i]);
to->truesize += delta;
to->len += len;
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index fcce25da937c..2e7ac8bab19d 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,6 +1,6 @@
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o
+dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 03c58b0eb082..51ca2a524a27 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -112,34 +112,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
return ops;
}
-int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp)
-{
- struct dsa_switch *ds = cpu_dp->ds;
- struct net_device *master;
- struct ethtool_ops *cpu_ops;
-
- master = cpu_dp->netdev;
-
- cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL);
- if (!cpu_ops)
- return -ENOMEM;
-
- memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops,
- sizeof(struct ethtool_ops));
- cpu_dp->orig_ethtool_ops = master->ethtool_ops;
- memcpy(cpu_ops, &cpu_dp->ethtool_ops,
- sizeof(struct ethtool_ops));
- dsa_cpu_port_ethtool_init(cpu_ops);
- master->ethtool_ops = cpu_ops;
-
- return 0;
-}
-
-void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp)
-{
- cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops;
-}
-
void dsa_cpu_dsa_destroy(struct dsa_port *port)
{
struct device_node *port_dn = port->dn;
@@ -188,12 +160,12 @@ EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *unused)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
struct sk_buff *nskb = NULL;
struct pcpu_sw_netstats *s;
struct dsa_slave_priv *p;
- if (unlikely(dst == NULL)) {
+ if (unlikely(!cpu_dp)) {
kfree_skb(skb);
return 0;
}
@@ -202,7 +174,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
if (!skb)
return 0;
- nskb = dst->rcv(skb, dev, pt);
+ nskb = cpu_dp->rcv(skb, dev, pt);
if (!nskb) {
kfree_skb(skb);
return 0;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 873af0108e24..54ed054777bd 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -433,18 +433,17 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst)
return err;
}
- if (dst->cpu_dp) {
- err = dsa_cpu_port_ethtool_setup(dst->cpu_dp);
- if (err)
- return err;
- }
-
/* If we use a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point on get
* sent to the tag format's receive function.
*/
wmb();
- dst->cpu_dp->netdev->dsa_ptr = dst;
+ dst->cpu_dp->netdev->dsa_ptr = dst->cpu_dp;
+
+ err = dsa_master_ethtool_setup(dst->cpu_dp->netdev);
+ if (err)
+ return err;
+
dst->applied = true;
return 0;
@@ -458,6 +457,8 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
if (!dst->applied)
return;
+ dsa_master_ethtool_restore(dst->cpu_dp->netdev);
+
dst->cpu_dp->netdev->dsa_ptr = NULL;
/* If we used a tagging format that doesn't have an ethertype
@@ -474,10 +475,7 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
dsa_ds_unapply(dst, ds);
}
- if (dst->cpu_dp) {
- dsa_cpu_port_ethtool_restore(dst->cpu_dp);
- dst->cpu_dp = NULL;
- }
+ dst->cpu_dp = NULL;
pr_info("DSA: tree %d unapplied\n", dst->tree);
dst->applied = false;
@@ -487,6 +485,7 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
struct dsa_switch_tree *dst,
struct dsa_switch *ds)
{
+ const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
struct net_device *ethernet_dev;
struct device_node *ethernet;
@@ -516,14 +515,18 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
ds->cpu_port_mask |= BIT(index);
tag_protocol = ds->ops->get_tag_protocol(ds);
- dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
- if (IS_ERR(dst->tag_ops)) {
+ tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+ if (IS_ERR(tag_ops)) {
dev_warn(ds->dev, "No tagger for this switch\n");
ds->cpu_port_mask &= ~BIT(index);
- return PTR_ERR(dst->tag_ops);
+ return PTR_ERR(tag_ops);
}
- dst->rcv = dst->tag_ops->rcv;
+ dst->cpu_dp->tag_ops = tag_ops;
+
+ /* Make a few copies for faster access in master receive hot path */
+ dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv;
+ dst->cpu_dp->dst = dst;
return 0;
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 9c3eeb72462d..2850077cc9cc 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -66,7 +66,7 @@ struct dsa_notifier_vlan_info {
};
struct dsa_slave_priv {
- /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */
+ /* Copy of CPU port xmit for faster access in slave transmit hot path */
struct sk_buff * (*xmit)(struct sk_buff *skb,
struct net_device *dev);
@@ -79,7 +79,6 @@ struct dsa_slave_priv {
* The phylib phy_device pointer for the PHY connected
* to this port.
*/
- struct phy_device *phy;
phy_interface_t phy_interface;
int old_link;
int old_pause;
@@ -97,8 +96,6 @@ struct dsa_slave_priv {
int dsa_cpu_dsa_setup(struct dsa_port *port);
void dsa_cpu_dsa_destroy(struct dsa_port *dport);
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
-int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp);
-void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp);
bool dsa_schedule_work(struct work_struct *work);
/* legacy.c */
@@ -112,10 +109,35 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid);
+/* master.c */
+int dsa_master_ethtool_setup(struct net_device *dev);
+void dsa_master_ethtool_restore(struct net_device *dev);
+
+static inline struct net_device *dsa_master_get_slave(struct net_device *dev,
+ int device, int port)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->dst;
+ struct dsa_switch *ds;
+
+ if (device < 0 || device >= DSA_MAX_SWITCHES)
+ return NULL;
+
+ ds = dst->ds[device];
+ if (!ds)
+ return NULL;
+
+ if (port < 0 || port >= ds->num_ports)
+ return NULL;
+
+ return ds->ports[port].netdev;
+}
+
/* port.c */
int dsa_port_set_state(struct dsa_port *dp, u8 state,
struct switchdev_trans *trans);
-void dsa_port_set_state_now(struct dsa_port *dp, u8 state);
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
+void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
@@ -126,6 +148,7 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
+int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
int dsa_port_mdb_add(struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans);
@@ -139,7 +162,6 @@ int dsa_port_vlan_del(struct dsa_port *dp,
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
-void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops);
int dsa_slave_create(struct dsa_port *port, const char *name);
void dsa_slave_destroy(struct net_device *slave_dev);
int dsa_slave_suspend(struct net_device *slave_dev);
@@ -180,9 +202,4 @@ static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p)
return p->dp->cpu_dp->netdev;
}
-static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst)
-{
- return dst->cpu_dp;
-}
-
#endif
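dsa_master_get_slave() centralizes the bounds and existence checks that each tagger used to open-code, mapping a (device, port) pair decoded from the tag directly to the slave net_device, or NULL on any failure. A sketch of how a receive hook uses it; the one-byte tag layout here is invented, but the real taggers further down are converted the same way:

/* Sketch of a tagger rcv path built on the new helper; the tag format
 * (source port in the low three bits of one byte) is illustrative only.
 */
static struct sk_buff *example_tag_rcv(struct sk_buff *skb,
				       struct net_device *dev,
				       struct packet_type *pt)
{
	int source_port;

	if (unlikely(!pskb_may_pull(skb, 1)))
		return NULL;

	source_port = skb->data[0] & 7;

	/* NULL covers both out-of-range indices and unregistered ports */
	skb->dev = dsa_master_get_slave(dev, 0, source_port);
	if (!skb->dev)
		return NULL;

	return skb;
}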
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 91e6f7981d39..19ff6e0a21dc 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -144,14 +144,19 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
* switch.
*/
if (dst->cpu_dp->ds == ds) {
+ const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
tag_protocol = ops->get_tag_protocol(ds);
- dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
- if (IS_ERR(dst->tag_ops))
- return PTR_ERR(dst->tag_ops);
+ tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+ if (IS_ERR(tag_ops))
+ return PTR_ERR(tag_ops);
- dst->rcv = dst->tag_ops->rcv;
+ dst->cpu_dp->tag_ops = tag_ops;
+
+ /* Make a few copies for faster access in master receive hot path */
+ dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv;
+ dst->cpu_dp->dst = dst;
}
memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
@@ -206,10 +211,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
netdev_err(master, "[%d] : can't configure CPU and DSA ports\n",
index);
- ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp);
- if (ret)
- return ret;
-
return 0;
}
@@ -604,9 +605,9 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
* sent to the tag format's receive function.
*/
wmb();
- dev->dsa_ptr = dst;
+ dev->dsa_ptr = dst->cpu_dp;
- return 0;
+ return dsa_master_ethtool_setup(dst->cpu_dp->netdev);
}
static int dsa_probe(struct platform_device *pdev)
@@ -671,6 +672,8 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
{
int i;
+ dsa_master_ethtool_restore(dst->cpu_dp->netdev);
+
dst->cpu_dp->netdev->dsa_ptr = NULL;
/* If we used a tagging format that doesn't have an ethertype
@@ -686,8 +689,6 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
dsa_switch_destroy(ds);
}
- dsa_cpu_port_ethtool_restore(dst->cpu_dp);
-
dev_put(dst->cpu_dp->netdev);
}
diff --git a/net/dsa/master.c b/net/dsa/master.c
new file mode 100644
index 000000000000..5f3f57e372e0
--- /dev/null
+++ b/net/dsa/master.c
@@ -0,0 +1,117 @@
+/*
+ * Handling of a master device, switching frames via its switch fabric CPU port
+ *
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "dsa_priv.h"
+
+static void dsa_master_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ uint64_t *data)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ int port = cpu_dp->index;
+ int count = 0;
+
+ if (ops && ops->get_sset_count && ops->get_ethtool_stats) {
+ count = ops->get_sset_count(dev, ETH_SS_STATS);
+ ops->get_ethtool_stats(dev, stats, data);
+ }
+
+ if (ds->ops->get_ethtool_stats)
+ ds->ops->get_ethtool_stats(ds, port, data + count);
+}
+
+static int dsa_master_get_sset_count(struct net_device *dev, int sset)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ int count = 0;
+
+ if (ops && ops->get_sset_count)
+ count += ops->get_sset_count(dev, sset);
+
+ if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
+ count += ds->ops->get_sset_count(ds);
+
+ return count;
+}
+
+static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
+ uint8_t *data)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ int port = cpu_dp->index;
+ int len = ETH_GSTRING_LEN;
+ int mcount = 0, count;
+ unsigned int i;
+ uint8_t pfx[4];
+ uint8_t *ndata;
+
+ snprintf(pfx, sizeof(pfx), "p%.2d", port);
+ /* We do not want the string NUL-terminated, since this is a prefix */
+ pfx[sizeof(pfx) - 1] = '_';
+
+ if (ops && ops->get_sset_count && ops->get_strings) {
+ mcount = ops->get_sset_count(dev, ETH_SS_STATS);
+ ops->get_strings(dev, stringset, data);
+ }
+
+ if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
+ ndata = data + mcount * len;
+ /* This function copies ETH_GSTRING_LEN bytes; we will mangle the
+ * output afterwards to prepend the CPU port prefix constructed
+ * earlier
+ */
+ ds->ops->get_strings(ds, port, ndata);
+ count = ds->ops->get_sset_count(ds);
+ for (i = 0; i < count; i++) {
+ memmove(ndata + (i * len + sizeof(pfx)),
+ ndata + i * len, len - sizeof(pfx));
+ memcpy(ndata + i * len, pfx, sizeof(pfx));
+ }
+ }
+}
+
+int dsa_master_ethtool_setup(struct net_device *dev)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_switch *ds = cpu_dp->ds;
+ struct ethtool_ops *ops;
+
+ ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ return -ENOMEM;
+
+ cpu_dp->orig_ethtool_ops = dev->ethtool_ops;
+ if (cpu_dp->orig_ethtool_ops)
+ memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops));
+
+ ops->get_sset_count = dsa_master_get_sset_count;
+ ops->get_ethtool_stats = dsa_master_get_ethtool_stats;
+ ops->get_strings = dsa_master_get_strings;
+
+ dev->ethtool_ops = ops;
+
+ return 0;
+}
+
+void dsa_master_ethtool_restore(struct net_device *dev)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+ dev->ethtool_ops = cpu_dp->orig_ethtool_ops;
+ cpu_dp->orig_ethtool_ops = NULL;
+}
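With master.c in place, ethtool -S on the master interface reports the MAC driver's own counters first, then the switch CPU port counters, each carrying a port-index prefix. A sketch of what the prefix construction yields; the counter name is illustrative:

/* For CPU port 0, snprintf() writes "p00\0" into the 4-byte buffer and
 * the terminator is then overwritten with '_', so a switch counter such
 * as "rx_packets" appears in "ethtool -S" output as "p00_rx_packets".
 */
uint8_t pfx[4];

snprintf(pfx, sizeof(pfx), "p%.2d", 0);	/* "p00" plus NUL */
pfx[sizeof(pfx) - 1] = '_';		/* "p00_", deliberately unterminated */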
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 659676ba3f8b..72c8dbd3d3f2 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -56,7 +56,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state,
return 0;
}
-void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
{
int err;
@@ -65,6 +65,35 @@ void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
}
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+{
+ u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+ int err;
+
+ if (ds->ops->port_enable) {
+ err = ds->ops->port_enable(ds, port, phy);
+ if (err)
+ return err;
+ }
+
+ dsa_port_set_state_now(dp, stp_state);
+
+ return 0;
+}
+
+void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+
+ if (ds->ops->port_disable)
+ ds->ops->port_disable(ds, port, phy);
+}
+
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
@@ -173,6 +202,17 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
}
+int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ if (!ds->ops->port_fdb_dump)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_fdb_dump(ds, port, cb, data);
+}
+
int dsa_port_mdb_add(struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans)
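dsa_port_fdb_dump() above funnels every dump through the driver's ->port_fdb_dump op, which is expected to invoke the supplied callback once per FDB entry and stop on the first error. A sketch of a driver-side implementation under that contract; the hardware walk is stubbed out and the addr/vid/is_static/data callback signature is an assumption based on the call sites:

/* Sketch of a switch driver op matching dsa_port_fdb_dump(). Only the
 * callback protocol is shown; iterating the hardware table is left as
 * a placeholder.
 */
static int example_port_fdb_dump(struct dsa_switch *ds, int port,
				 dsa_fdb_dump_cb_t *cb, void *data)
{
	unsigned char addr[ETH_ALEN] = { 0 };
	u16 vid = 0;
	int err;

	/* for each entry learned on 'port' in the hardware table ... */
	err = cb(addr, vid, true, data);	/* true: static entry */
	if (err)
		return err;			/* abort the dump */

	return 0;
}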
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 865e29e62bad..fb2954ff198c 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -73,9 +73,7 @@ static int dsa_slave_open(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_port *dp = p->dp;
- struct dsa_switch *ds = dp->ds;
struct net_device *master = dsa_master_netdev(p);
- u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
int err;
if (!(master->flags & IFF_UP))
@@ -98,16 +96,12 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_allmulti;
}
- if (ds->ops->port_enable) {
- err = ds->ops->port_enable(ds, p->dp->index, p->phy);
- if (err)
- goto clear_promisc;
- }
-
- dsa_port_set_state_now(p->dp, stp_state);
+ err = dsa_port_enable(dp, dev->phydev);
+ if (err)
+ goto clear_promisc;
- if (p->phy)
- phy_start(p->phy);
+ if (dev->phydev)
+ phy_start(dev->phydev);
return 0;
@@ -128,10 +122,12 @@ static int dsa_slave_close(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct net_device *master = dsa_master_netdev(p);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = p->dp;
- if (p->phy)
- phy_stop(p->phy);
+ if (dev->phydev)
+ phy_stop(dev->phydev);
+
+ dsa_port_disable(dp, dev->phydev);
dev_mc_unsync(master, dev);
dev_uc_unsync(master, dev);
@@ -143,11 +139,6 @@ static int dsa_slave_close(struct net_device *dev)
if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
dev_uc_del(master, dev->dev_addr);
- if (ds->ops->port_disable)
- ds->ops->port_disable(ds, p->dp->index, p->phy);
-
- dsa_port_set_state_now(p->dp, BR_STATE_DISABLED);
-
return 0;
}
@@ -263,27 +254,20 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
};
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_port *dp = p->dp;
- struct dsa_switch *ds = dp->ds;
int err;
- if (!ds->ops->port_fdb_dump)
- return -EOPNOTSUPP;
-
- err = ds->ops->port_fdb_dump(ds, dp->index,
- dsa_slave_port_fdb_do_dump,
- &dump);
+ err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump);
*idx = dump.idx;
+
return err;
}
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- if (p->phy != NULL)
- return phy_mii_ioctl(p->phy, ifr, cmd);
+ if (!dev->phydev)
+ return -ENODEV;
- return -EOPNOTSUPP;
+ return phy_mii_ioctl(dev->phydev, ifr, cmd);
}
static int dsa_slave_port_attr_set(struct net_device *dev,
@@ -385,10 +369,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
return 0;
}
-static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p,
- struct sk_buff *skb)
+static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
+ struct sk_buff *skb)
{
#ifdef CONFIG_NET_POLL_CONTROLLER
+ struct dsa_slave_priv *p = netdev_priv(dev);
+
if (p->netpoll)
netpoll_send_skb(p->netpoll, skb);
#else
@@ -422,7 +408,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
* tag to be successfully transmitted
*/
if (unlikely(netpoll_tx_running(dev)))
- return dsa_netpoll_send_skb(p, nskb);
+ return dsa_slave_netpoll_send_skb(dev, nskb);
/* Queue the SKB for transmission on the parent interface, but
* do not modify its EtherType
@@ -434,31 +420,6 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
}
/* ethtool operations *******************************************************/
-static int
-dsa_slave_get_link_ksettings(struct net_device *dev,
- struct ethtool_link_ksettings *cmd)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- if (!p->phy)
- return -EOPNOTSUPP;
-
- phy_ethtool_ksettings_get(p->phy, cmd);
-
- return 0;
-}
-
-static int
-dsa_slave_set_link_ksettings(struct net_device *dev,
- const struct ethtool_link_ksettings *cmd)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- if (p->phy != NULL)
- return phy_ethtool_ksettings_set(p->phy, cmd);
-
- return -EOPNOTSUPP;
-}
static void dsa_slave_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
@@ -489,26 +450,14 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
ds->ops->get_regs(ds, p->dp->index, regs, _p);
}
-static int dsa_slave_nway_reset(struct net_device *dev)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- if (p->phy != NULL)
- return genphy_restart_aneg(p->phy);
-
- return -EOPNOTSUPP;
-}
-
static u32 dsa_slave_get_link(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ if (!dev->phydev)
+ return -ENODEV;
- if (p->phy != NULL) {
- genphy_update_link(p->phy);
- return p->phy->link;
- }
+ genphy_update_link(dev->phydev);
- return -EOPNOTSUPP;
+ return dev->phydev->link;
}
static int dsa_slave_get_eeprom_len(struct net_device *dev)
@@ -567,79 +516,6 @@ static void dsa_slave_get_strings(struct net_device *dev,
}
}
-static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats,
- uint64_t *data)
-{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
- s8 cpu_port = cpu_dp->index;
- int count = 0;
-
- if (cpu_dp->ethtool_ops.get_sset_count) {
- count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
- cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data);
- }
-
- if (ds->ops->get_ethtool_stats)
- ds->ops->get_ethtool_stats(ds, cpu_port, data + count);
-}
-
-static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
-{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
- int count = 0;
-
- if (cpu_dp->ethtool_ops.get_sset_count)
- count += cpu_dp->ethtool_ops.get_sset_count(dev, sset);
-
- if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
-
- return count;
-}
-
-static void dsa_cpu_port_get_strings(struct net_device *dev,
- uint32_t stringset, uint8_t *data)
-{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
- s8 cpu_port = cpu_dp->index;
- int len = ETH_GSTRING_LEN;
- int mcount = 0, count;
- unsigned int i;
- uint8_t pfx[4];
- uint8_t *ndata;
-
- snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port);
- /* We do not want to be NULL-terminated, since this is a prefix */
- pfx[sizeof(pfx) - 1] = '_';
-
- if (cpu_dp->ethtool_ops.get_sset_count) {
- mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
- cpu_dp->ethtool_ops.get_strings(dev, stringset, data);
- }
-
- if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
- ndata = data + mcount * len;
- /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
- * the output after to prepend our CPU port prefix we
- * constructed earlier
- */
- ds->ops->get_strings(ds, cpu_port, ndata);
- count = ds->ops->get_sset_count(ds);
- for (i = 0; i < count; i++) {
- memmove(ndata + (i * len + sizeof(pfx)),
- ndata + i * len, len - sizeof(pfx));
- memcpy(ndata + i * len, pfx, sizeof(pfx));
- }
- }
-}
-
static void dsa_slave_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats,
uint64_t *data)
@@ -716,7 +592,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
int ret;
/* Port's PHY and MAC both need to be EEE capable */
- if (!p->phy)
+ if (!dev->phydev)
return -ENODEV;
if (!ds->ops->set_mac_eee)
@@ -727,12 +603,12 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
return ret;
if (e->eee_enabled) {
- ret = phy_init_eee(p->phy, 0);
+ ret = phy_init_eee(dev->phydev, 0);
if (ret)
return ret;
}
- return phy_ethtool_set_eee(p->phy, e);
+ return phy_ethtool_set_eee(dev->phydev, e);
}
static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
@@ -742,7 +618,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
int ret;
/* Port's PHY and MAC both need to be EEE capable */
- if (!p->phy)
+ if (!dev->phydev)
return -ENODEV;
if (!ds->ops->get_mac_eee)
@@ -752,7 +628,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
if (ret)
return ret;
- return phy_ethtool_get_eee(p->phy, e);
+ return phy_ethtool_get_eee(dev->phydev, e);
}
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -809,9 +685,9 @@ static int dsa_slave_get_phys_port_name(struct net_device *dev,
}
static struct dsa_mall_tc_entry *
-dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p,
- unsigned long cookie)
+dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list)
@@ -893,7 +769,7 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
if (!ds->ops->port_mirror_del)
return;
- mall_tc_entry = dsa_slave_mall_tc_entry_find(p, cls->cookie);
+ mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie);
if (!mall_tc_entry)
return;
@@ -976,13 +852,6 @@ static void dsa_slave_get_stats64(struct net_device *dev,
}
}
-void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
-{
- ops->get_sset_count = dsa_cpu_port_get_sset_count;
- ops->get_ethtool_stats = dsa_cpu_port_get_ethtool_stats;
- ops->get_strings = dsa_cpu_port_get_strings;
-}
-
static int dsa_slave_get_rxnfc(struct net_device *dev,
struct ethtool_rxnfc *nfc, u32 *rule_locs)
{
@@ -1011,7 +880,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
.get_regs = dsa_slave_get_regs,
- .nway_reset = dsa_slave_nway_reset,
+ .nway_reset = phy_ethtool_nway_reset,
.get_link = dsa_slave_get_link,
.get_eeprom_len = dsa_slave_get_eeprom_len,
.get_eeprom = dsa_slave_get_eeprom,
@@ -1023,8 +892,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_wol = dsa_slave_get_wol,
.set_eee = dsa_slave_set_eee,
.get_eee = dsa_slave_get_eee,
- .get_link_ksettings = dsa_slave_get_link_ksettings,
- .set_link_ksettings = dsa_slave_set_link_ksettings,
+ .get_link_ksettings = phy_ethtool_get_link_ksettings,
+ .set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_rxnfc = dsa_slave_get_rxnfc,
.set_rxnfc = dsa_slave_set_rxnfc,
};
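Dropping the open-coded wrappers in favour of phy_ethtool_get_link_ksettings(), phy_ethtool_set_link_ksettings() and phy_ethtool_nway_reset() only works because the PHY is now attached at slave_dev->phydev, which is where those generic helpers look. Roughly paraphrased from phylib, not copied verbatim:

/* Approximate shape of the generic helper this table now points at; it
 * keys off dev->phydev, hence the migration away from p->phy above.
 */
static int phy_ethtool_get_link_ksettings_sketch(struct net_device *ndev,
					struct ethtool_link_ksettings *cmd)
{
	struct phy_device *phydev = ndev->phydev;

	if (!phydev)
		return -ENODEV;

	phy_ethtool_ksettings_get(phydev, cmd);

	return 0;
}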
@@ -1068,26 +937,26 @@ static void dsa_slave_adjust_link(struct net_device *dev)
struct dsa_switch *ds = p->dp->ds;
unsigned int status_changed = 0;
- if (p->old_link != p->phy->link) {
+ if (p->old_link != dev->phydev->link) {
status_changed = 1;
- p->old_link = p->phy->link;
+ p->old_link = dev->phydev->link;
}
- if (p->old_duplex != p->phy->duplex) {
+ if (p->old_duplex != dev->phydev->duplex) {
status_changed = 1;
- p->old_duplex = p->phy->duplex;
+ p->old_duplex = dev->phydev->duplex;
}
- if (p->old_pause != p->phy->pause) {
+ if (p->old_pause != dev->phydev->pause) {
status_changed = 1;
- p->old_pause = p->phy->pause;
+ p->old_pause = dev->phydev->pause;
}
if (ds->ops->adjust_link && status_changed)
- ds->ops->adjust_link(ds, p->dp->index, p->phy);
+ ds->ops->adjust_link(ds, p->dp->index, dev->phydev);
if (status_changed)
- phy_print_status(p->phy);
+ phy_print_status(dev->phydev);
}
static int dsa_slave_fixed_link_update(struct net_device *dev,
@@ -1107,28 +976,28 @@ static int dsa_slave_fixed_link_update(struct net_device *dev,
}
/* slave device setup *******************************************************/
-static int dsa_slave_phy_connect(struct dsa_slave_priv *p,
- struct net_device *slave_dev,
- int addr)
+static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
{
+ struct dsa_slave_priv *p = netdev_priv(slave_dev);
struct dsa_switch *ds = p->dp->ds;
- p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr);
- if (!p->phy) {
+ slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr);
+ if (!slave_dev->phydev) {
netdev_err(slave_dev, "no phy at %d\n", addr);
return -ENODEV;
}
/* Use already configured phy mode */
if (p->phy_interface == PHY_INTERFACE_MODE_NA)
- p->phy_interface = p->phy->interface;
- return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
- p->phy_interface);
+ p->phy_interface = slave_dev->phydev->interface;
+
+ return phy_connect_direct(slave_dev, slave_dev->phydev,
+ dsa_slave_adjust_link, p->phy_interface);
}
-static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
- struct net_device *slave_dev)
+static int dsa_slave_phy_setup(struct net_device *slave_dev)
{
+ struct dsa_slave_priv *p = netdev_priv(slave_dev);
struct dsa_switch *ds = p->dp->ds;
struct device_node *phy_dn, *port_dn;
bool phy_is_fixed = false;
@@ -1168,30 +1037,31 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
*/
if (!phy_is_fixed && phy_id >= 0 &&
(ds->phys_mii_mask & (1 << phy_id))) {
- ret = dsa_slave_phy_connect(p, slave_dev, phy_id);
+ ret = dsa_slave_phy_connect(slave_dev, phy_id);
if (ret) {
netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret);
of_node_put(phy_dn);
return ret;
}
} else {
- p->phy = of_phy_connect(slave_dev, phy_dn,
- dsa_slave_adjust_link,
- phy_flags,
- p->phy_interface);
+ slave_dev->phydev = of_phy_connect(slave_dev, phy_dn,
+ dsa_slave_adjust_link,
+ phy_flags,
+ p->phy_interface);
}
of_node_put(phy_dn);
}
- if (p->phy && phy_is_fixed)
- fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update);
+ if (slave_dev->phydev && phy_is_fixed)
+ fixed_phy_set_link_update(slave_dev->phydev,
+ dsa_slave_fixed_link_update);
/* We could not connect to a designated PHY, so use the switch internal
* MDIO bus instead
*/
- if (!p->phy) {
- ret = dsa_slave_phy_connect(p, slave_dev, p->dp->index);
+ if (!slave_dev->phydev) {
+ ret = dsa_slave_phy_connect(slave_dev, p->dp->index);
if (ret) {
netdev_err(slave_dev, "failed to connect to port %d: %d\n",
p->dp->index, ret);
@@ -1201,7 +1071,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
}
}
- phy_attached_info(p->phy);
+ phy_attached_info(slave_dev->phydev);
return 0;
}
@@ -1221,12 +1091,12 @@ int dsa_slave_suspend(struct net_device *slave_dev)
netif_device_detach(slave_dev);
- if (p->phy) {
- phy_stop(p->phy);
+ if (slave_dev->phydev) {
+ phy_stop(slave_dev->phydev);
p->old_pause = -1;
p->old_link = -1;
p->old_duplex = -1;
- phy_suspend(p->phy);
+ phy_suspend(slave_dev->phydev);
}
return 0;
@@ -1234,13 +1104,11 @@ int dsa_slave_suspend(struct net_device *slave_dev)
int dsa_slave_resume(struct net_device *slave_dev)
{
- struct dsa_slave_priv *p = netdev_priv(slave_dev);
-
netif_device_attach(slave_dev);
- if (p->phy) {
- phy_resume(p->phy);
- phy_start(p->phy);
+ if (slave_dev->phydev) {
+ phy_resume(slave_dev->phydev);
+ phy_start(slave_dev->phydev);
}
return 0;
@@ -1249,7 +1117,6 @@ int dsa_slave_resume(struct net_device *slave_dev)
int dsa_slave_create(struct dsa_port *port, const char *name)
{
struct dsa_switch *ds = port->ds;
- struct dsa_switch_tree *dst = ds->dst;
struct net_device *master;
struct net_device *slave_dev;
struct dsa_slave_priv *p;
@@ -1294,7 +1161,7 @@ int dsa_slave_create(struct dsa_port *port, const char *name)
}
p->dp = port;
INIT_LIST_HEAD(&p->mall_tc_list);
- p->xmit = dst->tag_ops->xmit;
+ p->xmit = cpu_dp->tag_ops->xmit;
p->old_pause = -1;
p->old_link = -1;
@@ -1304,7 +1171,7 @@ int dsa_slave_create(struct dsa_port *port, const char *name)
netif_carrier_off(slave_dev);
- ret = dsa_slave_phy_setup(p, slave_dev);
+ ret = dsa_slave_phy_setup(slave_dev);
if (ret) {
netdev_err(master, "error %d setting up slave phy\n", ret);
goto out_free;
@@ -1320,7 +1187,7 @@ int dsa_slave_create(struct dsa_port *port, const char *name)
return 0;
out_phy:
- phy_disconnect(p->phy);
+ phy_disconnect(slave_dev->phydev);
if (of_phy_is_fixed_link(p->dp->dn))
of_phy_deregister_fixed_link(p->dp->dn);
out_free:
@@ -1338,8 +1205,8 @@ void dsa_slave_destroy(struct net_device *slave_dev)
port_dn = p->dp->dn;
netif_carrier_off(slave_dev);
- if (p->phy) {
- phy_disconnect(p->phy);
+ if (slave_dev->phydev) {
+ phy_disconnect(slave_dev->phydev);
if (of_phy_is_fixed_link(port_dn))
of_phy_deregister_fixed_link(port_dn);
@@ -1379,7 +1246,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- if (dev->netdev_ops != &dsa_slave_netdev_ops)
+ if (!dsa_slave_dev_check(dev))
return NOTIFY_DONE;
if (event == NETDEV_CHANGEUPPER)
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index dbb016434ace..8e4bdb9d9ae3 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -92,9 +92,6 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
int source_port;
u8 *brcm_tag;
@@ -117,8 +114,8 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* Locate which port this is coming from */
source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
- /* Validate port against switch setup, either the port is totally */
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+ skb->dev = dsa_master_get_slave(dev, 0, source_port);
+ if (!skb->dev)
return NULL;
/* Remove Broadcom tag and update checksum */
@@ -129,8 +126,6 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
skb->data - ETH_HLEN - BRCM_TAG_LEN,
2 * ETH_ALEN);
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index fbf9ca954773..c77218f173d1 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -67,8 +67,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
u8 *dsa_header;
int source_device;
int source_port;
@@ -93,18 +91,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
source_device = dsa_header[0] & 0x1f;
source_port = (dsa_header[1] >> 3) & 0x1f;
- /*
- * Check that the source device exists and that the source
- * port is a registered DSA port.
- */
- if (source_device >= DSA_MAX_SWITCHES)
- return NULL;
-
- ds = dst->ds[source_device];
- if (!ds)
- return NULL;
-
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+ skb->dev = dsa_master_get_slave(dev, source_device, source_port);
+ if (!skb->dev)
return NULL;
/*
@@ -153,8 +141,6 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 76367ba1b2e2..0b83cbe0c9e8 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -80,8 +80,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
u8 *edsa_header;
int source_device;
int source_port;
@@ -106,18 +104,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
source_device = edsa_header[0] & 0x1f;
source_port = (edsa_header[1] >> 3) & 0x1f;
- /*
- * Check that the source device exists and that the source
- * port is a registered DSA port.
- */
- if (source_device >= DSA_MAX_SWITCHES)
- return NULL;
-
- ds = dst->ds[source_device];
- if (!ds)
- return NULL;
-
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+ skb->dev = dsa_master_get_slave(dev, source_device, source_port);
+ if (!skb->dev)
return NULL;
/*
@@ -172,8 +160,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 010ca0a336c4..b241c990cde0 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -80,22 +80,19 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
u8 *tag;
int source_port;
tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
source_port = tag[0] & 7;
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+
+ skb->dev = dsa_master_get_slave(dev, 0, source_port);
+ if (!skb->dev)
return NULL;
pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN);
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 0b9826105e42..4f211e56c81f 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -71,17 +71,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
u16 *lan9303_tag;
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
unsigned int source_port;
- ds = dst->ds[0];
-
- if (unlikely(!ds)) {
- dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n");
- return NULL;
- }
-
if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) {
dev_warn_ratelimited(&dev->dev,
"Dropping packet, cannot pull\n");
@@ -103,16 +94,12 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
source_port = ntohs(lan9303_tag[1]) & 0x3;
- if (source_port >= ds->num_ports) {
+ skb->dev = dsa_master_get_slave(dev, 0, source_port);
+ if (!skb->dev) {
dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n");
return NULL;
}
- if (!ds->ports[source_port].netdev) {
- dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n");
- return NULL;
- }
-
/* remove the special VLAN tag between the MAC addresses
* and the current ethertype field.
*/
@@ -120,9 +107,6 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
2 * ETH_ALEN);
- /* forward the packet to the dedicated interface */
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index ec8ee5f43255..968586c5d40e 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -46,8 +46,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
int port;
__be16 *phdr, hdr;
@@ -68,20 +66,12 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
skb->data - ETH_HLEN - MTK_HDR_LEN,
2 * ETH_ALEN);
- /* This protocol doesn't support cascading multiple
- * switches so it's safe to assume the switch is first
- * in the tree.
- */
- ds = dst->ds[0];
- if (!ds)
- return NULL;
-
/* Get source port information */
port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
- if (!ds->ports[port].netdev)
- return NULL;
- skb->dev = ds->ports[port].netdev;
+ skb->dev = dsa_master_get_slave(dev, 0, port);
+ if (!skb->dev)
+ return NULL;
return skb;
}
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 1d4c70711c0f..8d33d9ebf910 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -65,9 +65,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds;
u8 ver;
int port;
__be16 *phdr, hdr;
@@ -92,20 +89,12 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
ETH_HLEN - QCA_HDR_LEN);
- /* This protocol doesn't support cascading multiple switches so it's
- * safe to assume the switch is first in the tree
- */
- ds = cpu_dp->ds;
- if (!ds)
- return NULL;
-
/* Get source port information */
port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
- if (!ds->ports[port].netdev)
- return NULL;
- /* Update skb & forward the frame accordingly */
- skb->dev = ds->ports[port].netdev;
+ skb->dev = dsa_master_get_slave(dev, 0, port);
+ if (!skb->dev)
+ return NULL;
return skb;
}
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index d2fd4923aa3e..61668be267f5 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -58,9 +58,6 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
u8 *trailer;
int source_port;
@@ -73,13 +70,13 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
return NULL;
source_port = trailer[1] & 7;
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+
+ skb->dev = dsa_master_get_slave(dev, 0, source_port);
+ if (!skb->dev)
return NULL;
pskb_trim_rcsum(skb, skb->len - 4);
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e31108e5ef79..43a1bbed7a42 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
- int err;
+ int err, tcp_fastopen;
lock_sock(sk);
@@ -217,11 +217,12 @@ int inet_listen(struct socket *sock, int backlog)
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
- (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+ tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+ (tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once(sock_net(sk));
}
err = inet_csk_listen_start(sk, backlog);
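The inet_listen() change reads the Fast Open flags through sock_net() because the sysctl is now per network namespace rather than global. A minimal sketch of the lookup pattern, with an illustrative helper name:

/* Sketch: per-netns TFO server check, as done in inet_listen() above. */
static bool example_tfo_server_enabled(const struct sock *sk)
{
	int tfo = sock_net(sk)->ipv4.sysctl_tcp_fastopen;

	return (tfo & TFO_SERVER_WO_SOCKOPT1) && (tfo & TFO_SERVER_ENABLE);
}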
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d7adc0616599..7ce22a2c07ce 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -137,22 +137,12 @@ static void inet_hash_remove(struct in_ifaddr *ifa)
*/
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
- u32 hash = inet_addr_hash(net, addr);
struct net_device *result = NULL;
struct in_ifaddr *ifa;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
- if (ifa->ifa_local == addr) {
- struct net_device *dev = ifa->ifa_dev->dev;
-
- if (!net_eq(dev_net(dev), net))
- continue;
- result = dev;
- break;
- }
- }
- if (!result) {
+ ifa = inet_lookup_ifaddr_rcu(net, addr);
+ if (!ifa) {
struct flowi4 fl4 = { .daddr = addr };
struct fib_result res = { 0 };
struct fib_table *local;
@@ -165,6 +155,8 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
!fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
res.type == RTN_LOCAL)
result = FIB_RES_DEV(res);
+ } else {
+ result = ifa->ifa_dev->dev;
}
if (result && devref)
dev_hold(result);
@@ -173,6 +165,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
}
EXPORT_SYMBOL(__ip_dev_find);
+/* called under RCU lock */
+struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
+{
+ u32 hash = inet_addr_hash(net, addr);
+ struct in_ifaddr *ifa;
+
+ hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
+ if (ifa->ifa_local == addr &&
+ net_eq(dev_net(ifa->ifa_dev->dev), net))
+ return ifa;
+
+ return NULL;
+}
+
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
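inet_lookup_ifaddr_rcu() walks the shared hash with no locking of its own, so callers must already be in an RCU read-side critical section; __ip_dev_find() and fib_validate_source() both satisfy that. A sketch of a standalone caller, with an illustrative function name:

/* Sketch: the helper takes no reference on the ifaddr, so the result
 * must not be used outside the RCU section.
 */
static bool example_addr_is_local(struct net *net, __be32 addr)
{
	bool is_local;

	rcu_read_lock();
	is_local = inet_lookup_ifaddr_rcu(net, addr) != NULL;
	rcu_read_unlock();

	return is_local;
}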
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 37819ab4cc74..f02819134ba2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -345,9 +345,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (res.type != RTN_UNICAST &&
(res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
goto e_inval;
- if (!rpf && !fib_num_tclassid_users(net) &&
- (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
- goto last_resort;
fib_combine_itag(itag, &res);
dev_match = false;
@@ -402,13 +399,26 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
struct in_device *idev, u32 *itag)
{
int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
+ struct net *net = dev_net(dev);
- if (!r && !fib_num_tclassid_users(dev_net(dev)) &&
- IN_DEV_ACCEPT_LOCAL(idev) &&
+ if (!r && !fib_num_tclassid_users(net) &&
(dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
+ if (IN_DEV_ACCEPT_LOCAL(idev))
+ goto ok;
+ /* if no custom local routes were added from user space, we can
+ * check for local addresses by looking up the ifaddr table
+ */
+ if (net->ipv4.fib_has_custom_local_routes)
+ goto full_check;
+ if (inet_lookup_ifaddr_rcu(net, src))
+ return -EINVAL;
+
+ok:
*itag = 0;
return 0;
}
+
+full_check:
return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
}
@@ -759,6 +769,8 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
}
err = fib_table_insert(net, tb, &cfg, extack);
+ if (!err && cfg.fc_type == RTN_LOCAL)
+ net->ipv4.fib_has_custom_local_routes = true;
errout:
return err;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 57a5d48acee8..467b3c15395f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -601,17 +601,9 @@ static void fib_rebalance(struct fib_info *fi)
atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
} endfor_nexthops(fi);
}
-
-static inline void fib_add_weight(struct fib_info *fi,
- const struct fib_nh *nh)
-{
- fi->fib_weight += nh->nh_weight;
-}
-
#else /* CONFIG_IP_ROUTE_MULTIPATH */
#define fib_rebalance(fi) do { } while (0)
-#define fib_add_weight(fi, nh) do { } while (0)
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -774,8 +766,8 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
* |
* |-> {local prefix} (terminal node)
*/
-static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
- struct fib_nh *nh, struct netlink_ext_ack *extack)
+static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
+ struct netlink_ext_ack *extack)
{
int err = 0;
struct net *net;
@@ -1258,7 +1250,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
int linkdown = 0;
change_nexthops(fi) {
- err = fib_check_nh(cfg, fi, nexthop_nh, extack);
+ err = fib_check_nh(cfg, nexthop_nh, extack);
if (err != 0)
goto failure;
if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
@@ -1275,7 +1267,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
- fib_add_weight(fi, nexthop_nh);
} endfor_nexthops(fi)
fib_rebalance(fi);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c039c937ba90..8a91ebbf0c01 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -39,11 +39,11 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
* IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
* and 0.0.0.0 equals to 0.0.0.0 only
*/
-static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
- const struct in6_addr *sk2_rcv_saddr6,
- __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
- bool sk1_ipv6only, bool sk2_ipv6only,
- bool match_wildcard)
+static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
+ const struct in6_addr *sk2_rcv_saddr6,
+ __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+ bool sk1_ipv6only, bool sk2_ipv6only,
+ bool match_wildcard)
{
int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -52,29 +52,29 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
if (!sk2_ipv6only) {
if (sk1_rcv_saddr == sk2_rcv_saddr)
- return 1;
+ return true;
if (!sk1_rcv_saddr || !sk2_rcv_saddr)
return match_wildcard;
}
- return 0;
+ return false;
}
if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
- return 1;
+ return true;
if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
- return 1;
+ return true;
if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
!(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
- return 1;
+ return true;
if (sk2_rcv_saddr6 &&
ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
- return 1;
+ return true;
- return 0;
+ return false;
}
#endif
@@ -82,20 +82,20 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
* match_wildcard == false: addresses must be exactly the same, i.e.
* 0.0.0.0 only equals to 0.0.0.0
*/
-static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
- bool sk2_ipv6only, bool match_wildcard)
+static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+ bool sk2_ipv6only, bool match_wildcard)
{
if (!sk2_ipv6only) {
if (sk1_rcv_saddr == sk2_rcv_saddr)
- return 1;
+ return true;
if (!sk1_rcv_saddr || !sk2_rcv_saddr)
return match_wildcard;
}
- return 0;
+ return false;
}
-int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard)
+bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
{
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index b20c8ac64081..914d56928578 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -284,14 +284,17 @@ EXPORT_SYMBOL(inet_peer_xrlim_allow);
void inetpeer_invalidate_tree(struct inet_peer_base *base)
{
- struct inet_peer *p, *n;
+ struct rb_node *p = rb_first(&base->rb_root);
- rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) {
- inet_putpeer(p);
+ while (p) {
+ struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node);
+
+ p = rb_next(p);
+ rb_erase(&peer->rb_node, &base->rb_root);
+ inet_putpeer(peer);
cond_resched();
}
- base->rb_root = RB_ROOT;
base->total = 0;
}
EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 467e44d7587d..dc2317776499 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1011,15 +1011,14 @@ static int __net_init ipgre_init_net(struct net *net)
return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
-static void __net_exit ipgre_exit_net(struct net *net)
+static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
- ip_tunnel_delete_net(itn, &ipgre_link_ops);
+ ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
- .exit = ipgre_exit_net,
+ .exit_batch = ipgre_exit_batch_net,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1542,15 +1541,14 @@ static int __net_init ipgre_tap_init_net(struct net *net)
return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
-static void __net_exit ipgre_tap_exit_net(struct net *net)
+static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
- ip_tunnel_delete_net(itn, &ipgre_tap_ops);
+ ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}
static struct pernet_operations ipgre_tap_net_ops = {
.init = ipgre_tap_init_net,
- .exit = ipgre_tap_exit_net,
+ .exit_batch = ipgre_tap_exit_batch_net,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1561,16 +1559,14 @@ static int __net_init erspan_init_net(struct net *net)
&erspan_link_ops, "erspan0");
}
-static void __net_exit erspan_exit_net(struct net *net)
+static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
- struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);
-
- ip_tunnel_delete_net(itn, &erspan_link_ops);
+ ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}
static struct pernet_operations erspan_net_ops = {
.init = erspan_init_net,
- .exit = erspan_exit_net,
+ .exit_batch = erspan_exit_batch_net,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e9805ad664ac..fe6fee728ce4 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1061,16 +1061,22 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
}
}
-void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
+void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
+ struct rtnl_link_ops *ops)
{
+ struct ip_tunnel_net *itn;
+ struct net *net;
LIST_HEAD(list);
rtnl_lock();
- ip_tunnel_destroy(itn, &list, ops);
+ list_for_each_entry(net, net_list, exit_list) {
+ itn = net_generic(net, id);
+ ip_tunnel_destroy(itn, &list, ops);
+ }
unregister_netdevice_many(&list);
rtnl_unlock();
}
-EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
+EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p, __u32 fwmark)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 89453cf62158..58465c0a8682 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -453,15 +453,14 @@ static int __net_init vti_init_net(struct net *net)
return 0;
}
-static void __net_exit vti_exit_net(struct net *net)
+static void __net_exit vti_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
- ip_tunnel_delete_net(itn, &vti_link_ops);
+ ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
}
static struct pernet_operations vti_net_ops = {
.init = vti_init_net,
- .exit = vti_exit_net,
+ .exit_batch = vti_exit_batch_net,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index fb1ad22b5e29..1e47818e38c7 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -634,15 +634,14 @@ static int __net_init ipip_init_net(struct net *net)
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
}
-static void __net_exit ipip_exit_net(struct net *net)
+static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
- ip_tunnel_delete_net(itn, &ipip_link_ops);
+ ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
}
static struct pernet_operations ipip_net_ops = {
.init = ipip_init_net,
- .exit = ipip_exit_net,
+ .exit_batch = ipip_exit_batch_net,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
};
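The ipip, vti, gre, gretap and erspan conversions all have the same shape: a .exit_batch handler sees every struct net being dismantled on one list, so all of their tunnel devices can be collected and unregistered under a single rtnl_lock()/unregister_netdevice_many() round instead of one per namespace. A generic sketch of the pattern; the collection step is a placeholder:

/* Sketch of the pernet exit_batch pattern used above: gather devices
 * from every exiting netns, then tear them down in one batch.
 */
static void __net_exit example_exit_batch_net(struct list_head *net_list)
{
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		/* queue this namespace's devices onto 'list' here */
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations example_net_ops = {
	.exit_batch = example_exit_batch_net,
};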
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c9b3e6e069ae..b3ee01b0551b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,6 +67,7 @@
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>
+#include <net/switchdev.h>
struct ipmr_rule {
struct fib_rule common;
@@ -264,6 +265,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
fib_rules_unregister(net->ipv4.mr_rules_ops);
rtnl_unlock();
}
+
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
+}
+
+static unsigned int ipmr_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
+}
+
+bool ipmr_rule_default(const struct fib_rule *rule)
+{
+ return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
+}
+EXPORT_SYMBOL(ipmr_rule_default);
#else
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
@@ -298,6 +315,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
net->ipv4.mrt = NULL;
rtnl_unlock();
}
+
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return 0;
+}
+
+static unsigned int ipmr_rules_seq_read(struct net *net)
+{
+ return 0;
+}
+
+bool ipmr_rule_default(const struct fib_rule *rule)
+{
+ return true;
+}
+EXPORT_SYMBOL(ipmr_rule_default);
#endif
static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -587,6 +620,82 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
}
#endif
+static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
+ struct net *net,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ vifi_t vif_index, u32 tb_id)
+{
+ struct vif_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .dev = vif->dev,
+ .vif_index = vif_index,
+ .vif_flags = vif->flags,
+ .tb_id = tb_id,
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_ipmr_vif_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ vifi_t vif_index, u32 tb_id)
+{
+ struct vif_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .dev = vif->dev,
+ .vif_index = vif_index,
+ .vif_flags = vif->flags,
+ .tb_id = tb_id,
+ };
+
+ ASSERT_RTNL();
+ net->ipv4.ipmr_seq++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
+ struct net *net,
+ enum fib_event_type event_type,
+ struct mfc_cache *mfc, u32 tb_id)
+{
+ struct mfc_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .mfc = mfc,
+ .tb_id = tb_id
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_ipmr_mfc_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct mfc_cache *mfc, u32 tb_id)
+{
+ struct mfc_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .mfc = mfc,
+ .tb_id = tb_id
+ };
+
+ ASSERT_RTNL();
+ net->ipv4.ipmr_seq++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
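These helpers publish VIF and MFC changes on the FIB notifier chain and bump ipmr_seq so a late registrant can detect that it missed events and replay them via the dump hooks. A sketch of a consumer handling the new events; registration plumbing and the offload action are placeholders:

/* Sketch: consuming the IPMR events published above from a FIB
 * notifier block. The structures are those initialized by the helpers.
 */
static int example_fib_event(struct notifier_block *nb,
			     unsigned long event, void *ptr)
{
	struct fib_notifier_info *info = ptr;
	struct vif_entry_notifier_info *vif_info;

	if (info->family != RTNL_FAMILY_IPMR)
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_VIF_ADD:
		vif_info = container_of(info,
					struct vif_entry_notifier_info, info);
		/* program vif_info->dev / vif_info->vif_index in hardware */
		break;
	case FIB_EVENT_ENTRY_DEL:
		/* container_of() to mfc_entry_notifier_info, then unoffload */
		break;
	}

	return NOTIFY_DONE;
}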
/**
* vif_delete - Delete a VIF entry
* @notify: Set to 1, if the caller is a notifier_call
@@ -594,6 +703,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
+ struct net *net = read_pnet(&mrt->net);
struct vif_device *v;
struct net_device *dev;
struct in_device *in_dev;
@@ -603,6 +713,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
v = &mrt->vif_table[vifi];
+ if (VIF_EXISTS(mrt, vifi))
+ call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
+ mrt->id);
+
write_lock_bh(&mrt_lock);
dev = v->dev;
v->dev = NULL;
@@ -652,10 +766,11 @@ static void ipmr_cache_free_rcu(struct rcu_head *head)
kmem_cache_free(mrt_cachep, c);
}
-static inline void ipmr_cache_free(struct mfc_cache *c)
+void ipmr_cache_free(struct mfc_cache *c)
{
call_rcu(&c->rcu, ipmr_cache_free_rcu);
}
+EXPORT_SYMBOL(ipmr_cache_free);
/* Destroy an unresolved cache entry, killing queued skbs
* and reporting error to netlink readers.
@@ -754,6 +869,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
struct vifctl *vifc, int mrtsock)
{
int vifi = vifc->vifc_vifi;
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
+ };
struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct in_device *in_dev;
@@ -828,6 +946,13 @@ static int vif_add(struct net *net, struct mr_table *mrt,
/* Fill in the VIF structures */
+ attr.orig_dev = dev;
+ if (!switchdev_port_attr_get(dev, &attr)) {
+ memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
+ v->dev_parent_id.id_len = attr.u.ppid.id_len;
+ } else {
+ v->dev_parent_id.id_len = 0;
+ }
v->rate_limit = vifc->vifc_rate_limit;
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
@@ -851,6 +976,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
if (vifi+1 > mrt->maxvif)
mrt->maxvif = vifi+1;
write_unlock_bh(&mrt_lock);
+ call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
return 0;
}
@@ -949,6 +1075,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
if (c) {
c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
c->mfc_un.res.minvif = MAXVIFS;
+ refcount_set(&c->mfc_un.res.refcount, 1);
}
return c;
}
@@ -1150,6 +1277,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
+ struct net *net = read_pnet(&mrt->net);
struct mfc_cache *c;
/* The entries are added/deleted only under RTNL */
@@ -1161,8 +1289,9 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
return -ENOENT;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_free(c);
+ ipmr_cache_put(c);
return 0;
}
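The ipmr_cache_free() to ipmr_cache_put() switch implies a reference count on MFC entries, seeded to 1 in ipmr_cache_alloc() above so that offload consumers can hold entries past deletion. A minimal sketch of the pair this hunk presumes (the helpers are assumed to live in the mroute header):

/* Assumed counterparts; the refcount is the field initialized above. */
static inline void ipmr_cache_hold(struct mfc_cache *c)
{
	refcount_inc(&c->mfc_un.res.refcount);
}

static inline void ipmr_cache_put(struct mfc_cache *c)
{
	if (refcount_dec_and_test(&c->mfc_un.res.refcount))
		ipmr_cache_free(c);
}
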
@@ -1189,6 +1318,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
+ mrt->id);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1238,6 +1369,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
ipmr_cache_resolve(net, mrt, uc, c);
ipmr_cache_free(uc);
}
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1245,6 +1377,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
/* Close the multicast socket, and clear the vif tables etc */
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
+ struct net *net = read_pnet(&mrt->net);
struct mfc_cache *c, *tmp;
LIST_HEAD(list);
int i;
@@ -1263,8 +1396,10 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+ mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_free(c);
+ ipmr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
@@ -1724,10 +1859,33 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
return dst_output(net, sk, skb);
}
+#ifdef CONFIG_NET_SWITCHDEV
+static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
+ int in_vifi, int out_vifi)
+{
+ struct vif_device *out_vif = &mrt->vif_table[out_vifi];
+ struct vif_device *in_vif = &mrt->vif_table[in_vifi];
+
+ if (!skb->offload_mr_fwd_mark)
+ return false;
+ if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
+ return false;
+ return netdev_phys_item_id_same(&out_vif->dev_parent_id,
+ &in_vif->dev_parent_id);
+}
+#else
+static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
+ int in_vifi, int out_vifi)
+{
+ return false;
+}
+#endif
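netdev_phys_item_id_same() is introduced elsewhere in this series; a plausible implementation, assuming it simply compares the length and bytes of the two switchdev parent IDs captured in vif_add() above:

/* Sketch; the real helper is expected to live in netdevice.h. */
static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
					    struct netdev_phys_item_id *b)
{
	return a->id_len == b->id_len &&
	       !memcmp(a->id, b->id, a->id_len);
}
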
+
/* Processing handlers for ipmr_forward */
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *c, int vifi)
+ int in_vifi, struct sk_buff *skb,
+ struct mfc_cache *c, int vifi)
{
const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &mrt->vif_table[vifi];
@@ -1748,6 +1906,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
goto out_free;
}
+ if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
+ goto out_free;
+
if (vif->flags & VIFF_TUNNEL) {
rt = ip_route_output_ports(net, &fl4, NULL,
vif->remote, vif->local,
@@ -1925,8 +2086,8 @@ forward:
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ipmr_queue_xmit(net, mrt, skb2, cache,
- psend);
+ ipmr_queue_xmit(net, mrt, true_vifi,
+ skb2, cache, psend);
}
psend = ct;
}
@@ -1937,9 +2098,10 @@ last_forward:
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ipmr_queue_xmit(net, mrt, skb2, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb2,
+ cache, psend);
} else {
- ipmr_queue_xmit(net, mrt, skb, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
return;
}
}
@@ -2156,6 +2318,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
return -EMSGSIZE;
+ if (c->mfc_flags & MFC_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
return -EMSGSIZE;
@@ -3048,14 +3213,87 @@ static const struct net_protocol pim_protocol = {
};
#endif
+static unsigned int ipmr_seq_read(struct net *net)
+{
+ ASSERT_RTNL();
+
+ return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
+}
+
+static int ipmr_dump(struct net *net, struct notifier_block *nb)
+{
+ struct mr_table *mrt;
+ int err;
+
+ err = ipmr_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ ipmr_for_each_table(mrt, net) {
+ struct vif_device *v = &mrt->vif_table[0];
+ struct mfc_cache *mfc;
+ int vifi;
+
+ /* Notifiy on table VIF entries */
+ /* Notify on table VIF entries */
+ read_lock(&mrt_lock);
+ for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
+ if (!v->dev)
+ continue;
+
+ call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id);
+ }
+ read_unlock(&mrt_lock);
+
+ /* Notify on table MFC entries */
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ call_ipmr_mfc_entry_notifier(nb, net,
+ FIB_EVENT_ENTRY_ADD, mfc,
+ mrt->id);
+ }
+
+ return 0;
+}
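These two ops plug into the generic fib notifier core: on listener registration the core replays current state through ->fib_dump() and uses ->fib_seq_read() to detect changes racing with the replay. A hedged sketch of that flow, with names assumed (the real logic lives in net/core/fib_notifier.c):

static int example_replay(struct net *net,
			  const struct fib_notifier_ops *ops,
			  struct notifier_block *nb)
{
	unsigned int seq = ops->fib_seq_read(net);
	int err = ops->fib_dump(net, nb);

	if (err)
		return err;
	/* A moved counter means entries changed mid-dump: retry. */
	return ops->fib_seq_read(net) == seq ? 0 : -EAGAIN;
}
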
+
+static const struct fib_notifier_ops ipmr_notifier_ops_template = {
+ .family = RTNL_FAMILY_IPMR,
+ .fib_seq_read = ipmr_seq_read,
+ .fib_dump = ipmr_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __net_init ipmr_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ net->ipv4.ipmr_seq = 0;
+
+ ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv4.ipmr_notifier_ops = ops;
+
+ return 0;
+}
+
+static void __net_exit ipmr_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
+ net->ipv4.ipmr_notifier_ops = NULL;
+}
+
/* Setup for IP multicast routing */
static int __net_init ipmr_net_init(struct net *net)
{
int err;
+ err = ipmr_notifier_init(net);
+ if (err)
+ goto ipmr_notifier_fail;
+
err = ipmr_rules_init(net);
if (err < 0)
- goto fail;
+ goto ipmr_rules_fail;
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
@@ -3072,7 +3310,9 @@ proc_cache_fail:
proc_vif_fail:
ipmr_rules_exit(net);
#endif
-fail:
+ipmr_rules_fail:
+ ipmr_notifier_exit(net);
+ipmr_notifier_fail:
return err;
}
@@ -3082,6 +3322,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
remove_proc_entry("ip_mr_cache", net->proc_net);
remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
+ ipmr_notifier_exit(net);
ipmr_rules_exit(net);
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0d3c038d7b04..cac8dd309f39 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -251,10 +251,12 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
return ret;
}
-static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
+static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_tcp_fastopen);
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
struct tcp_fastopen_context *ctxt;
int ret;
@@ -265,7 +267,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
return -ENOMEM;
rcu_read_lock();
- ctxt = rcu_dereference(tcp_fastopen_ctx);
+ ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctxt)
memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
else
@@ -282,12 +284,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
ret = -EINVAL;
goto bad_key;
}
- /* Generate a dummy secret but don't publish it. This
- * is needed so we don't regenerate a new key on the
- * first invocation of tcp_fastopen_cookie_gen
- */
- tcp_fastopen_init_key_once(false);
- tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+ tcp_fastopen_reset_cipher(net, user_key, TCP_FASTOPEN_KEY_LENGTH);
}
bad_key:
@@ -358,11 +355,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_tcp_fastopen_blackhole_timeout);
int ret;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- tcp_fastopen_active_timeout_reset();
+ atomic_set(&net->ipv4.tfo_active_disable_times, 0);
return ret;
}
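The container_of() trick above only works because each namespace registers a private copy of ipv4_net_table whose .data pointers are rebased from init_net to the new net. A hedged sketch of that registration-time fixup, following the pattern assumed from ipv4_sysctl_init_net():

static int __net_init example_sysctl_init(struct net *net)
{
	struct ctl_table *table;
	int i;

	table = kmemdup(ipv4_net_table, sizeof(ipv4_net_table), GFP_KERNEL);
	if (!table)
		return -ENOMEM;
	/* Rebase each .data pointer into this namespace's struct net. */
	for (i = 0; table[i].procname; i++)
		table[i].data += (void *)net - (void *)&init_net;
	net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
	if (!net->ipv4.ipv4_hdr) {
		kfree(table);
		return -ENOMEM;
	}
	return 0;
}
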
@@ -401,27 +400,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_fastopen",
- .data = &sysctl_tcp_fastopen,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tcp_fastopen_key",
- .mode = 0600,
- .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
- .proc_handler = proc_tcp_fastopen_key,
- },
- {
- .procname = "tcp_fastopen_blackhole_timeout_sec",
- .data = &sysctl_tcp_fastopen_blackhole_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_tfo_blackhole_detect_timeout,
- .extra1 = &zero,
- },
- {
.procname = "tcp_abort_on_overflow",
.data = &sysctl_tcp_abort_on_overflow,
.maxlen = sizeof(int),
@@ -1085,6 +1063,28 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tcp_fastopen",
+ .data = &init_net.ipv4.sysctl_tcp_fastopen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_fastopen_key",
+ .mode = 0600,
+ .data = &init_net.ipv4.sysctl_tcp_fastopen,
+ .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+ .proc_handler = proc_tcp_fastopen_key,
+ },
+ {
+ .procname = "tcp_fastopen_blackhole_timeout_sec",
+ .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tfo_blackhole_detect_timeout,
+ .extra1 = &zero,
+ },
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5091402720ab..23225c98d287 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1126,7 +1126,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -2749,7 +2749,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once(net);
fastopen_queue_tune(sk, val);
} else {
@@ -2759,7 +2759,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
- } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index e3c33220c418..de470e7e586f 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -9,15 +9,18 @@
#include <net/inetpeer.h>
#include <net/tcp.h>
-int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE;
-
-struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-
-static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
-
-void tcp_fastopen_init_key_once(bool publish)
+void tcp_fastopen_init_key_once(struct net *net)
{
- static u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ struct tcp_fastopen_context *ctxt;
+
+ rcu_read_lock();
+ ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+ if (ctxt) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
/* tcp_fastopen_reset_cipher publishes the new context
* atomically, so we allow this race happening here.
@@ -25,8 +28,8 @@ void tcp_fastopen_init_key_once(bool publish)
* All call sites of tcp_fastopen_cookie_gen also check
* for a valid cookie, so this is an acceptable risk.
*/
- if (net_get_random_once(key, sizeof(key)) && publish)
- tcp_fastopen_reset_cipher(key, sizeof(key));
+ get_random_bytes(key, sizeof(key));
+ tcp_fastopen_reset_cipher(net, key, sizeof(key));
}
static void tcp_fastopen_ctx_free(struct rcu_head *head)
@@ -37,7 +40,22 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
kfree(ctx);
}
-int tcp_fastopen_reset_cipher(void *key, unsigned int len)
+void tcp_fastopen_ctx_destroy(struct net *net)
+{
+ struct tcp_fastopen_context *ctxt;
+
+ spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+ ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
+ spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+ if (ctxt)
+ call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
+}
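tcp_fastopen_ctx_destroy() follows the usual RCU replace-and-retire protocol: swap the pointer under the spinlock, then free the old context only after a grace period. The matching reader side, sketched for illustration (it mirrors __tcp_fastopen_cookie_gen() further down):

static bool example_tfo_key_peek(struct net *net, u8 *key_out)
{
	struct tcp_fastopen_context *ctx;
	bool ok = false;

	rcu_read_lock();
	ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
	if (ctx) {
		memcpy(key_out, ctx->key, TCP_FASTOPEN_KEY_LENGTH);
		ok = true;
	}
	rcu_read_unlock();
	return ok;
}
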
+
+int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len)
{
int err;
struct tcp_fastopen_context *ctx, *octx;
@@ -61,26 +79,27 @@ error: kfree(ctx);
}
memcpy(ctx->key, key, len);
- spin_lock(&tcp_fastopen_ctx_lock);
+ spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
- octx = rcu_dereference_protected(tcp_fastopen_ctx,
- lockdep_is_held(&tcp_fastopen_ctx_lock));
- rcu_assign_pointer(tcp_fastopen_ctx, ctx);
- spin_unlock(&tcp_fastopen_ctx_lock);
+ octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+ spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
return err;
}
-static bool __tcp_fastopen_cookie_gen(const void *path,
+static bool __tcp_fastopen_cookie_gen(struct net *net,
+ const void *path,
struct tcp_fastopen_cookie *foc)
{
struct tcp_fastopen_context *ctx;
bool ok = false;
rcu_read_lock();
- ctx = rcu_dereference(tcp_fastopen_ctx);
+ ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctx) {
crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
@@ -96,7 +115,8 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
*
* XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
*/
-static bool tcp_fastopen_cookie_gen(struct request_sock *req,
+static bool tcp_fastopen_cookie_gen(struct net *net,
+ struct request_sock *req,
struct sk_buff *syn,
struct tcp_fastopen_cookie *foc)
{
@@ -104,7 +124,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
const struct iphdr *iph = ip_hdr(syn);
__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- return __tcp_fastopen_cookie_gen(path, foc);
+ return __tcp_fastopen_cookie_gen(net, path, foc);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -112,13 +132,13 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
const struct ipv6hdr *ip6h = ipv6_hdr(syn);
struct tcp_fastopen_cookie tmp;
- if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
+ if (__tcp_fastopen_cookie_gen(net, &ip6h->saddr, &tmp)) {
struct in6_addr *buf = &tmp.addr;
int i;
for (i = 0; i < 4; i++)
buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
- return __tcp_fastopen_cookie_gen(buf, foc);
+ return __tcp_fastopen_cookie_gen(net, buf, foc);
}
}
#endif
@@ -279,25 +299,26 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct tcp_fastopen_cookie *foc)
{
- struct tcp_fastopen_cookie valid_foc = { .len = -1 };
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+ int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
if (foc->len == 0) /* Client requests a cookie */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
- if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+ if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
(syn_data || foc->len >= 0) &&
tcp_fastopen_queue_check(sk))) {
foc->len = -1;
return NULL;
}
- if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+ if (syn_data && (tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;
if (foc->len >= 0 && /* Client presents or requests a cookie */
- tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
+ tcp_fastopen_cookie_gen(sock_net(sk), req, skb, &valid_foc) &&
foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
foc->len == valid_foc.len &&
!memcmp(foc->val, valid_foc.val, foc->len)) {
@@ -347,7 +368,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
return false;
}
- if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
+ if (sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
cookie->len = -1;
return true;
}
@@ -401,25 +422,16 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect);
* TFO connection with data exchanges.
*/
-/* Default to 1hr */
-unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
-static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
-static unsigned long tfo_active_disable_stamp __read_mostly;
-
/* Disable active TFO and record current jiffies and
* tfo_active_disable_times
*/
void tcp_fastopen_active_disable(struct sock *sk)
{
- atomic_inc(&tfo_active_disable_times);
- tfo_active_disable_stamp = jiffies;
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
-}
+ struct net *net = sock_net(sk);
-/* Reset tfo_active_disable_times to 0 */
-void tcp_fastopen_active_timeout_reset(void)
-{
- atomic_set(&tfo_active_disable_times, 0);
+ atomic_inc(&net->ipv4.tfo_active_disable_times);
+ net->ipv4.tfo_active_disable_stamp = jiffies;
+ NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
}
/* Calculate timeout for tfo active disable
@@ -428,17 +440,18 @@ void tcp_fastopen_active_timeout_reset(void)
*/
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
- int tfo_da_times = atomic_read(&tfo_active_disable_times);
- int multiplier;
+ unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+ int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
unsigned long timeout;
+ int multiplier;
if (!tfo_da_times)
return false;
/* Limit timeout to max: 2^6 * initial timeout */
multiplier = 1 << min(tfo_da_times - 1, 6);
- timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ;
- if (time_before(jiffies, tfo_active_disable_stamp + timeout))
+ timeout = multiplier * tfo_bh_timeout * HZ;
+ if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
return true;
/* Mark check bit so we can check for successful active TFO
@@ -474,10 +487,10 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
}
}
} else if (tp->syn_fastopen_ch &&
- atomic_read(&tfo_active_disable_times)) {
+ atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
dst = sk_dst_get(sk);
if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
- tcp_fastopen_active_timeout_reset();
+ atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
dst_release(dst);
}
}
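Worked example of the exponential backoff in tcp_fastopen_active_should_disable(), assuming the default 3600 s blackhole timeout:

/* tfo_da_times:  1    2    3    4     5     6     7+
 * multiplier:    1    2    4    8     16    32    64   (1 << min(n-1, 6))
 * timeout:       1h   2h   4h   8h    16h   32h   64h  (capped at 2^6)
 */
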
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c5d7656beeee..db9bb46b5776 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2333,9 +2333,9 @@ static bool tcp_any_retrans_done(const struct sock *sk)
return false;
}
-#if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, const char *msg)
{
+#if FASTRETRANS_DEBUG > 1
struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
@@ -2357,10 +2357,8 @@ static void DBGUNDO(struct sock *sk, const char *msg)
tp->packets_out);
}
#endif
-}
-#else
-#define DBGUNDO(x...) do { } while (0)
#endif
+}
static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
{
@@ -4268,11 +4266,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
tp->rx_opt.num_sacks = num_sacks;
}
-enum tcp_queue {
- OOO_QUEUE,
- RCV_QUEUE,
-};
-
/**
* tcp_try_coalesce - try to merge skb to prior one
* @sk: socket
@@ -4288,7 +4281,6 @@ enum tcp_queue {
* Returns true if caller should free @from instead of queueing it
*/
static bool tcp_try_coalesce(struct sock *sk,
- enum tcp_queue dest,
struct sk_buff *to,
struct sk_buff *from,
bool *fragstolen)
@@ -4313,10 +4305,7 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->has_rxtstamp) {
TCP_SKB_CB(to)->has_rxtstamp = true;
- if (dest == OOO_QUEUE)
- TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
- else
- to->tstamp = from->tstamp;
+ to->tstamp = from->tstamp;
}
return true;
@@ -4353,9 +4342,6 @@ static void tcp_ofo_queue(struct sock *sk)
}
p = rb_next(p);
rb_erase(&skb->rbnode, &tp->out_of_order_queue);
- /* Replace tstamp which was stomped by rbnode */
- if (TCP_SKB_CB(skb)->has_rxtstamp)
- skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4367,8 +4353,7 @@ static void tcp_ofo_queue(struct sock *sk)
TCP_SKB_CB(skb)->end_seq);
tail = skb_peek_tail(&sk->sk_receive_queue);
- eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
- tail, skb, &fragstolen);
+ eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (!eaten)
@@ -4422,10 +4407,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
return;
}
- /* Stash tstamp to avoid being stomped on by rbnode */
- if (TCP_SKB_CB(skb)->has_rxtstamp)
- TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
-
/* Disable header prediction. */
tp->pred_flags = 0;
inet_csk_schedule_ack(sk);
@@ -4453,7 +4434,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
/* In the typical case, we are adding an skb to the end of the list.
* Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
*/
- if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+ if (tcp_try_coalesce(sk, tp->ooo_last_skb,
skb, &fragstolen)) {
coalesce_done:
tcp_grow_window(sk, skb);
@@ -4504,7 +4485,7 @@ coalesce_done:
__kfree_skb(skb1);
goto merge_right;
}
- } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+ } else if (tcp_try_coalesce(sk, skb1,
skb, &fragstolen)) {
goto coalesce_done;
}
@@ -4556,7 +4537,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
__skb_pull(skb, hdrlen);
eaten = (tail &&
- tcp_try_coalesce(sk, RCV_QUEUE, tail,
+ tcp_try_coalesce(sk, tail,
skb, fragstolen)) ? 1 : 0;
tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 85164d4d3e53..c7460fd90884 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2473,6 +2473,11 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
+ net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
+ spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
+ net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
+ atomic_set(&net->ipv4.tfo_active_disable_times, 0);
+
return 0;
fail:
tcp_sk_exit(net);
@@ -2482,7 +2487,12 @@ fail:
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
+ struct net *net;
+
inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
+ list_for_each_entry(net, net_exit_list, exit_list)
+ tcp_fastopen_ctx_destroy(net);
}
static struct pernet_operations __net_initdata tcp_sk_ops = {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 102b2c90bb80..0ab78abc811b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -892,10 +892,14 @@ static void tcp_metrics_flush_all(struct net *net)
for (row = 0; row < max_rows; row++, hb++) {
struct tcp_metrics_block __rcu **pp;
+ bool match;
+
spin_lock_bh(&tcp_metrics_lock);
pp = &hb->chain;
for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
- if (net_eq(tm_net(tm), net)) {
+ match = net ? net_eq(tm_net(tm), net) :
+ !atomic_read(&tm_net(tm)->count);
+ if (match) {
*pp = tm->tcpm_next;
kfree_rcu(tm, rcu_head);
} else {
@@ -1018,14 +1022,14 @@ static int __net_init tcp_net_metrics_init(struct net *net)
return 0;
}
-static void __net_exit tcp_net_metrics_exit(struct net *net)
+static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list)
{
- tcp_metrics_flush_all(net);
+ tcp_metrics_flush_all(NULL);
}
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
- .init = tcp_net_metrics_init,
- .exit = tcp_net_metrics_exit,
+ .init = tcp_net_metrics_init,
+ .exit_batch = tcp_net_metrics_exit_batch,
};
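The NULL argument works because .exit_batch runs after every namespace on the list has dropped its final reference, so passing NULL switches tcp_metrics_flush_all() from a per-namespace flush into a sweep for entries whose owning netns is already dead, as sketched:

/* Sketch of the match logic above: with a specific net, flush that
 * namespace's entries; with NULL, flush entries of any dead namespace.
 */
static bool tm_should_flush(struct tcp_metrics_block *tm, struct net *net)
{
	return net ? net_eq(tm_net(tm), net)
		   : !atomic_read(&tm_net(tm)->count);
}
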
void __init tcp_metrics_init(void)
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 218cfcc77650..ee113ff15fd0 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
{
- return min(tp->snd_ssthresh, tp->snd_cwnd-1);
+ return min(tp->snd_ssthresh, tp->snd_cwnd);
}
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5676237d2b0f..eb0359bdaa11 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1212,8 +1212,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
if (likely(partial)) {
up->forward_deficit += size;
size = up->forward_deficit;
- if (size < (sk->sk_rcvbuf >> 2) &&
- !skb_queue_empty(&up->reader_queue))
+ if (size < (sk->sk_rcvbuf >> 2))
return;
} else {
size += up->forward_deficit;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 96861c702c06..837418ff2d4b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.disable_policy = 0,
};
-/* Check if a valid qdisc is available */
-static inline bool addrconf_qdisc_ok(const struct net_device *dev)
+/* Check if link is ready: is it up and is a valid qdisc available */
+static inline bool addrconf_link_ready(const struct net_device *dev)
{
- return !qdisc_tx_is_noop(dev);
+ return netif_oper_up(dev) && !qdisc_tx_is_noop(dev);
}
static void addrconf_del_rs_timer(struct inet6_dev *idev)
@@ -451,7 +451,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ndev->token = in6addr_any;
- if (netif_running(dev) && addrconf_qdisc_ok(dev))
+ if (netif_running(dev) && addrconf_link_ready(dev))
ndev->if_flags |= IF_READY;
ipv6_mc_init_dev(ndev);
@@ -3297,7 +3297,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
struct rt6_info *rt, *prev;
rt = addrconf_dst_alloc(idev, &ifp->addr, false);
- if (unlikely(IS_ERR(rt)))
+ if (IS_ERR(rt))
return PTR_ERR(rt);
/* ifp->rt can be accessed outside of rtnl */
@@ -3403,7 +3403,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
/* restore routes for permanent addresses */
addrconf_permanent_addr(dev);
- if (!addrconf_qdisc_ok(dev)) {
+ if (!addrconf_link_ready(dev)) {
/* device is not ready yet. */
pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
dev->name);
@@ -3418,7 +3418,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
run_pending = 1;
}
} else if (event == NETDEV_CHANGE) {
- if (!addrconf_qdisc_ok(dev)) {
+ if (!addrconf_link_ready(dev)) {
/* device is still not ready. */
break;
}
@@ -6618,9 +6618,9 @@ void addrconf_cleanup(void)
unregister_pernet_subsys(&addrconf_ops);
ipv6_addr_label_cleanup();
- rtnl_lock();
+ rtnl_af_unregister(&inet6_ops);
- __rtnl_af_unregister(&inet6_ops);
+ rtnl_lock();
/* clean dev list */
for_each_netdev(&init_net, dev) {
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index b055bc79f56d..c6311d7108f6 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -30,7 +30,6 @@
* Policy Table
*/
struct ip6addrlbl_entry {
- possible_net_t lbl_net;
struct in6_addr prefix;
int prefixlen;
int ifindex;
@@ -41,19 +40,6 @@ struct ip6addrlbl_entry {
struct rcu_head rcu;
};
-static struct ip6addrlbl_table
-{
- struct hlist_head head;
- spinlock_t lock;
- u32 seq;
-} ip6addrlbl_table;
-
-static inline
-struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
-{
- return read_pnet(&lbl->lbl_net);
-}
-
/*
* Default policy table (RFC6724 + extensions)
*
@@ -148,13 +134,10 @@ static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
}
/* Find label */
-static bool __ip6addrlbl_match(struct net *net,
- const struct ip6addrlbl_entry *p,
+static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p,
const struct in6_addr *addr,
int addrtype, int ifindex)
{
- if (!net_eq(ip6addrlbl_net(p), net))
- return false;
if (p->ifindex && p->ifindex != ifindex)
return false;
if (p->addrtype && p->addrtype != addrtype)
@@ -169,8 +152,9 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
int type, int ifindex)
{
struct ip6addrlbl_entry *p;
- hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
- if (__ip6addrlbl_match(net, p, addr, type, ifindex))
+
+ hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
+ if (__ip6addrlbl_match(p, addr, type, ifindex))
return p;
}
return NULL;
@@ -196,8 +180,7 @@ u32 ipv6_addr_label(struct net *net,
}
/* allocate one entry */
-static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
- const struct in6_addr *prefix,
+static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
int prefixlen, int ifindex,
u32 label)
{
@@ -236,24 +219,23 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
newp->addrtype = addrtype;
newp->label = label;
INIT_HLIST_NODE(&newp->list);
- write_pnet(&newp->lbl_net, net);
refcount_set(&newp->refcnt, 1);
return newp;
}
/* add a label */
-static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
+static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
+ int replace)
{
- struct hlist_node *n;
struct ip6addrlbl_entry *last = NULL, *p = NULL;
+ struct hlist_node *n;
int ret = 0;
ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
replace);
- hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
if (p->prefixlen == newp->prefixlen &&
- net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
p->ifindex == newp->ifindex &&
ipv6_addr_equal(&p->prefix, &newp->prefix)) {
if (!replace) {
@@ -273,10 +255,10 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
if (last)
hlist_add_behind_rcu(&newp->list, &last->list);
else
- hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
+ hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
out:
if (!ret)
- ip6addrlbl_table.seq++;
+ net->ipv6.ip6addrlbl_table.seq++;
return ret;
}
@@ -292,12 +274,12 @@ static int ip6addrlbl_add(struct net *net,
__func__, prefix, prefixlen, ifindex, (unsigned int)label,
replace);
- newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
+ newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
if (IS_ERR(newp))
return PTR_ERR(newp);
- spin_lock(&ip6addrlbl_table.lock);
- ret = __ip6addrlbl_add(newp, replace);
- spin_unlock(&ip6addrlbl_table.lock);
+ spin_lock(&net->ipv6.ip6addrlbl_table.lock);
+ ret = __ip6addrlbl_add(net, newp, replace);
+ spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
if (ret)
ip6addrlbl_free(newp);
return ret;
@@ -315,9 +297,8 @@ static int __ip6addrlbl_del(struct net *net,
ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
__func__, prefix, prefixlen, ifindex);
- hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
if (p->prefixlen == prefixlen &&
- net_eq(ip6addrlbl_net(p), net) &&
p->ifindex == ifindex &&
ipv6_addr_equal(&p->prefix, prefix)) {
hlist_del_rcu(&p->list);
@@ -340,9 +321,9 @@ static int ip6addrlbl_del(struct net *net,
__func__, prefix, prefixlen, ifindex);
ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
- spin_lock(&ip6addrlbl_table.lock);
+ spin_lock(&net->ipv6.ip6addrlbl_table.lock);
ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
- spin_unlock(&ip6addrlbl_table.lock);
+ spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
return ret;
}
@@ -354,6 +335,9 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
ADDRLABEL(KERN_DEBUG "%s\n", __func__);
+ spin_lock_init(&net->ipv6.ip6addrlbl_table.lock);
+ INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head);
+
for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
int ret = ip6addrlbl_add(net,
ip6addrlbl_init_table[i].prefix,
@@ -373,14 +357,12 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
struct hlist_node *n;
/* Remove all labels belonging to the exiting net */
- spin_lock(&ip6addrlbl_table.lock);
- hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
- if (net_eq(ip6addrlbl_net(p), net)) {
- hlist_del_rcu(&p->list);
- ip6addrlbl_put(p);
- }
+ spin_lock(&net->ipv6.ip6addrlbl_table.lock);
+ hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
+ hlist_del_rcu(&p->list);
+ ip6addrlbl_put(p);
}
- spin_unlock(&ip6addrlbl_table.lock);
+ spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
}
static struct pernet_operations ipv6_addr_label_ops = {
@@ -390,8 +372,6 @@ static struct pernet_operations ipv6_addr_label_ops = {
int __init ipv6_addr_label_init(void)
{
- spin_lock_init(&ip6addrlbl_table.lock);
-
return register_pernet_subsys(&ipv6_addr_label_ops);
}
@@ -510,11 +490,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
int err;
rcu_read_lock();
- hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
- if (idx >= s_idx &&
- net_eq(ip6addrlbl_net(p), net)) {
+ hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
+ if (idx >= s_idx) {
err = ip6addrlbl_fill(skb, p,
- ip6addrlbl_table.seq,
+ net->ipv6.ip6addrlbl_table.seq,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWADDRLABEL,
@@ -571,7 +550,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
if (p && !ip6addrlbl_hold(p))
p = NULL;
- lseq = ip6addrlbl_table.seq;
+ lseq = net->ipv6.ip6addrlbl_table.seq;
rcu_read_unlock();
if (!p) {
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 305e2ed730bf..115d60919f72 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -99,7 +99,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
break;
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH)
- hdrlen = (hp->hdrlen+2)<<2;
+ hdrlen = ipv6_authlen(hp);
else
hdrlen = ipv6_optlen(hp);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 1602b491b281..241841fb479c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1156,19 +1156,21 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6gre_exit_net(struct net *net)
+static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
{
+ struct net *net;
LIST_HEAD(list);
rtnl_lock();
- ip6gre_destroy_tunnels(net, &list);
+ list_for_each_entry(net, net_list, exit_list)
+ ip6gre_destroy_tunnels(net, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations ip6gre_net_ops = {
.init = ip6gre_init_net,
- .exit = ip6gre_exit_net,
+ .exit_batch = ip6gre_exit_batch_net,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
};
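The .exit to .exit_batch conversions in this patch (ip6_gre, ip6_tnl, ip6_vti, sit) all share one goal: take rtnl_lock() once and call unregister_netdevice_many() once per batch of dying namespaces, instead of once per namespace. The general shape, sketched with an assumed per-net helper name:

static void __net_exit example_exit_batch(struct list_head *net_list)
{
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		example_queue_tunnels(net, &list);	/* assumed helper */
	unregister_netdevice_many(&list);	/* one teardown for the batch */
	rtnl_unlock();
}
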
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a1c24443cd9e..825548680b1e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2168,17 +2168,16 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
.priority = 1,
};
-static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
+static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
{
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct net_device *dev, *aux;
int h;
struct ip6_tnl *t;
- LIST_HEAD(list);
for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == &ip6_link_ops)
- unregister_netdevice_queue(dev, &list);
+ unregister_netdevice_queue(dev, list);
for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
@@ -2187,12 +2186,10 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
- unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_queue(t->dev, list);
t = rtnl_dereference(t->next);
}
}
-
- unregister_netdevice_many(&list);
}
static int __net_init ip6_tnl_init_net(struct net *net)
@@ -2236,16 +2233,21 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6_tnl_exit_net(struct net *net)
+static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
{
+ struct net *net;
+ LIST_HEAD(list);
+
rtnl_lock();
- ip6_tnl_destroy_tunnels(net);
+ list_for_each_entry(net, net_list, exit_list)
+ ip6_tnl_destroy_tunnels(net, &list);
+ unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
- .exit = ip6_tnl_exit_net,
+ .exit_batch = ip6_tnl_exit_batch_net,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
};
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index bcdc2d557de1..dbb74f3c57a7 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1053,23 +1053,22 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
.get_link_net = ip6_tnl_get_link_net,
};
-static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
+static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n,
+ struct list_head *list)
{
int h;
struct ip6_tnl *t;
- LIST_HEAD(list);
for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t) {
- unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_queue(t->dev, list);
t = rtnl_dereference(t->next);
}
}
t = rtnl_dereference(ip6n->tnls_wc[0]);
- unregister_netdevice_queue(t->dev, &list);
- unregister_netdevice_many(&list);
+ unregister_netdevice_queue(t->dev, list);
}
static int __net_init vti6_init_net(struct net *net)
@@ -1109,18 +1108,24 @@ err_alloc_dev:
return err;
}
-static void __net_exit vti6_exit_net(struct net *net)
+static void __net_exit vti6_exit_batch_net(struct list_head *net_list)
{
- struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct vti6_net *ip6n;
+ struct net *net;
+ LIST_HEAD(list);
rtnl_lock();
- vti6_destroy_tunnels(ip6n);
+ list_for_each_entry(net, net_list, exit_list) {
+ ip6n = net_generic(net, vti6_net_id);
+ vti6_destroy_tunnels(ip6n, &list);
+ }
+ unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations vti6_net_ops = {
.init = vti6_init_net,
- .exit = vti6_exit_net,
+ .exit_batch = vti6_exit_batch_net,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
};
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a5e466d4e093..b9404feabd78 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -377,6 +377,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = 0;
break;
+ case IPV6_FREEBIND:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ /* we also don't have a separate freebind bit for IPv6 */
+ inet_sk(sk)->freebind = valbool;
+ retv = 0;
+ break;
+
case IPV6_RECVORIGDSTADDR:
if (optlen < sizeof(int))
goto e_inval;
@@ -1214,6 +1222,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = inet_sk(sk)->transparent;
break;
+ case IPV6_FREEBIND:
+ val = inet_sk(sk)->freebind;
+ break;
+
case IPV6_RECVORIGDSTADDR:
val = np->rxopt.bits.rxorigdstaddr;
break;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index ac912bb21747..a799f5258614 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1848,19 +1848,22 @@ err_alloc_dev:
return err;
}
-static void __net_exit sit_exit_net(struct net *net)
+static void __net_exit sit_exit_batch_net(struct list_head *net_list)
{
LIST_HEAD(list);
+ struct net *net;
rtnl_lock();
- sit_destroy_tunnels(net, &list);
+ list_for_each_entry(net, net_list, exit_list)
+ sit_destroy_tunnels(net, &list);
+
unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
- .exit = sit_exit_net,
+ .exit_batch = sit_exit_batch_net,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
};
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index af4e76ac88ff..0b750a22c4b9 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1650,7 +1650,7 @@ static int kcm_clone(struct socket *osock, struct kcm_clone *info,
}
newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
- if (unlikely(IS_ERR(newfile))) {
+ if (IS_ERR(newfile)) {
err = PTR_ERR(newfile);
goto out_sock_alloc_fail;
}
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 1e1c9b20bab7..2fb703d70803 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -623,13 +623,18 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
u8 iv[16];
struct scatterlist src;
SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
- int err;
+ int err, datalen;
+ unsigned char *data;
llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
- sg_init_one(&src, skb->data, skb->len);
+ /* Compute data payload offset and data length */
+ data = skb_mac_header(skb) + skb->mac_len;
+ datalen = skb_tail_pointer(skb) - data;
+ sg_init_one(&src, data, datalen);
+
skcipher_request_set_tfm(req, key->tfm0);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &src, &src, skb->len, iv);
+ skcipher_request_set_crypt(req, &src, &src, datalen, iv);
err = crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
return err;
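The offset math above assumes this frame layout, where only the payload after the MAC (and auxiliary security) header may be encrypted:

/*  skb_mac_header(skb)                       skb_tail_pointer(skb)
 *  |<----- skb->mac_len ----->|<-------- datalen -------->|
 *  [ MAC hdr + security fields ][  payload handed to AES   ]
 */
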
@@ -713,7 +718,8 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA)
return -EINVAL;
- if (!hdr.fc.security_enabled || hdr.sec.level == 0) {
+ if (!hdr.fc.security_enabled ||
+ (hdr.sec.level == IEEE802154_SCF_SECLEVEL_NONE)) {
skb_push(skb, hlen);
return 0;
}
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 5cf33df888c3..e5e23c2cbe74 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1094,9 +1094,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
dev->targets_generation = 1;
if (ops->check_presence) {
- init_timer(&dev->check_pres_timer);
- dev->check_pres_timer.data = (unsigned long)dev;
- dev->check_pres_timer.function = nfc_check_pres_timeout;
+ setup_timer(&dev->check_pres_timer, nfc_check_pres_timeout,
+ (unsigned long)dev);
INIT_WORK(&dev->check_pres_work, nfc_check_pres_work);
}
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index b740fef0acc5..a8a6e7814e09 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -1004,9 +1004,8 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
- init_timer(&hdev->cmd_timer);
- hdev->cmd_timer.data = (unsigned long)hdev;
- hdev->cmd_timer.function = nfc_hci_cmd_timeout;
+ setup_timer(&hdev->cmd_timer, nfc_hci_cmd_timeout,
+ (unsigned long)hdev);
skb_queue_head_init(&hdev->rx_hcp_frags);
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 17e59a009ce6..58df37eae1e8 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -763,17 +763,14 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
mutex_init(&shdlc->state_mutex);
shdlc->state = SHDLC_DISCONNECTED;
- init_timer(&shdlc->connect_timer);
- shdlc->connect_timer.data = (unsigned long)shdlc;
- shdlc->connect_timer.function = llc_shdlc_connect_timeout;
+ setup_timer(&shdlc->connect_timer, llc_shdlc_connect_timeout,
+ (unsigned long)shdlc);
- init_timer(&shdlc->t1_timer);
- shdlc->t1_timer.data = (unsigned long)shdlc;
- shdlc->t1_timer.function = llc_shdlc_t1_timeout;
+ setup_timer(&shdlc->t1_timer, llc_shdlc_t1_timeout,
+ (unsigned long)shdlc);
- init_timer(&shdlc->t2_timer);
- shdlc->t2_timer.data = (unsigned long)shdlc;
- shdlc->t2_timer.function = llc_shdlc_t2_timeout;
+ setup_timer(&shdlc->t2_timer, llc_shdlc_t2_timeout,
+ (unsigned long)shdlc);
shdlc->w = SHDLC_MAX_WINDOW;
shdlc->srej_support = SHDLC_SREJ_SUPPORT;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 02eef5cf3cce..7988185072e5 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -1573,9 +1573,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
INIT_LIST_HEAD(&local->list);
kref_init(&local->ref);
mutex_init(&local->sdp_lock);
- init_timer(&local->link_timer);
- local->link_timer.data = (unsigned long) local;
- local->link_timer.function = nfc_llcp_symm_timer;
+ setup_timer(&local->link_timer, nfc_llcp_symm_timer,
+ (unsigned long)local);
skb_queue_head_init(&local->tx_queue);
INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
@@ -1601,9 +1600,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
mutex_init(&local->sdreq_lock);
INIT_HLIST_HEAD(&local->pending_sdreqs);
- init_timer(&local->sdreq_timer);
- local->sdreq_timer.data = (unsigned long) local;
- local->sdreq_timer.function = nfc_llcp_sdreq_timer;
+ setup_timer(&local->sdreq_timer, nfc_llcp_sdreq_timer,
+ (unsigned long)local);
INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
list_add(&local->list, &llcp_devices);
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 0389398fa4ab..2e5e7a41d8ef 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -108,7 +108,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
rtnl_lock();
err = netdev_master_upper_dev_link(vport->dev,
- get_dpdev(vport->dp), NULL, NULL);
+ get_dpdev(vport->dp),
+ NULL, NULL, NULL);
if (err)
goto error_unlock;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index bec01a3daf5b..3f5caa3fbd06 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -544,9 +544,7 @@ static void prb_init_blk_timer(struct packet_sock *po,
struct tpacket_kbdq_core *pkc,
void (*func) (unsigned long))
{
- init_timer(&pkc->retire_blk_timer);
- pkc->retire_blk_timer.data = (long)po;
- pkc->retire_blk_timer.function = func;
+ setup_timer(&pkc->retire_blk_timer, func, (long)po);
pkc->retire_blk_timer.expires = jiffies;
}
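All the init_timer() conversions in this patch (nfc, af_packet) rely on setup_timer() being shorthand for the same three assignments they replace; roughly, as a sketch of the then-current helper in linux/timer.h:

/* Rough equivalence only; the real setup_timer() is a macro. */
static inline void setup_timer_sketch(struct timer_list *timer,
				      void (*function)(unsigned long),
				      unsigned long data)
{
	init_timer(timer);
	timer->function = function;
	timer->data = data;
}
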
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index c0c707eb2c96..5ef8ce8c83d4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -49,11 +49,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
filter = rcu_dereference(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(filter, skb);
}
rcu_read_unlock();
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index d89ebafd2239..700b345b07f9 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -17,13 +17,14 @@
#include <linux/errno.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/idr.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
struct basic_head {
- u32 hgenerator;
struct list_head flist;
+ struct idr handle_idr;
struct rcu_head rcu;
};
@@ -78,6 +79,7 @@ static int basic_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
INIT_LIST_HEAD(&head->flist);
+ idr_init(&head->handle_idr);
rcu_assign_pointer(tp->root, head);
return 0;
}
@@ -99,8 +101,10 @@ static void basic_destroy(struct tcf_proto *tp)
list_for_each_entry_safe(f, n, &head->flist, link) {
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
+ idr_remove_ext(&head->handle_idr, f->handle);
call_rcu(&f->rcu, basic_delete_filter);
}
+ idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
}
@@ -111,6 +115,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
+ idr_remove_ext(&head->handle_idr, f->handle);
call_rcu(&f->rcu, basic_delete_filter);
*last = list_empty(&head->flist);
return 0;
@@ -154,6 +159,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
struct nlattr *tb[TCA_BASIC_MAX + 1];
struct basic_filter *fold = (struct basic_filter *) *arg;
struct basic_filter *fnew;
+ unsigned long idr_index;
if (tca[TCA_OPTIONS] == NULL)
return -EINVAL;
@@ -176,33 +182,33 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = -EINVAL;
if (handle) {
fnew->handle = handle;
- } else if (fold) {
- fnew->handle = fold->handle;
+ if (!fold) {
+ err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+ handle, handle + 1, GFP_KERNEL);
+ if (err)
+ goto errout;
+ }
} else {
- unsigned int i = 0x80000000;
- do {
- if (++head->hgenerator == 0x7FFFFFFF)
- head->hgenerator = 1;
- } while (--i > 0 && basic_get(tp, head->hgenerator));
-
- if (i <= 0) {
- pr_err("Insufficient number of handles\n");
+ err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+ 1, 0x7FFFFFFF, GFP_KERNEL);
+ if (err)
goto errout;
- }
-
- fnew->handle = head->hgenerator;
+ fnew->handle = idr_index;
}
err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr);
- if (err < 0)
+ if (err < 0) {
+ if (!fold)
+ idr_remove_ext(&head->handle_idr, fnew->handle);
goto errout;
+ }
*arg = fnew;
if (fold) {
+ idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
list_replace_rcu(&fold->link, &fnew->link);
tcf_unbind_filter(tp, &fold->res);
call_rcu(&fold->rcu, basic_delete_filter);
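cls_basic, cls_bpf and cls_u32 all move from a hand-rolled hgenerator loop to an IDR keyed by filter handle. The calling convention assumed throughout: idr_alloc_ext() returns 0 on success, stores the allocated id in *index, and allocates from the half-open range [start, end). A condensed sketch of the allocate path the three classifiers now share:

static int example_get_handle(struct idr *handle_idr, void *filter,
			      u32 requested, u32 *handle)
{
	unsigned long idx;
	int err;

	if (requested)		/* caller-chosen handle: claim exactly it */
		err = idr_alloc_ext(handle_idr, filter, &idx,
				    requested, requested + 1, GFP_KERNEL);
	else			/* otherwise pick any free id in [1, 2^31) */
		err = idr_alloc_ext(handle_idr, filter, &idx,
				    1, 0x7FFFFFFF, GFP_KERNEL);
	if (err)
		return err;
	*handle = idx;
	return 0;
}

On replace, idr_replace_ext() repoints the stored entry at the new filter without freeing the id; on delete, idr_remove_ext() returns it to the pool.
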
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 520c5027646a..6c6b21f6ba62 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -17,6 +17,7 @@
#include <linux/skbuff.h>
#include <linux/filter.h>
#include <linux/bpf.h>
+#include <linux/idr.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
@@ -32,7 +33,7 @@ MODULE_DESCRIPTION("TC BPF based classifier");
struct cls_bpf_head {
struct list_head plist;
- u32 hgen;
+ struct idr handle_idr;
struct rcu_head rcu;
};
@@ -99,11 +100,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
} else if (at_ingress) {
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(prog->filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(prog->filter, skb);
}
@@ -238,6 +239,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
return -ENOBUFS;
INIT_LIST_HEAD_RCU(&head->plist);
+ idr_init(&head->handle_idr);
rcu_assign_pointer(tp->root, head);
return 0;
@@ -264,6 +266,9 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
+
+ idr_remove_ext(&head->handle_idr, prog->handle);
cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
@@ -287,6 +292,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp)
list_for_each_entry_safe(prog, tmp, &head->plist, link)
__cls_bpf_delete(tp, prog);
+ idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
}
@@ -421,27 +427,6 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
return 0;
}
-static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
- struct cls_bpf_head *head)
-{
- unsigned int i = 0x80000000;
- u32 handle;
-
- do {
- if (++head->hgen == 0x7FFFFFFF)
- head->hgen = 1;
- } while (--i > 0 && cls_bpf_get(tp, head->hgen));
-
- if (unlikely(i == 0)) {
- pr_err("Insufficient number of handles\n");
- handle = 0;
- } else {
- handle = head->hgen;
- }
-
- return handle;
-}
-
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
@@ -451,6 +436,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct cls_bpf_prog *oldprog = *arg;
struct nlattr *tb[TCA_BPF_MAX + 1];
struct cls_bpf_prog *prog;
+ unsigned long idr_index;
int ret;
if (tca[TCA_OPTIONS] == NULL)
@@ -476,21 +462,30 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
}
}
- if (handle == 0)
- prog->handle = cls_bpf_grab_new_handle(tp, head);
- else
+ if (handle == 0) {
+ ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
+ 1, 0x7FFFFFFF, GFP_KERNEL);
+ if (ret)
+ goto errout;
+ prog->handle = idr_index;
+ } else {
+ if (!oldprog) {
+ ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
+ handle, handle + 1, GFP_KERNEL);
+ if (ret)
+ goto errout;
+ }
prog->handle = handle;
- if (prog->handle == 0) {
- ret = -EINVAL;
- goto errout;
}
ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
if (ret < 0)
- goto errout;
+ goto errout_idr;
ret = cls_bpf_offload(tp, prog, oldprog);
if (ret) {
+ if (!oldprog)
+ idr_remove_ext(&head->handle_idr, prog->handle);
__cls_bpf_delete_prog(prog);
return ret;
}
@@ -499,6 +494,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
if (oldprog) {
+ idr_replace_ext(&head->handle_idr, prog, handle);
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
@@ -509,6 +505,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
*arg = prog;
return 0;
+errout_idr:
+ if (!oldprog)
+ idr_remove_ext(&head->handle_idr, prog->handle);
errout:
tcf_exts_destroy(&prog->exts);
kfree(prog);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index d230cb4c8094..db831ac708f6 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -152,37 +152,12 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_fl_filter *f;
struct fl_flow_key skb_key;
struct fl_flow_key skb_mkey;
- struct ip_tunnel_info *info;
if (!atomic_read(&head->ht.nelems))
return -1;
fl_clear_masked_range(&skb_key, &head->mask);
- info = skb_tunnel_info(skb);
- if (info) {
- struct ip_tunnel_key *key = &info->key;
-
- switch (ip_tunnel_info_af(info)) {
- case AF_INET:
- skb_key.enc_control.addr_type =
- FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- skb_key.enc_ipv4.src = key->u.ipv4.src;
- skb_key.enc_ipv4.dst = key->u.ipv4.dst;
- break;
- case AF_INET6:
- skb_key.enc_control.addr_type =
- FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- skb_key.enc_ipv6.src = key->u.ipv6.src;
- skb_key.enc_ipv6.dst = key->u.ipv6.dst;
- break;
- }
-
- skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
- skb_key.enc_tp.src = key->tp_src;
- skb_key.enc_tp.dst = key->tp_dst;
- }
-
skb_key.indev_ifindex = skb->skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
* so do it rather here.
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 10b8d851fc6b..094d224411a9 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -46,6 +46,7 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <linux/netdevice.h>
+#include <linux/idr.h>
struct tc_u_knode {
struct tc_u_knode __rcu *next;
@@ -82,6 +83,7 @@ struct tc_u_hnode {
struct tc_u_common *tp_c;
int refcnt;
unsigned int divisor;
+ struct idr handle_idr;
struct rcu_head rcu;
/* The 'ht' field MUST be the last field in structure to allow for
* more entries allocated at end of structure.
@@ -93,7 +95,7 @@ struct tc_u_common {
struct tc_u_hnode __rcu *hlist;
struct Qdisc *q;
int refcnt;
- u32 hgenerator;
+ struct idr handle_idr;
struct hlist_node hnode;
struct rcu_head rcu;
};
@@ -311,19 +313,19 @@ static void *u32_get(struct tcf_proto *tp, u32 handle)
return u32_lookup_key(ht, handle);
}
-static u32 gen_new_htid(struct tc_u_common *tp_c)
+static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
{
- int i = 0x800;
+ unsigned long idr_index;
+ int err;
- /* hgenerator only used inside rtnl lock it is safe to increment
+ /* This is only used inside the rtnl lock, so it is safe to update
* without read _copy_ update semantics
*/
- do {
- if (++tp_c->hgenerator == 0x7FF)
- tp_c->hgenerator = 1;
- } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
-
- return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
+ err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
+ 1, 0x7FF, GFP_KERNEL);
+ if (err)
+ return 0;
+ return (u32)(idr_index | 0x800) << 20;
}
static struct hlist_head *tc_u_common_hash;
@@ -366,8 +368,9 @@ static int u32_init(struct tcf_proto *tp)
return -ENOBUFS;
root_ht->refcnt++;
- root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
+ root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
root_ht->prio = tp->prio;
+ idr_init(&root_ht->handle_idr);
if (tp_c == NULL) {
tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
@@ -377,6 +380,7 @@ static int u32_init(struct tcf_proto *tp)
}
tp_c->q = tp->q;
INIT_HLIST_NODE(&tp_c->hnode);
+ idr_init(&tp_c->handle_idr);
h = tc_u_hash(tp);
hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
@@ -565,6 +569,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
rtnl_dereference(n->next));
tcf_unbind_filter(tp, &n->res);
u32_remove_hw_knode(tp, n->handle);
+ idr_remove_ext(&ht->handle_idr, n->handle);
call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
}
}
@@ -586,6 +591,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
hn = &phn->next, phn = rtnl_dereference(*hn)) {
if (phn == ht) {
u32_clear_hw_hnode(tp, ht);
+ idr_destroy(&ht->handle_idr);
+ idr_remove_ext(&tp_c->handle_idr, ht->handle);
RCU_INIT_POINTER(*hn, ht->next);
kfree_rcu(ht, rcu);
return 0;
@@ -633,6 +640,7 @@ static void u32_destroy(struct tcf_proto *tp)
kfree_rcu(ht, rcu);
}
+ idr_destroy(&tp_c->handle_idr);
kfree(tp_c);
}
@@ -701,27 +709,21 @@ ret:
return ret;
}
-#define NR_U32_NODE (1<<12)
-static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
+static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
{
- struct tc_u_knode *n;
- unsigned long i;
- unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
- GFP_KERNEL);
- if (!bitmap)
- return handle | 0xFFF;
-
- for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
- n;
- n = rtnl_dereference(n->next))
- set_bit(TC_U32_NODE(n->handle), bitmap);
-
- i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
- if (i >= NR_U32_NODE)
- i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
+ unsigned long idr_index;
+ u32 start = htid | 0x800;
+ u32 max = htid | 0xFFF;
+ u32 min = htid;
+
+ if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
+ start, max + 1, GFP_KERNEL)) {
+ if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
+ min + 1, max + 1, GFP_KERNEL))
+ return max;
+ }
- kfree(bitmap);
- return handle | (i >= NR_U32_NODE ? 0xFFF : i);
+ return (u32)idr_index;
}
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -806,6 +808,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
if (pins->handle == n->handle)
break;
+ idr_replace_ext(&ht->handle_idr, n, n->handle);
RCU_INIT_POINTER(n->next, pins->next);
rcu_assign_pointer(*ins, n);
}
@@ -937,22 +940,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
if (TC_U32_KEY(handle))
return -EINVAL;
- if (handle == 0) {
- handle = gen_new_htid(tp->data);
- if (handle == 0)
- return -ENOMEM;
- }
ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
if (ht == NULL)
return -ENOBUFS;
+ if (handle == 0) {
+ handle = gen_new_htid(tp->data, ht);
+ if (handle == 0) {
+ kfree(ht);
+ return -ENOMEM;
+ }
+ } else {
+ err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL,
+ handle, handle + 1, GFP_KERNEL);
+ if (err) {
+ kfree(ht);
+ return err;
+ }
+ }
ht->tp_c = tp_c;
ht->refcnt = 1;
ht->divisor = divisor;
ht->handle = handle;
ht->prio = tp->prio;
+ idr_init(&ht->handle_idr);
err = u32_replace_hw_hnode(tp, ht, flags);
if (err) {
+ idr_remove_ext(&tp_c->handle_idr, handle);
kfree(ht);
return err;
}
@@ -986,24 +1000,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
return -EINVAL;
handle = htid | TC_U32_NODE(handle);
+ err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
+ handle, handle + 1,
+ GFP_KERNEL);
+ if (err)
+ return err;
} else
handle = gen_new_kid(ht, htid);
- if (tb[TCA_U32_SEL] == NULL)
- return -EINVAL;
+ if (tb[TCA_U32_SEL] == NULL) {
+ err = -EINVAL;
+ goto erridr;
+ }
s = nla_data(tb[TCA_U32_SEL]);
n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
- if (n == NULL)
- return -ENOBUFS;
+ if (n == NULL) {
+ err = -ENOBUFS;
+ goto erridr;
+ }
#ifdef CONFIG_CLS_U32_PERF
size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
if (!n->pf) {
- kfree(n);
- return -ENOBUFS;
+ err = -ENOBUFS;
+ goto errfree;
}
#endif
@@ -1066,9 +1089,12 @@ errhw:
errout:
tcf_exts_destroy(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
+errfree:
free_percpu(n->pf);
#endif
kfree(n);
+erridr:
+ idr_remove_ext(&ht->handle_idr, handle);
return err;
}
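
The two IDRs introduced above split the 32-bit u32 handle between them: gen_new_htid() hands out the table id in bits 31..20 (auto-allocated tables carry the 0x800 flag, hence the << 20 and the [1, 0x7FF) range), while gen_new_kid() hands out the low 12-bit node id, trying [htid|0x800, htid|0xFFF] first and wrapping to (htid, htid|0xFFF] on exhaustion. In terms of the TC_U32_* macros from <linux/pkt_cls.h>:

/* Handle layout carved up by the allocators above:
 *   bits 31..20  table id    (TC_U32_HTID, from tp_c->handle_idr)
 *   bits 19..12  hash bucket (TC_U32_HASH)
 *   bits 11..0   key node id (TC_U32_NODE, from ht->handle_idr)
 */
static inline u32 example_make_handle(u32 htid, u32 bucket, u32 node)
{
	return (htid & 0xFFF00000) | ((bucket & 0xFF) << 12) | (node & 0xFFF);
}
/* e.g. example_make_handle(0x80100000, 0, 0x800) == 0x80100800 */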
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c6deb74e3d2f..aa82116ed10c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1500,7 +1500,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
int s_idx, s_q_idx;
struct net_device *dev;
const struct nlmsghdr *nlh = cb->nlh;
- struct tcmsg *tcm = nlmsg_data(nlh);
struct nlattr *tca[TCA_MAX + 1];
int err;
@@ -1510,7 +1509,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+ err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
if (err < 0)
return err;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index bf8c81e07c70..a0a198768aad 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -689,10 +689,8 @@ void qdisc_reset(struct Qdisc *qdisc)
}
EXPORT_SYMBOL(qdisc_reset);
-static void qdisc_rcu_free(struct rcu_head *head)
+static void qdisc_free(struct Qdisc *qdisc)
{
- struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
-
if (qdisc_is_percpu_stats(qdisc)) {
free_percpu(qdisc->cpu_bstats);
free_percpu(qdisc->cpu_qstats);
@@ -725,11 +723,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
kfree_skb_list(qdisc->gso_skb);
kfree_skb(qdisc->skb_bad_txq);
- /*
- * gen_estimator est_timer() might access qdisc->q.lock,
- * wait a RCU grace period before freeing qdisc.
- */
- call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
+ qdisc_free(qdisc);
}
EXPORT_SYMBOL(qdisc_destroy);
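
Freeing the qdisc synchronously is safe here only because the hazard named in the deleted comment is gone: with the reworked generic estimator, presumably nothing dereferences the qdisc after qdisc_destroy() returns. For contrast, the generic shape of the pattern being removed, as a standalone sketch (struct my_obj is illustrative):

struct my_obj {
	struct rcu_head rcu_head;
	/* ... payload ... */
};

static void my_obj_rcu_free(struct rcu_head *head)
{
	struct my_obj *obj = container_of(head, struct my_obj, rcu_head);

	kfree(obj);
}

/* Deferred free: concurrent readers under rcu_read_lock() may still
 * hold a pointer, so wait one grace period before kfree().
 */
static void my_obj_destroy(struct my_obj *obj)
{
	call_rcu(&obj->rcu_head, my_obj_rcu_free);
}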
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 7e148376ba52..c6d7ae81b41f 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -142,6 +142,7 @@ struct htb_class {
struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
unsigned int drops ____cacheline_aligned_in_smp;
+ unsigned int overlimits;
};
struct htb_level {
@@ -533,6 +534,9 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
if (new_mode == cl->cmode)
return;
+ if (new_mode == HTB_CANT_SEND)
+ cl->overlimits++;
+
if (cl->prio_activity) { /* not necessary: speed optimization */
if (cl->cmode != HTB_CANT_SEND)
htb_deactivate_prios(q, cl);
@@ -1143,6 +1147,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
struct htb_class *cl = (struct htb_class *)arg;
struct gnet_stats_queue qs = {
.drops = cl->drops,
+ .overlimits = cl->overlimits,
};
__u32 qlen = 0;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b1266e75ca43..5a4f10080290 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -146,7 +146,6 @@ struct netem_sched_data {
*/
struct netem_skb_cb {
psched_time_t time_to_send;
- ktime_t tstamp_save;
};
@@ -362,12 +361,13 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
static void tfifo_reset(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- struct rb_node *p;
+ struct rb_node *p = rb_first(&q->t_root);
- while ((p = rb_first(&q->t_root))) {
+ while (p) {
struct sk_buff *skb = netem_rb_to_skb(p);
- rb_erase(p, &q->t_root);
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, &q->t_root);
rtnl_kfree_skbs(skb, skb);
}
}
@@ -561,7 +561,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
cb->time_to_send = now + delay;
- cb->tstamp_save = skb->tstamp;
++q->counter;
tfifo_enqueue(skb, sch);
} else {
@@ -629,7 +628,10 @@ deliver:
qdisc_qstats_backlog_dec(sch, skb);
skb->next = NULL;
skb->prev = NULL;
- skb->tstamp = netem_skb_cb(skb)->tstamp_save;
+ /* skb->dev shares the skb->rbnode area, so we need
+ * to restore its value here.
+ */
+ skb->dev = qdisc_dev(sch);
#ifdef CONFIG_NET_CLS_ACT
/*
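
Both netem changes above follow from one sk_buff layout detail: skb->rbnode lives in the union shared with skb->next/skb->prev/skb->dev, so parking a packet on the t_root rbtree leaves skb->tstamp untouched (making tstamp_save redundant) but clobbers skb->dev, which dequeue must therefore restore. An abridged sketch of that layout; treat the exact field set as an assumption based on the hunks above:

struct sk_buff_layout_sketch {
	union {
		struct {
			struct sk_buff	  *next;
			struct sk_buff	  *prev;
			struct net_device *dev;	/* overwritten by rbnode */
		};
		struct rb_node rbnode;	/* used by netem's t_root */
	};
	/* ... */
	ktime_t tstamp;	/* outside the union, so it survives queueing */
};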
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 70f1b570bab9..bf90c5397719 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -12,7 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
inqueue.o outqueue.o ulpqueue.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o stream.o auth.o \
- offload.o
+ offload.o stream_sched.o stream_sched_prio.o \
+ stream_sched_rr.o
sctp_probe-y := probe.o
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 3afac275ee82..7b261afc47b9 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -311,10 +311,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
if (chunk->sent_count) {
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
- streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+ streamout->ext->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
} else {
chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
- streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
}
return 1;
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
@@ -323,7 +323,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
&chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
- streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+ streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
chunk->msg->expires_at &&
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 2966ff400755..4db012aa25f7 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -50,6 +50,7 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
/* Declare internal functions here. */
static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn);
@@ -72,32 +73,38 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
/* Add data to the front of the queue. */
static inline void sctp_outq_head_data(struct sctp_outq *q,
- struct sctp_chunk *ch)
+ struct sctp_chunk *ch)
{
+ struct sctp_stream_out_ext *oute;
+ __u16 stream;
+
list_add(&ch->list, &q->out_chunk_list);
q->out_qlen += ch->skb->len;
+
+ stream = sctp_chunk_stream_no(ch);
+ oute = q->asoc->stream.out[stream].ext;
+ list_add(&ch->stream_list, &oute->outq);
}
/* Take data from the front of the queue. */
static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q)
{
- struct sctp_chunk *ch = NULL;
-
- if (!list_empty(&q->out_chunk_list)) {
- struct list_head *entry = q->out_chunk_list.next;
-
- ch = list_entry(entry, struct sctp_chunk, list);
- list_del_init(entry);
- q->out_qlen -= ch->skb->len;
- }
- return ch;
+ return q->sched->dequeue(q);
}
+
/* Add data chunk to the end of the queue. */
static inline void sctp_outq_tail_data(struct sctp_outq *q,
struct sctp_chunk *ch)
{
+ struct sctp_stream_out_ext *oute;
+ __u16 stream;
+
list_add_tail(&ch->list, &q->out_chunk_list);
q->out_qlen += ch->skb->len;
+
+ stream = sctp_chunk_stream_no(ch);
+ oute = q->asoc->stream.out[stream].ext;
+ list_add_tail(&ch->stream_list, &oute->outq);
}
/*
@@ -207,6 +214,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
INIT_LIST_HEAD(&q->retransmit);
INIT_LIST_HEAD(&q->sacked);
INIT_LIST_HEAD(&q->abandoned);
+ sctp_sched_set_sched(asoc, SCTP_SS_FCFS);
}
/* Free the outqueue structure and any related pending chunks.
@@ -258,6 +266,7 @@ static void __sctp_outq_teardown(struct sctp_outq *q)
/* Throw away any leftover data chunks. */
while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
+ sctp_sched_dequeue_done(q, chunk);
/* Mark as send failure. */
sctp_chunk_fail(chunk, q->error);
@@ -366,7 +375,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
streamout = &asoc->stream.out[chk->sinfo.sinfo_stream];
asoc->sent_cnt_removable--;
asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
- streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+ streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
if (!chk->tsn_gap_acked) {
if (chk->transport)
@@ -391,20 +400,21 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
struct sctp_outq *q = &asoc->outqueue;
struct sctp_chunk *chk, *temp;
+ q->sched->unsched_all(&asoc->stream);
+
list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
- list_del_init(&chk->list);
- q->out_qlen -= chk->skb->len;
+ sctp_sched_dequeue_common(q, chk);
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
struct sctp_stream_out *streamout =
&asoc->stream.out[chk->sinfo.sinfo_stream];
- streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
}
msg_len -= SCTP_DATA_SNDSIZE(chk) +
@@ -415,6 +425,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
break;
}
+ q->sched->sched_all(&asoc->stream);
+
return msg_len;
}
@@ -1033,22 +1045,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
__u32 sid = ntohs(chunk->subh.data_hdr->stream);
- /* RFC 2960 6.5 Every DATA chunk MUST carry a valid
- * stream identifier.
- */
- if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) {
-
- /* Mark as failed send. */
- sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
- if (asoc->peer.prsctp_capable &&
- SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
- asoc->sent_cnt_removable--;
- sctp_chunk_free(chunk);
- continue;
- }
-
/* Has this chunk expired? */
if (sctp_chunk_abandoned(chunk)) {
+ sctp_sched_dequeue_done(q, chunk);
sctp_chunk_fail(chunk, 0);
sctp_chunk_free(chunk);
continue;
@@ -1070,6 +1069,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
new_transport = asoc->peer.active_path;
if (new_transport->state == SCTP_UNCONFIRMED) {
WARN_ONCE(1, "Attempt to send packet on unconfirmed path.");
+ sctp_sched_dequeue_done(q, chunk);
sctp_chunk_fail(chunk, 0);
sctp_chunk_free(chunk);
continue;
@@ -1133,6 +1133,11 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
else
asoc->stats.oodchunks++;
+ /* Only now is it safe to consider this
+ * chunk as sent, sched-wise.
+ */
+ sctp_sched_dequeue_done(q, chunk);
+
break;
default:
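
After these hunks a data chunk sits on two lists at once: the global q->out_chunk_list (via chunk->list) and its stream's ext->outq (via chunk->stream_list). Dequeue order is delegated to q->sched->dequeue(), and scheduler state only advances once sctp_sched_dequeue_done() confirms the chunk was actually packed, so a chunk that bounces (unconfirmed path, expired, etc.) can be failed without corrupting round-robin or priority position. An illustrative end-to-end sketch of that contract, in kernel style:

static void example_outq_flow(struct sctp_outq *q, struct sctp_datamsg *msg)
{
	struct sctp_chunk *ch;

	/* 1. Tail each chunk on both the global and per-stream lists. */
	list_for_each_entry(ch, &msg->chunks, frag_list)
		sctp_outq_tail(q, ch, GFP_KERNEL);

	/* 2. Tell the scheduler once per message, not once per chunk. */
	q->sched->enqueue(q, msg);

	/* 3. Flushing pulls chunks in scheduler order ... */
	while ((ch = q->sched->dequeue(q)) != NULL) {
		/* ... and only a chunk that really went out is "done",
		 * advancing out_curr and the round-robin state.
		 */
		sctp_sched_dequeue_done(q, ch);
	}
}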
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index e6a2974e020e..402bfbb888cd 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -50,6 +50,7 @@
#include <net/sock.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
static int sctp_cmd_interpreter(enum sctp_event event_type,
union sctp_subtype subtype,
@@ -1089,6 +1090,8 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc,
list_for_each_entry(chunk, &msg->chunks, frag_list)
sctp_outq_tail(&asoc->outqueue, chunk, gfp);
+
+ asoc->outqueue.sched->enqueue(&asoc->outqueue, msg);
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d4730ada7f32..88c28421ec15 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -79,6 +79,7 @@
#include <net/sock.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
/* Forward declarations for internal helper functions. */
static int sctp_writeable(struct sock *sk);
@@ -1927,6 +1928,13 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
goto out_free;
}
+ /* Allocate sctp_stream_out_ext if not already done */
+ if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
+ err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
+ if (err)
+ goto out_free;
+ }
+
if (sctp_wspace(asoc) < msg_len)
sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
@@ -3907,6 +3915,64 @@ out:
return retval;
}
+static int sctp_setsockopt_scheduler(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_association *asoc;
+ struct sctp_assoc_value params;
+ int retval = -EINVAL;
+
+ if (optlen < sizeof(params))
+ goto out;
+
+ optlen = sizeof(params);
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (params.assoc_value > SCTP_SS_MAX)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc)
+ goto out;
+
+ retval = sctp_sched_set_sched(asoc, params.assoc_value);
+
+out:
+ return retval;
+}
+
+static int sctp_setsockopt_scheduler_value(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_value params;
+ int retval = -EINVAL;
+
+ if (optlen < sizeof(params))
+ goto out;
+
+ optlen = sizeof(params);
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc)
+ goto out;
+
+ retval = sctp_sched_set_value(asoc, params.stream_id,
+ params.stream_value, GFP_KERNEL);
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4088,6 +4154,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_ADD_STREAMS:
retval = sctp_setsockopt_add_streams(sk, optval, optlen);
break;
+ case SCTP_STREAM_SCHEDULER:
+ retval = sctp_setsockopt_scheduler(sk, optval, optlen);
+ break;
+ case SCTP_STREAM_SCHEDULER_VALUE:
+ retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -6645,7 +6717,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
char __user *optval,
int __user *optlen)
{
- struct sctp_stream_out *streamout;
+ struct sctp_stream_out_ext *streamoute;
struct sctp_association *asoc;
struct sctp_prstatus params;
int retval = -EINVAL;
@@ -6668,21 +6740,29 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
if (!asoc || params.sprstat_sid >= asoc->stream.outcnt)
goto out;
- streamout = &asoc->stream.out[params.sprstat_sid];
+ streamoute = asoc->stream.out[params.sprstat_sid].ext;
+ if (!streamoute) {
+ /* Not allocated yet, means all stats are 0 */
+ params.sprstat_abandoned_unsent = 0;
+ params.sprstat_abandoned_sent = 0;
+ retval = 0;
+ goto out;
+ }
+
if (policy == SCTP_PR_SCTP_NONE) {
params.sprstat_abandoned_unsent = 0;
params.sprstat_abandoned_sent = 0;
for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
params.sprstat_abandoned_unsent +=
- streamout->abandoned_unsent[policy];
+ streamoute->abandoned_unsent[policy];
params.sprstat_abandoned_sent +=
- streamout->abandoned_sent[policy];
+ streamoute->abandoned_sent[policy];
}
} else {
params.sprstat_abandoned_unsent =
- streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+ streamoute->abandoned_unsent[__SCTP_PR_INDEX(policy)];
params.sprstat_abandoned_sent =
- streamout->abandoned_sent[__SCTP_PR_INDEX(policy)];
+ streamoute->abandoned_sent[__SCTP_PR_INDEX(policy)];
}
if (put_user(len, optlen) || copy_to_user(optval, &params, len)) {
@@ -6778,6 +6858,85 @@ out:
return retval;
}
+static int sctp_getsockopt_scheduler(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ params.assoc_value = sctp_sched_get_sched(asoc);
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_scheduler_value(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_stream_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ retval = sctp_sched_get_value(asoc, params.stream_id,
+ &params.stream_value);
+ if (retval)
+ goto out;
+
+ if (put_user(len, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (copy_to_user(optval, &params, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -6960,6 +7119,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_enable_strreset(sk, len, optval,
optlen);
break;
+ case SCTP_STREAM_SCHEDULER:
+ retval = sctp_getsockopt_scheduler(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_STREAM_SCHEDULER_VALUE:
+ retval = sctp_getsockopt_scheduler_value(sk, len, optval,
+ optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
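
For completeness, the new options are exercised from userspace like any other SCTP-level socket option. A hedged usage sketch, assuming the UAPI additions that accompany this series (SCTP_STREAM_SCHEDULER, SCTP_STREAM_SCHEDULER_VALUE, the SCTP_SS_* enum and struct sctp_stream_value in <linux/sctp.h>):

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/sctp.h>

/* Switch an association to the priority scheduler and give stream 1
 * the top priority (lower value means higher priority in the draft).
 */
static int example_set_prio(int fd, sctp_assoc_t assoc_id)
{
	struct sctp_assoc_value av;
	struct sctp_stream_value sv;

	memset(&av, 0, sizeof(av));
	av.assoc_id = assoc_id;
	av.assoc_value = SCTP_SS_PRIO;
	if (setsockopt(fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER,
		       &av, sizeof(av)) < 0)
		return -1;

	memset(&sv, 0, sizeof(sv));
	sv.assoc_id = assoc_id;
	sv.stream_id = 1;
	sv.stream_value = 0;
	return setsockopt(fd, IPPROTO_SCTP, SCTP_STREAM_SCHEDULER_VALUE,
			  &sv, sizeof(sv));
}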
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 63ea15503714..5ea33a2c453b 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -32,44 +32,181 @@
* Xin Long <lucien.xin@gmail.com>
*/
+#include <linux/list.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Migrates chunks from stream queues to new stream queues if needed,
+ * but not across associations. Also, removes chunks queued on streams
+ * higher than the new max.
+ */
+static void sctp_stream_outq_migrate(struct sctp_stream *stream,
+ struct sctp_stream *new, __u16 outcnt)
+{
+ struct sctp_association *asoc;
+ struct sctp_chunk *ch, *temp;
+ struct sctp_outq *outq;
+ int i;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ outq = &asoc->outqueue;
+
+ list_for_each_entry_safe(ch, temp, &outq->out_chunk_list, list) {
+ __u16 sid = sctp_chunk_stream_no(ch);
+
+ if (sid < outcnt)
+ continue;
+
+ sctp_sched_dequeue_common(outq, ch);
+ /* No need to call dequeue_done here because
+ * the chunks are not scheduled yet.
+ */
+
+ /* Mark as failed send. */
+ sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM);
+ if (asoc->peer.prsctp_capable &&
+ SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags))
+ asoc->sent_cnt_removable--;
+
+ sctp_chunk_free(ch);
+ }
+
+ if (new) {
+ /* Here we actually move the old ext stuff into the new
+ * buffer, because we want to keep it. Then
+ * sctp_stream_update will swap ->out pointers.
+ */
+ for (i = 0; i < outcnt; i++) {
+ kfree(new->out[i].ext);
+ new->out[i].ext = stream->out[i].ext;
+ stream->out[i].ext = NULL;
+ }
+ }
+
+ for (i = outcnt; i < stream->outcnt; i++)
+ kfree(stream->out[i].ext);
+}
+
+static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
+ gfp_t gfp)
+{
+ struct sctp_stream_out *out;
+
+ out = kmalloc_array(outcnt, sizeof(*out), gfp);
+ if (!out)
+ return -ENOMEM;
+
+ if (stream->out) {
+ memcpy(out, stream->out, min(outcnt, stream->outcnt) *
+ sizeof(*out));
+ kfree(stream->out);
+ }
+
+ if (outcnt > stream->outcnt)
+ memset(out + stream->outcnt, 0,
+ (outcnt - stream->outcnt) * sizeof(*out));
+
+ stream->out = out;
+
+ return 0;
+}
+
+static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
+ gfp_t gfp)
+{
+ struct sctp_stream_in *in;
+
+ in = kmalloc_array(incnt, sizeof(*stream->in), gfp);
+
+ if (!in)
+ return -ENOMEM;
+
+ if (stream->in) {
+ memcpy(in, stream->in, min(incnt, stream->incnt) *
+ sizeof(*in));
+ kfree(stream->in);
+ }
+
+ if (incnt > stream->incnt)
+ memset(in + stream->incnt, 0,
+ (incnt - stream->incnt) * sizeof(*in));
+
+ stream->in = in;
+
+ return 0;
+}
int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
gfp_t gfp)
{
- int i;
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+ int i, ret = 0;
+
+ gfp |= __GFP_NOWARN;
/* Initial stream->out size may be very big, so free it and alloc
- * a new one with new outcnt to save memory.
+ * a new one with new outcnt to save memory if needed.
*/
- kfree(stream->out);
+ if (outcnt == stream->outcnt)
+ goto in;
- stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp);
- if (!stream->out)
- return -ENOMEM;
+ /* Filter out chunks queued on streams that won't exist anymore */
+ sched->unsched_all(stream);
+ sctp_stream_outq_migrate(stream, NULL, outcnt);
+ sched->sched_all(stream);
+
+ i = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (i)
+ return i;
stream->outcnt = outcnt;
for (i = 0; i < stream->outcnt; i++)
stream->out[i].state = SCTP_STREAM_OPEN;
+ sched->init(stream);
+
+in:
if (!incnt)
- return 0;
+ goto out;
- stream->in = kcalloc(incnt, sizeof(*stream->in), gfp);
- if (!stream->in) {
- kfree(stream->out);
- stream->out = NULL;
- return -ENOMEM;
+ i = sctp_stream_alloc_in(stream, incnt, gfp);
+ if (i) {
+ ret = -ENOMEM;
+ goto free;
}
stream->incnt = incnt;
+ goto out;
- return 0;
+free:
+ sched->free(stream);
+ kfree(stream->out);
+ stream->out = NULL;
+out:
+ return ret;
+}
+
+int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
+{
+ struct sctp_stream_out_ext *soute;
+
+ soute = kzalloc(sizeof(*soute), GFP_KERNEL);
+ if (!soute)
+ return -ENOMEM;
+ stream->out[sid].ext = soute;
+
+ return sctp_sched_init_sid(stream, sid, GFP_KERNEL);
}
void sctp_stream_free(struct sctp_stream *stream)
{
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+ int i;
+
+ sched->free(stream);
+ for (i = 0; i < stream->outcnt; i++)
+ kfree(stream->out[i].ext);
kfree(stream->out);
kfree(stream->in);
}
@@ -87,6 +224,10 @@ void sctp_stream_clear(struct sctp_stream *stream)
void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
{
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+
+ sched->unsched_all(stream);
+ sctp_stream_outq_migrate(stream, new, new->outcnt);
sctp_stream_free(stream);
stream->out = new->out;
@@ -94,6 +235,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
stream->outcnt = new->outcnt;
stream->incnt = new->incnt;
+ sched->sched_all(stream);
+
new->out = NULL;
new->in = NULL;
}
@@ -270,15 +413,9 @@ int sctp_send_add_streams(struct sctp_association *asoc,
}
if (out) {
- struct sctp_stream_out *streamout;
-
- streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
- GFP_KERNEL);
- if (!streamout)
+ retval = sctp_stream_alloc_out(stream, outcnt, GFP_KERNEL);
+ if (retval)
goto out;
-
- memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
- stream->out = streamout;
}
chunk = sctp_make_strreset_addstrm(asoc, out, in);
@@ -601,7 +738,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
struct sctp_strreset_addstrm *addstrm = param.v;
struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
- struct sctp_stream_in *streamin;
__u32 request_seq, incnt;
__u16 in, i;
@@ -648,13 +784,9 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
if (!in || incnt > SCTP_MAX_STREAM)
goto out;
- streamin = krealloc(stream->in, incnt * sizeof(*streamin),
- GFP_ATOMIC);
- if (!streamin)
+ if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
goto out;
- memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
- stream->in = streamin;
stream->incnt = incnt;
result = SCTP_STRRESET_PERFORMED;
@@ -676,10 +808,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
struct sctp_strreset_addstrm *addstrm = param.v;
struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
- struct sctp_stream_out *streamout;
struct sctp_chunk *chunk = NULL;
__u32 request_seq, outcnt;
__u16 out, i;
+ int ret;
request_seq = ntohl(addstrm->request_seq);
if (TSN_lt(asoc->strreset_inseq, request_seq) ||
@@ -708,14 +840,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
if (!out || outcnt > SCTP_MAX_STREAM)
goto out;
- streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
- GFP_ATOMIC);
- if (!streamout)
+ ret = sctp_stream_alloc_out(stream, outcnt, GFP_ATOMIC);
+ if (ret)
goto out;
- memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
- stream->out = streamout;
-
chunk = sctp_make_strreset_addstrm(asoc, out, 0);
if (!chunk)
goto out;
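
sctp_stream_alloc_out() and sctp_stream_alloc_in() above replace the scattered krealloc() calls with one helper that copies only the overlapping prefix of the old array and zeroes any newly added tail, so the same code serves both the growth (add-streams) and shrink (stream reconf) paths. The pattern in isolation, as a generic sketch:

static void *example_resize_array(void *old, size_t old_n, size_t new_n,
				  size_t elem, gfp_t gfp)
{
	char *new = kmalloc_array(new_n, elem, gfp);

	if (!new)
		return NULL;	/* caller keeps the old array */

	if (old)
		memcpy(new, old, min(old_n, new_n) * elem);
	if (new_n > old_n)
		memset(new + old_n * elem, 0, (new_n - old_n) * elem);
	kfree(old);
	return new;
}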
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
new file mode 100644
index 000000000000..03513a9fa110
--- /dev/null
+++ b/net/sctp/stream_sched.c
@@ -0,0 +1,275 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* First Come First Serve (a.k.a. FIFO)
+ * RFC DRAFT ndata Section 3.1
+ */
+static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid,
+ __u16 value, gfp_t gfp)
+{
+ return 0;
+}
+
+static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid,
+ __u16 *value)
+{
+ *value = 0;
+ return 0;
+}
+
+static int sctp_sched_fcfs_init(struct sctp_stream *stream)
+{
+ return 0;
+}
+
+static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid,
+ gfp_t gfp)
+{
+ return 0;
+}
+
+static void sctp_sched_fcfs_free(struct sctp_stream *stream)
+{
+}
+
+static void sctp_sched_fcfs_enqueue(struct sctp_outq *q,
+ struct sctp_datamsg *msg)
+{
+}
+
+static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q)
+{
+ struct sctp_stream *stream = &q->asoc->stream;
+ struct sctp_chunk *ch = NULL;
+ struct list_head *entry;
+
+ if (list_empty(&q->out_chunk_list))
+ goto out;
+
+ if (stream->out_curr) {
+ ch = list_entry(stream->out_curr->ext->outq.next,
+ struct sctp_chunk, stream_list);
+ } else {
+ entry = q->out_chunk_list.next;
+ ch = list_entry(entry, struct sctp_chunk, list);
+ }
+
+ sctp_sched_dequeue_common(q, ch);
+
+out:
+ return ch;
+}
+
+static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q,
+ struct sctp_chunk *chunk)
+{
+}
+
+static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream)
+{
+}
+
+static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream)
+{
+}
+
+static struct sctp_sched_ops sctp_sched_fcfs = {
+ .set = sctp_sched_fcfs_set,
+ .get = sctp_sched_fcfs_get,
+ .init = sctp_sched_fcfs_init,
+ .init_sid = sctp_sched_fcfs_init_sid,
+ .free = sctp_sched_fcfs_free,
+ .enqueue = sctp_sched_fcfs_enqueue,
+ .dequeue = sctp_sched_fcfs_dequeue,
+ .dequeue_done = sctp_sched_fcfs_dequeue_done,
+ .sched_all = sctp_sched_fcfs_sched_all,
+ .unsched_all = sctp_sched_fcfs_unsched_all,
+};
+
+/* API to other parts of the stack */
+
+extern struct sctp_sched_ops sctp_sched_prio;
+extern struct sctp_sched_ops sctp_sched_rr;
+
+struct sctp_sched_ops *sctp_sched_ops[] = {
+ &sctp_sched_fcfs,
+ &sctp_sched_prio,
+ &sctp_sched_rr,
+};
+
+int sctp_sched_set_sched(struct sctp_association *asoc,
+ enum sctp_sched_type sched)
+{
+ struct sctp_sched_ops *old = asoc->outqueue.sched;
+ struct sctp_datamsg *msg = NULL;
+ struct sctp_sched_ops *n;
+ struct sctp_chunk *ch;
+ int i, ret = 0;
+
+ /* Validate sched before using it to index sctp_sched_ops[]. */
+ if (sched > SCTP_SS_MAX)
+ return -EINVAL;
+
+ n = sctp_sched_ops[sched];
+ if (old == n)
+ return ret;
+
+ if (old) {
+ old->free(&asoc->stream);
+
+ /* Give the next scheduler a clean slate. */
+ for (i = 0; i < asoc->stream.outcnt; i++) {
+ void *p = asoc->stream.out[i].ext;
+
+ if (!p)
+ continue;
+
+ p += offsetofend(struct sctp_stream_out_ext, outq);
+ memset(p, 0, sizeof(struct sctp_stream_out_ext) -
+ offsetofend(struct sctp_stream_out_ext, outq));
+ }
+ }
+
+ asoc->outqueue.sched = n;
+ n->init(&asoc->stream);
+ for (i = 0; i < asoc->stream.outcnt; i++) {
+ if (!asoc->stream.out[i].ext)
+ continue;
+
+ ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
+ if (ret)
+ goto err;
+ }
+
+ /* We have to requeue all chunks already queued. */
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+ if (ch->msg == msg)
+ continue;
+ msg = ch->msg;
+ n->enqueue(&asoc->outqueue, msg);
+ }
+
+ return ret;
+
+err:
+ n->free(&asoc->stream);
+ asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */
+
+ return ret;
+}
+
+int sctp_sched_get_sched(struct sctp_association *asoc)
+{
+ int i;
+
+ for (i = 0; i <= SCTP_SS_MAX; i++)
+ if (asoc->outqueue.sched == sctp_sched_ops[i])
+ return i;
+
+ return 0;
+}
+
+int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid,
+ __u16 value, gfp_t gfp)
+{
+ if (sid >= asoc->stream.outcnt)
+ return -EINVAL;
+
+ if (!asoc->stream.out[sid].ext) {
+ int ret;
+
+ ret = sctp_stream_init_ext(&asoc->stream, sid);
+ if (ret)
+ return ret;
+ }
+
+ return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp);
+}
+
+int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
+ __u16 *value)
+{
+ if (sid >= asoc->stream.outcnt)
+ return -EINVAL;
+
+ if (!asoc->stream.out[sid].ext)
+ return 0;
+
+ return asoc->outqueue.sched->get(&asoc->stream, sid, value);
+}
+
+void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
+{
+ if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) {
+ struct sctp_stream_out *sout;
+ __u16 sid;
+
+ /* datamsg is not finished yet, so save it as the current one,
+ * in case the application switches scheduler or a higher
+ * priority stream comes in.
+ */
+ sid = sctp_chunk_stream_no(ch);
+ sout = &q->asoc->stream.out[sid];
+ q->asoc->stream.out_curr = sout;
+ return;
+ }
+
+ q->asoc->stream.out_curr = NULL;
+ q->sched->dequeue_done(q, ch);
+}
+
+/* Auxiliary functions for the schedulers */
+void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch)
+{
+ list_del_init(&ch->list);
+ list_del_init(&ch->stream_list);
+ q->out_qlen -= ch->skb->len;
+}
+
+int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp)
+{
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+
+ INIT_LIST_HEAD(&stream->out[sid].ext->outq);
+ return sched->init_sid(stream, sid, gfp);
+}
+
+struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+
+ return asoc->outqueue.sched;
+}
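
The offsetofend() dance in sctp_sched_set_sched() is what lets a scheduler switch preserve queued data: everything in sctp_stream_out_ext up to and including outq is generic (statistics plus the per-stream chunk list), and everything after it is scheduler-private, so the partial memset wipes the old scheduler's state while the chunks stay queued. A layout sketch; the exact fields are an assumption mirroring the two schedulers in this series:

struct sctp_stream_out_ext_sketch {
	__u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
	__u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
	struct list_head outq;	/* queued chunks, kept across a switch */
	union {			/* zeroed by the offsetofend() memset */
		struct {	/* SCTP_SS_PRIO */
			struct list_head prio_list;
			struct sctp_stream_priorities *prio_head;
		};
		struct {	/* SCTP_SS_RR */
			struct list_head rr_list;
		};
	};
};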
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
new file mode 100644
index 000000000000..384dbf3c8760
--- /dev/null
+++ b/net/sctp/stream_sched_prio.c
@@ -0,0 +1,347 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Priority handling
+ * RFC DRAFT ndata section 3.4
+ */
+
+static void sctp_sched_prio_unsched_all(struct sctp_stream *stream);
+
+static struct sctp_stream_priorities *sctp_sched_prio_new_head(
+ struct sctp_stream *stream, int prio, gfp_t gfp)
+{
+ struct sctp_stream_priorities *p;
+
+ p = kmalloc(sizeof(*p), gfp);
+ if (!p)
+ return NULL;
+
+ INIT_LIST_HEAD(&p->prio_sched);
+ INIT_LIST_HEAD(&p->active);
+ p->next = NULL;
+ p->prio = prio;
+
+ return p;
+}
+
+static struct sctp_stream_priorities *sctp_sched_prio_get_head(
+ struct sctp_stream *stream, int prio, gfp_t gfp)
+{
+ struct sctp_stream_priorities *p;
+ int i;
+
+ /* Look into scheduled priorities first, as they are sorted and
+ * we can find it fast IF it's scheduled.
+ */
+ list_for_each_entry(p, &stream->prio_list, prio_sched) {
+ if (p->prio == prio)
+ return p;
+ if (p->prio > prio)
+ break;
+ }
+
+ /* No luck. So we search all streams now. */
+ for (i = 0; i < stream->outcnt; i++) {
+ if (!stream->out[i].ext)
+ continue;
+
+ p = stream->out[i].ext->prio_head;
+ if (!p)
+ /* Means all other streams won't be initialized
+ * as well.
+ */
+ break;
+ if (p->prio == prio)
+ return p;
+ }
+
+ /* If not even there, allocate a new one. */
+ return sctp_sched_prio_new_head(stream, prio, gfp);
+}
+
+static void sctp_sched_prio_next_stream(struct sctp_stream_priorities *p)
+{
+ struct list_head *pos;
+
+ pos = p->next->prio_list.next;
+ if (pos == &p->active)
+ pos = pos->next;
+ p->next = list_entry(pos, struct sctp_stream_out_ext, prio_list);
+}
+
+static bool sctp_sched_prio_unsched(struct sctp_stream_out_ext *soute)
+{
+ bool scheduled = false;
+
+ if (!list_empty(&soute->prio_list)) {
+ struct sctp_stream_priorities *prio_head = soute->prio_head;
+
+ /* Scheduled */
+ scheduled = true;
+
+ if (prio_head->next == soute)
+ /* Try to move to the next stream */
+ sctp_sched_prio_next_stream(prio_head);
+
+ list_del_init(&soute->prio_list);
+
+ /* Also unsched the priority if this was the last stream */
+ if (list_empty(&prio_head->active)) {
+ list_del_init(&prio_head->prio_sched);
+ /* If there is no stream left, clear next */
+ prio_head->next = NULL;
+ }
+ }
+
+ return scheduled;
+}
+
+static void sctp_sched_prio_sched(struct sctp_stream *stream,
+ struct sctp_stream_out_ext *soute)
+{
+ struct sctp_stream_priorities *prio, *prio_head;
+
+ prio_head = soute->prio_head;
+
+ /* Nothing to do if already scheduled */
+ if (!list_empty(&soute->prio_list))
+ return;
+
+ /* Schedule the stream. If there is a next, we schedule the new
+ * one before it, so it's the last in round robin order.
+ * If there isn't, we also have to schedule the priority.
+ */
+ if (prio_head->next) {
+ list_add(&soute->prio_list, prio_head->next->prio_list.prev);
+ return;
+ }
+
+ list_add(&soute->prio_list, &prio_head->active);
+ prio_head->next = soute;
+
+ list_for_each_entry(prio, &stream->prio_list, prio_sched) {
+ if (prio->prio > prio_head->prio) {
+ list_add(&prio_head->prio_sched, prio->prio_sched.prev);
+ return;
+ }
+ }
+
+ list_add_tail(&prio_head->prio_sched, &stream->prio_list);
+}
+
+static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
+ __u16 prio, gfp_t gfp)
+{
+ struct sctp_stream_out *sout = &stream->out[sid];
+ struct sctp_stream_out_ext *soute = sout->ext;
+ struct sctp_stream_priorities *prio_head, *old;
+ bool reschedule = false;
+ int i;
+
+ prio_head = sctp_sched_prio_get_head(stream, prio, gfp);
+ if (!prio_head)
+ return -ENOMEM;
+
+ reschedule = sctp_sched_prio_unsched(soute);
+ old = soute->prio_head;
+ soute->prio_head = prio_head;
+ if (reschedule)
+ sctp_sched_prio_sched(stream, soute);
+
+ if (!old)
+ /* Happens when we set the priority for the first time */
+ return 0;
+
+ for (i = 0; i < stream->outcnt; i++) {
+ soute = stream->out[i].ext;
+ if (soute && soute->prio_head == old)
+ /* It's still in use, nothing else to do here. */
+ return 0;
+ }
+
+ /* No hits, we are good to free it. */
+ kfree(old);
+
+ return 0;
+}
+
+static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid,
+ __u16 *value)
+{
+ *value = stream->out[sid].ext->prio_head->prio;
+ return 0;
+}
+
+static int sctp_sched_prio_init(struct sctp_stream *stream)
+{
+ INIT_LIST_HEAD(&stream->prio_list);
+
+ return 0;
+}
+
+static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
+ gfp_t gfp)
+{
+ INIT_LIST_HEAD(&stream->out[sid].ext->prio_list);
+ return sctp_sched_prio_set(stream, sid, 0, gfp);
+}
+
+static void sctp_sched_prio_free(struct sctp_stream *stream)
+{
+ struct sctp_stream_priorities *prio, *n;
+ LIST_HEAD(list);
+ int i;
+
+ /* As we don't keep a list of priorities, to avoid multiple
+ * frees we have to do it in 3 steps:
+ * 1. unsched everyone, so the lists are free to use in 2.
+ * 2. build the list of the priorities
+ * 3. free the list
+ */
+ sctp_sched_prio_unsched_all(stream);
+ for (i = 0; i < stream->outcnt; i++) {
+ if (!stream->out[i].ext)
+ continue;
+ prio = stream->out[i].ext->prio_head;
+ if (prio && list_empty(&prio->prio_sched))
+ list_add(&prio->prio_sched, &list);
+ }
+ list_for_each_entry_safe(prio, n, &list, prio_sched) {
+ list_del_init(&prio->prio_sched);
+ kfree(prio);
+ }
+}
+
+static void sctp_sched_prio_enqueue(struct sctp_outq *q,
+ struct sctp_datamsg *msg)
+{
+ struct sctp_stream *stream;
+ struct sctp_chunk *ch;
+ __u16 sid;
+
+ ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
+ sid = sctp_chunk_stream_no(ch);
+ stream = &q->asoc->stream;
+ sctp_sched_prio_sched(stream, stream->out[sid].ext);
+}
+
+static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q)
+{
+ struct sctp_stream *stream = &q->asoc->stream;
+ struct sctp_stream_priorities *prio;
+ struct sctp_stream_out_ext *soute;
+ struct sctp_chunk *ch = NULL;
+
+ /* Bail out quickly if queue is empty */
+ if (list_empty(&q->out_chunk_list))
+ goto out;
+
+ /* Find which chunk is next. It's easy, it's either the current
+ * one or the first chunk on the next active stream.
+ */
+ if (stream->out_curr) {
+ soute = stream->out_curr->ext;
+ } else {
+ prio = list_entry(stream->prio_list.next,
+ struct sctp_stream_priorities, prio_sched);
+ soute = prio->next;
+ }
+ ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
+ sctp_sched_dequeue_common(q, ch);
+
+out:
+ return ch;
+}
+
+static void sctp_sched_prio_dequeue_done(struct sctp_outq *q,
+ struct sctp_chunk *ch)
+{
+ struct sctp_stream_priorities *prio;
+ struct sctp_stream_out_ext *soute;
+ __u16 sid;
+
+ /* Last chunk of that msg, move to the next stream within
+ * this priority level.
+ */
+ sid = sctp_chunk_stream_no(ch);
+ soute = q->asoc->stream.out[sid].ext;
+ prio = soute->prio_head;
+
+ sctp_sched_prio_next_stream(prio);
+
+ if (list_empty(&soute->outq))
+ sctp_sched_prio_unsched(soute);
+}
+
+static void sctp_sched_prio_sched_all(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *sout;
+ struct sctp_chunk *ch;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+ __u16 sid;
+
+ sid = sctp_chunk_stream_no(ch);
+ sout = &stream->out[sid];
+ if (sout->ext)
+ sctp_sched_prio_sched(stream, sout->ext);
+ }
+}
+
+static void sctp_sched_prio_unsched_all(struct sctp_stream *stream)
+{
+ struct sctp_stream_priorities *p, *tmp;
+ struct sctp_stream_out_ext *soute, *souttmp;
+
+ list_for_each_entry_safe(p, tmp, &stream->prio_list, prio_sched)
+ list_for_each_entry_safe(soute, souttmp, &p->active, prio_list)
+ sctp_sched_prio_unsched(soute);
+}
+
+struct sctp_sched_ops sctp_sched_prio = {
+ .set = sctp_sched_prio_set,
+ .get = sctp_sched_prio_get,
+ .init = sctp_sched_prio_init,
+ .init_sid = sctp_sched_prio_init_sid,
+ .free = sctp_sched_prio_free,
+ .enqueue = sctp_sched_prio_enqueue,
+ .dequeue = sctp_sched_prio_dequeue,
+ .dequeue_done = sctp_sched_prio_dequeue_done,
+ .sched_all = sctp_sched_prio_sched_all,
+ .unsched_all = sctp_sched_prio_unsched_all,
+};
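
The priority scheduler is thus two-level: stream->prio_list holds the active priority heads sorted ascending by value (so the head of the list is the highest priority), and each head round-robins over its own active streams through p->next. A condensed sketch of the selection step that sctp_sched_prio_dequeue() performs when there is no in-flight message:

static struct sctp_stream_out_ext *
example_prio_pick(struct sctp_stream *stream)
{
	struct sctp_stream_priorities *p;

	if (list_empty(&stream->prio_list))
		return NULL;	/* nothing scheduled */

	/* Lowest value sorts first, so the first head wins. */
	p = list_first_entry(&stream->prio_list,
			     struct sctp_stream_priorities, prio_sched);

	/* p->next rotates across this priority's active streams;
	 * sctp_sched_prio_next_stream() advances it per message.
	 */
	return p->next;
}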
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
new file mode 100644
index 000000000000..7612a438c5b9
--- /dev/null
+++ b/net/sctp/stream_sched_rr.c
@@ -0,0 +1,201 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Round-robin handling
+ * RFC DRAFT ndata section 3.2
+ */
+static void sctp_sched_rr_unsched_all(struct sctp_stream *stream);
+
+static void sctp_sched_rr_next_stream(struct sctp_stream *stream)
+{
+ struct list_head *pos;
+
+ pos = stream->rr_next->rr_list.next;
+ if (pos == &stream->rr_list)
+ pos = pos->next;
+ stream->rr_next = list_entry(pos, struct sctp_stream_out_ext, rr_list);
+}
+
+static void sctp_sched_rr_unsched(struct sctp_stream *stream,
+ struct sctp_stream_out_ext *soute)
+{
+ if (stream->rr_next == soute)
+ /* Try to move to the next stream */
+ sctp_sched_rr_next_stream(stream);
+
+ list_del_init(&soute->rr_list);
+
+ /* If we have no other stream queued, clear next */
+ if (list_empty(&stream->rr_list))
+ stream->rr_next = NULL;
+}
+
+static void sctp_sched_rr_sched(struct sctp_stream *stream,
+ struct sctp_stream_out_ext *soute)
+{
+ if (!list_empty(&soute->rr_list))
+ /* Already scheduled. */
+ return;
+
+ /* Schedule the stream */
+ list_add_tail(&soute->rr_list, &stream->rr_list);
+
+ if (!stream->rr_next)
+ stream->rr_next = soute;
+}
+
+static int sctp_sched_rr_set(struct sctp_stream *stream, __u16 sid,
+ __u16 prio, gfp_t gfp)
+{
+ return 0;
+}
+
+static int sctp_sched_rr_get(struct sctp_stream *stream, __u16 sid,
+ __u16 *value)
+{
+ return 0;
+}
+
+static int sctp_sched_rr_init(struct sctp_stream *stream)
+{
+ INIT_LIST_HEAD(&stream->rr_list);
+ stream->rr_next = NULL;
+
+ return 0;
+}
+
+static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid,
+ gfp_t gfp)
+{
+ INIT_LIST_HEAD(&stream->out[sid].ext->rr_list);
+
+ return 0;
+}
+
+static void sctp_sched_rr_free(struct sctp_stream *stream)
+{
+ sctp_sched_rr_unsched_all(stream);
+}
+
+static void sctp_sched_rr_enqueue(struct sctp_outq *q,
+ struct sctp_datamsg *msg)
+{
+ struct sctp_stream *stream;
+ struct sctp_chunk *ch;
+ __u16 sid;
+
+ ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
+ sid = sctp_chunk_stream_no(ch);
+ stream = &q->asoc->stream;
+ sctp_sched_rr_sched(stream, stream->out[sid].ext);
+}
+
+static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q)
+{
+ struct sctp_stream *stream = &q->asoc->stream;
+ struct sctp_stream_out_ext *soute;
+ struct sctp_chunk *ch = NULL;
+
+ /* Bail out quickly if queue is empty */
+ if (list_empty(&q->out_chunk_list))
+ goto out;
+
+ /* Find which chunk is next */
+ if (stream->out_curr)
+ soute = stream->out_curr->ext;
+ else
+ soute = stream->rr_next;
+ ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
+
+ sctp_sched_dequeue_common(q, ch);
+
+out:
+ return ch;
+}
+
+static void sctp_sched_rr_dequeue_done(struct sctp_outq *q,
+ struct sctp_chunk *ch)
+{
+ struct sctp_stream_out_ext *soute;
+ __u16 sid;
+
+ /* Last chunk on that msg, move to the next stream */
+ sid = sctp_chunk_stream_no(ch);
+ soute = q->asoc->stream.out[sid].ext;
+
+ sctp_sched_rr_next_stream(&q->asoc->stream);
+
+ if (list_empty(&soute->outq))
+ sctp_sched_rr_unsched(&q->asoc->stream, soute);
+}
+
+static void sctp_sched_rr_sched_all(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out_ext *soute;
+ struct sctp_chunk *ch;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+ __u16 sid;
+
+ sid = sctp_chunk_stream_no(ch);
+ soute = stream->out[sid].ext;
+ if (soute)
+ sctp_sched_rr_sched(stream, soute);
+ }
+}
+
+static void sctp_sched_rr_unsched_all(struct sctp_stream *stream)
+{
+ struct sctp_stream_out_ext *soute, *tmp;
+
+ list_for_each_entry_safe(soute, tmp, &stream->rr_list, rr_list)
+ sctp_sched_rr_unsched(stream, soute);
+}
+
+struct sctp_sched_ops sctp_sched_rr = {
+ .set = sctp_sched_rr_set,
+ .get = sctp_sched_rr_get,
+ .init = sctp_sched_rr_init,
+ .init_sid = sctp_sched_rr_init_sid,
+ .free = sctp_sched_rr_free,
+ .enqueue = sctp_sched_rr_enqueue,
+ .dequeue = sctp_sched_rr_dequeue,
+ .dequeue_done = sctp_sched_rr_dequeue_done,
+ .sched_all = sctp_sched_rr_sched_all,
+ .unsched_all = sctp_sched_rr_unsched_all,
+};
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index a7294edbc221..5ef97e5a5f78 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -62,10 +62,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
bh_unlock_sock(&smc->sk);
}
-int smc_cdc_get_free_slot(struct smc_link *link,
+int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_wr_buf **wr_buf,
struct smc_cdc_tx_pend **pend)
{
+ struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+
return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
(struct smc_wr_tx_pend_priv **)pend);
}
@@ -118,8 +120,7 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
struct smc_wr_buf *wr_buf;
int rc;
- rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf,
- &pend);
+ rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
if (rc)
return rc;
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 8e1d76f26007..56f883d1159c 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -206,7 +206,8 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smc_cdc_tx_pend;
-int smc_cdc_get_free_slot(struct smc_link *link, struct smc_wr_buf **wr_buf,
+int smc_cdc_get_free_slot(struct smc_connection *conn,
+ struct smc_wr_buf **wr_buf,
struct smc_cdc_tx_pend **pend);
void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 3866573288dd..ec49bc3c3283 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -396,8 +396,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
int rc;
spin_lock_bh(&conn->send_lock);
- rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf,
- &pend);
+ rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
if (rc < 0) {
if (rc == -EBUSY) {
struct smc_sock *smc =
@@ -466,8 +465,7 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
- rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
- &wr_buf, &pend);
+ rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
if (!rc)
rc = smc_cdc_msg_send(conn, wr_buf, pend);
if (rc < 0) {
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 10ae7823a19d..0206155bff53 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -21,7 +21,6 @@
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
-#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/net.h>