aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c21
-rw-r--r--net/core/devlink.c8
-rw-r--r--net/core/ethtool.c64
-rw-r--r--net/core/pktgen.c16
-rw-r--r--net/core/rtnetlink.c6
-rw-r--r--net/core/sock.c2
-rw-r--r--net/core/sysctl_net_core.c12
-rw-r--r--net/ipv4/ip_tunnel.c20
-rw-r--r--net/ipv4/netfilter/arptable_filter.c1
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c1
-rw-r--r--net/ipv4/netfilter/iptable_nat.c1
-rw-r--r--net/ipv4/netfilter/iptable_raw.c1
-rw-r--r--net/ipv4/netfilter/iptable_security.c1
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c32
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_bbr.c5
-rw-r--r--net/ipv4/udp.c86
-rw-r--r--net/ipv6/addrconf.c26
-rw-r--r--net/ipv6/anycast.c9
-rw-r--r--net/ipv6/datagram.c5
-rw-r--r--net/ipv6/ip6_gre.c4
-rw-r--r--net/ipv6/ip6_tunnel.c14
-rw-r--r--net/ipv6/ndisc.c2
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c1
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c1
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c1
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c1
-rw-r--r--net/ipv6/netfilter/ip6table_security.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c1
-rw-r--r--net/ipv6/route.c133
-rw-r--r--net/ipv6/sit.c5
-rw-r--r--net/ipv6/udp.c52
-rw-r--r--net/l2tp/l2tp_core.c1
-rw-r--r--net/llc/llc_sap.c7
-rw-r--r--net/mac80211/rx.c5
-rw-r--r--net/mpls/af_mpls.c1
-rw-r--r--net/ncsi/ncsi-netlink.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c1
-rw-r--r--net/netfilter/nf_tables_api.c1
-rw-r--r--net/netfilter/nfnetlink.c1
-rw-r--r--net/netfilter/nfnetlink_acct.c1
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c1
-rw-r--r--net/netfilter/nfnetlink_log.c1
-rw-r--r--net/netfilter/nfnetlink_queue.c6
-rw-r--r--net/openvswitch/datapath.c1
-rw-r--r--net/openvswitch/vport.c8
-rw-r--r--net/rds/connection.c7
-rw-r--r--net/rds/ib.c3
-rw-r--r--net/rds/message.c6
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rds/tcp.c18
-rw-r--r--net/rxrpc/recvmsg.c2
-rw-r--r--net/sched/act_api.c64
-rw-r--r--net/sched/act_gact.c14
-rw-r--r--net/sched/cls_api.c3
-rw-r--r--net/sched/cls_flower.c6
-rw-r--r--net/sctp/auth.c146
-rw-r--r--net/sctp/chunk.c14
-rw-r--r--net/sctp/objcnt.c8
-rw-r--r--net/sctp/output.c18
-rw-r--r--net/sctp/proc.c90
-rw-r--r--net/sctp/protocol.c61
-rw-r--r--net/sctp/sm_make_chunk.c33
-rw-r--r--net/sctp/sm_sideeffect.c13
-rw-r--r--net/sctp/sm_statefuns.c56
-rw-r--r--net/sctp/socket.c113
-rw-r--r--net/smc/af_smc.c79
-rw-r--r--net/smc/smc.h4
-rw-r--r--net/smc/smc_clc.c203
-rw-r--r--net/smc/smc_clc.h17
-rw-r--r--net/smc/smc_core.c23
-rw-r--r--net/smc/smc_ib.c10
-rw-r--r--net/smc/smc_wr.h1
-rw-r--r--net/tipc/addr.c31
-rw-r--r--net/tipc/addr.h10
-rw-r--r--net/tipc/core.c1
-rw-r--r--net/tipc/core.h5
-rw-r--r--net/tipc/msg.c2
-rw-r--r--net/tipc/name_distr.c63
-rw-r--r--net/tipc/name_distr.h2
-rw-r--r--net/tipc/name_table.c206
-rw-r--r--net/tipc/name_table.h54
-rw-r--r--net/tipc/net.c2
-rw-r--r--net/tipc/socket.c29
-rw-r--r--net/xfrm/xfrm_user.c1
87 files changed, 1256 insertions, 743 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index e5b8d42b6410..d8887cc38e7b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4351,6 +4351,9 @@ int netdev_rx_handler_register(struct net_device *dev,
if (netdev_is_rx_handler_busy(dev))
return -EBUSY;
+ if (dev->priv_flags & IFF_NO_RX_HANDLER)
+ return -EINVAL;
+
/* Note: rx_handler_data must be set before rx_handler */
rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
rcu_assign_pointer(dev->rx_handler, rx_handler);
@@ -7546,10 +7549,17 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
- /* LRO feature cannot be combined with RX-FCS */
- if ((features & NETIF_F_LRO) && (features & NETIF_F_RXFCS)) {
- netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
- features &= ~NETIF_F_LRO;
+ /* LRO/HW-GRO features cannot be combined with RX-FCS */
+ if (features & NETIF_F_RXFCS) {
+ if (features & NETIF_F_LRO) {
+ netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_LRO;
+ }
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
}
return features;
@@ -8008,7 +8018,8 @@ int register_netdev(struct net_device *dev)
{
int err;
- rtnl_lock();
+ if (rtnl_lock_killable())
+ return -EINTR;
err = register_netdevice(dev);
rtnl_unlock();
return err;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 1b5bf0d1cee9..f23e5ed7c90f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2744,22 +2744,22 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
.doit = devlink_nl_cmd_dpipe_table_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
.doit = devlink_nl_cmd_dpipe_entries_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
.doit = devlink_nl_cmd_dpipe_headers_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
@@ -2779,8 +2779,8 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_RESOURCE_DUMP,
.doit = devlink_nl_cmd_resource_dump,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_RELOAD,
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 3f89c76d5c24..157cd9efa4be 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1022,6 +1022,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (copy_from_user(&info, useraddr, info_size))
return -EFAULT;
+ /* If FLOW_RSS was requested then user-space must be using the
+ * new definition, as FLOW_RSS is newer.
+ */
+ if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
+ info_size = sizeof(info);
+ if (copy_from_user(&info, useraddr, info_size))
+ return -EFAULT;
+ }
+
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
@@ -1251,9 +1260,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
user_key_size = rxfh.key_size;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context, check it's 0 as well */
+ if (rxfh.rss_context && !ops->get_rxfh_context)
+ return -EOPNOTSUPP;
rxfh.indir_size = dev_indir_size;
rxfh.key_size = dev_key_size;
@@ -1276,7 +1287,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (user_key_size)
hkey = rss_config + indir_bytes;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
+ if (rxfh.rss_context)
+ ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey,
+ &dev_hfunc,
+ rxfh.rss_context);
+ else
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
if (ret)
goto out;
@@ -1306,6 +1322,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
u8 *hkey = NULL;
u8 *rss_config;
u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+ bool delete = false;
if (!ops->get_rxnfc || !ops->set_rxfh)
return -EOPNOTSUPP;
@@ -1319,9 +1336,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
return -EFAULT;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context, check it's 0 as well */
+ if (rxfh.rss_context && !ops->set_rxfh_context)
+ return -EOPNOTSUPP;
/* If either indir, hash key or function is valid, proceed further.
* Must request at least one change: indir size, hash key or function.
@@ -1346,7 +1365,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
- /* rxfh.indir_size == 0 means reset the indir table to default.
+ /* rxfh.indir_size == 0 means reset the indir table to default (master
+ * context) or delete the context (other RSS contexts).
* rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
*/
if (rxfh.indir_size &&
@@ -1359,9 +1379,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
} else if (rxfh.indir_size == 0) {
- indir = (u32 *)rss_config;
- for (i = 0; i < dev_indir_size; i++)
- indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ if (rxfh.rss_context == 0) {
+ indir = (u32 *)rss_config;
+ for (i = 0; i < dev_indir_size; i++)
+ indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ } else {
+ delete = true;
+ }
}
if (rxfh.key_size) {
@@ -1374,15 +1398,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
}
- ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+ if (rxfh.rss_context)
+ ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
+ &rxfh.rss_context, delete);
+ else
+ ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
if (ret)
goto out;
- /* indicate whether rxfh was set to default */
- if (rxfh.indir_size == 0)
- dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
- else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
- dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
+ &rxfh.rss_context, sizeof(rxfh.rss_context)))
+ ret = -EFAULT;
+
+ if (!rxfh.rss_context) {
+ /* indicate whether rxfh was set to default */
+ if (rxfh.indir_size == 0)
+ dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+ dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ }
out:
kfree(rss_config);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b8ab5c829511..545cf08cd558 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -906,13 +906,14 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- size_t copy = min_t(size_t, count, 1023);
- char tb[copy + 1];
- if (copy_from_user(tb, user_buffer, copy))
- return -EFAULT;
- tb[copy] = 0;
- pr_debug("%s,%lu buffer -:%s:-\n",
- name, (unsigned long)count, tb);
+ size_t copy = min_t(size_t, count + 1, 1024);
+ char *tp = strndup_user(user_buffer, copy);
+
+ if (IS_ERR(tp))
+ return PTR_ERR(tp);
+
+ pr_debug("%s,%zu buffer -:%s:-\n", name, count, tp);
+ kfree(tp);
}
if (!strcmp(name, "min_pkt_size")) {
@@ -3851,6 +3852,7 @@ static struct pernet_operations pg_net_ops = {
.exit = pg_net_exit,
.id = &pg_net_id,
.size = sizeof(struct pktgen_net),
+ .async = true,
};
static int __init pg_init(void)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 67f375cfb982..87079eaa871b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -75,6 +75,12 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+int rtnl_lock_killable(void)
+{
+ return mutex_lock_killable(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_lock_killable);
+
static struct sk_buff *defer_kfree_skb_list;
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
{
diff --git a/net/core/sock.c b/net/core/sock.c
index 4f92c2910200..f704324d1219 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1052,8 +1052,6 @@ set_rcvbuf:
if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
if (sk->sk_protocol != IPPROTO_TCP)
ret = -ENOTSUPP;
- else if (sk->sk_state != TCP_CLOSE)
- ret = -EBUSY;
} else if (sk->sk_family != PF_RDS) {
ret = -ENOTSUPP;
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d714f65782b7..4f47f92459cc 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS;
static int net_msg_warn; /* Unused, but still a sysctl */
+int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
+EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
+
#ifdef CONFIG_RPS
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
+ {
+ .procname = "fb_tunnels_only_for_init_net",
+ .data = &sysctl_fb_tunnels_only_for_init_net,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 602597dfc395..5fcb17cb426b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -347,8 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
struct net_device *dev;
int t_hlen;
- BUG_ON(!itn->fb_tunnel_dev);
- dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
+ dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
if (IS_ERR(dev))
return ERR_CAST(dev);
@@ -822,7 +821,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
struct net *net = t->net;
struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
- BUG_ON(!itn->fb_tunnel_dev);
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == itn->fb_tunnel_dev) {
@@ -847,7 +845,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
p->o_key = 0;
}
- t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
+ t = ip_tunnel_find(itn, p, itn->type);
if (cmd == SIOCADDTUNNEL) {
if (!t) {
@@ -991,10 +989,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct ip_tunnel_parm parms;
unsigned int i;
+ itn->rtnl_link_ops = ops;
for (i = 0; i < IP_TNL_HASH_SIZE; i++)
INIT_HLIST_HEAD(&itn->tunnels[i]);
- if (!ops) {
+ if (!ops || !net_has_fallback_tunnels(net)) {
+ struct ip_tunnel_net *it_init_net;
+
+ it_init_net = net_generic(&init_net, ip_tnl_net_id);
+ itn->type = it_init_net->type;
itn->fb_tunnel_dev = NULL;
return 0;
}
@@ -1012,6 +1015,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+ itn->type = itn->fb_tunnel_dev->type;
}
rtnl_unlock();
@@ -1019,10 +1023,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
-static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
+static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
+ struct list_head *head,
struct rtnl_link_ops *ops)
{
- struct net *net = dev_net(itn->fb_tunnel_dev);
struct net_device *dev, *aux;
int h;
@@ -1054,7 +1058,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
itn = net_generic(net, id);
- ip_tunnel_destroy(itn, &list, ops);
+ ip_tunnel_destroy(net, itn, &list, ops);
}
unregister_netdevice_many(&list);
rtnl_unlock();
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 8f8713b4388f..49c2490193ae 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -65,6 +65,7 @@ static void __net_exit arptable_filter_net_exit(struct net *net)
static struct pernet_operations arptable_filter_net_ops = {
.exit = arptable_filter_net_exit,
+ .async = true,
};
static int __init arptable_filter_init(void)
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index dea138ca8925..f6074059531a 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -113,6 +113,7 @@ static void __net_exit iptable_mangle_net_exit(struct net *net)
static struct pernet_operations iptable_mangle_net_ops = {
.exit = iptable_mangle_net_exit,
+ .async = true,
};
static int __init iptable_mangle_init(void)
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 0f7255cc65ee..b771af74be79 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -129,6 +129,7 @@ static void __net_exit iptable_nat_net_exit(struct net *net)
static struct pernet_operations iptable_nat_net_ops = {
.exit = iptable_nat_net_exit,
+ .async = true,
};
static int __init iptable_nat_init(void)
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 960625aabf04..963753e50842 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -76,6 +76,7 @@ static void __net_exit iptable_raw_net_exit(struct net *net)
static struct pernet_operations iptable_raw_net_ops = {
.exit = iptable_raw_net_exit,
+ .async = true,
};
static int __init iptable_raw_init(void)
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index e5379fe57b64..c40d6b3d8b6a 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -76,6 +76,7 @@ static void __net_exit iptable_security_net_exit(struct net *net)
static struct pernet_operations iptable_security_net_ops = {
.exit = iptable_security_net_exit,
+ .async = true,
};
static int __init iptable_security_init(void)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index b50721d9d30e..6531f69db010 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -399,6 +399,7 @@ static struct pernet_operations ipv4_net_ops = {
.exit = ipv4_net_exit,
.id = &conntrack4_net_id,
.size = sizeof(struct conntrack4_net),
+ .async = true,
};
static int __init nf_conntrack_l3proto_ipv4_init(void)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 011de9a20ec6..5b72d97693f8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -520,22 +520,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- {
- .procname = "udp_rmem_min",
- .data = &sysctl_udp_rmem_min,
- .maxlen = sizeof(sysctl_udp_rmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
- {
- .procname = "udp_wmem_min",
- .data = &sysctl_udp_wmem_min,
- .maxlen = sizeof(sysctl_udp_wmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
{ }
};
@@ -1167,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
+ {
+ .procname = "udp_rmem_min",
+ .data = &init_net.ipv4.sysctl_udp_rmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
+ {
+ .procname = "udp_wmem_min",
+ .data = &init_net.ipv4.sysctl_udp_wmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f90ec24c2cc8..d763fae1b574 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3033,7 +3033,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
u32 rate;
stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
- 4 * nla_total_size(sizeof(u32)) +
+ 5 * nla_total_size(sizeof(u32)) +
3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
if (!stats)
return NULL;
@@ -3063,6 +3063,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
+ nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index c92014cb1e16..158d105e76da 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -731,6 +731,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
bbr->mode = BBR_DRAIN; /* drain queue we created */
bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */
bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */
+ tcp_sk(sk)->snd_ssthresh =
+ bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
tcp_packets_in_flight(tcp_sk(sk)) <=
@@ -834,6 +836,7 @@ static void bbr_init(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
@@ -886,7 +889,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
static u32 bbr_ssthresh(struct sock *sk)
{
bbr_save_cwnd(sk);
- return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */
+ return tcp_sk(sk)->snd_ssthresh;
}
static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3013404d0935..908fc02fb4f8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -122,12 +122,6 @@ EXPORT_SYMBOL(udp_table);
long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);
-int sysctl_udp_rmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_rmem_min);
-
-int sysctl_udp_wmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_wmem_min);
-
atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
@@ -2533,35 +2527,35 @@ int udp_abort(struct sock *sk, int err)
EXPORT_SYMBOL_GPL(udp_abort);
struct proto udp_prot = {
- .name = "UDP",
- .owner = THIS_MODULE,
- .close = udp_lib_close,
- .connect = ip4_datagram_connect,
- .disconnect = udp_disconnect,
- .ioctl = udp_ioctl,
- .init = udp_init_sock,
- .destroy = udp_destroy_sock,
- .setsockopt = udp_setsockopt,
- .getsockopt = udp_getsockopt,
- .sendmsg = udp_sendmsg,
- .recvmsg = udp_recvmsg,
- .sendpage = udp_sendpage,
- .release_cb = ip4_datagram_release_cb,
- .hash = udp_lib_hash,
- .unhash = udp_lib_unhash,
- .rehash = udp_v4_rehash,
- .get_port = udp_v4_get_port,
- .memory_allocated = &udp_memory_allocated,
- .sysctl_mem = sysctl_udp_mem,
- .sysctl_wmem = &sysctl_udp_wmem_min,
- .sysctl_rmem = &sysctl_udp_rmem_min,
- .obj_size = sizeof(struct udp_sock),
- .h.udp_table = &udp_table,
+ .name = "UDP",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .connect = ip4_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udp_init_sock,
+ .destroy = udp_destroy_sock,
+ .setsockopt = udp_setsockopt,
+ .getsockopt = udp_getsockopt,
+ .sendmsg = udp_sendmsg,
+ .recvmsg = udp_recvmsg,
+ .sendpage = udp_sendpage,
+ .release_cb = ip4_datagram_release_cb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v4_rehash,
+ .get_port = udp_v4_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp_sock),
+ .h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udp_setsockopt,
- .compat_getsockopt = compat_udp_getsockopt,
+ .compat_setsockopt = compat_udp_setsockopt,
+ .compat_getsockopt = compat_udp_getsockopt,
#endif
- .diag_destroy = udp_abort,
+ .diag_destroy = udp_abort,
};
EXPORT_SYMBOL(udp_prot);
@@ -2831,6 +2825,26 @@ u32 udp_flow_hashrnd(void)
}
EXPORT_SYMBOL(udp_flow_hashrnd);
+static void __udp_sysctl_init(struct net *net)
+{
+ net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+ net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ net->ipv4.sysctl_udp_l3mdev_accept = 0;
+#endif
+}
+
+static int __net_init udp_sysctl_init(struct net *net)
+{
+ __udp_sysctl_init(net);
+ return 0;
+}
+
+static struct pernet_operations __net_initdata udp_sysctl_ops = {
+ .init = udp_sysctl_init,
+};
+
void __init udp_init(void)
{
unsigned long limit;
@@ -2843,8 +2857,7 @@ void __init udp_init(void)
sysctl_udp_mem[1] = limit;
sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
- sysctl_udp_rmem_min = SK_MEM_QUANTUM;
- sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+ __udp_sysctl_init(&init_net);
/* 16 spinlocks per cpu */
udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
@@ -2854,4 +2867,7 @@ void __init udp_init(void)
panic("UDP: failed to alloc udp_busylocks\n");
for (i = 0; i < (1U << udp_busylocks_log); i++)
spin_lock_init(udp_busylocks + i);
+
+ if (register_pernet_subsys(&udp_sysctl_ops))
+ panic("UDP: failed to init sysctl parameters.\n");
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b5fd116c046a..6fd4bbdc444f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1851,22 +1851,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict)
{
- return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+ return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+ strict, IFA_F_TENTATIVE);
}
EXPORT_SYMBOL(ipv6_chk_addr);
+/* device argument is used to find the L3 domain of interest. If
+ * skip_dev_check is set, then the ifp device is not checked against
+ * the passed in dev argument. So the 2 cases for addresses checks are:
+ * 1. does the address exist in the L3 domain that dev is part of
+ * (skip_dev_check = true), or
+ *
+ * 2. does the address exist on the specific device
+ * (skip_dev_check = false)
+ */
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
- const struct net_device *dev, int strict,
- u32 banned_flags)
+ const struct net_device *dev, bool skip_dev_check,
+ int strict, u32 banned_flags)
{
unsigned int hash = inet6_addr_hash(net, addr);
+ const struct net_device *l3mdev;
struct inet6_ifaddr *ifp;
u32 ifp_flags;
rcu_read_lock();
+
+ l3mdev = l3mdev_master_dev_rcu(dev);
+ if (skip_dev_check)
+ dev = NULL;
+
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
+
+ if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+ continue;
+
/* Decouple optimistic from tentative for evaluation here.
* Ban optimistic addresses explicitly, when required.
*/
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index c61718dba2e6..d580d4d456a5 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
- if (ipv6_chk_addr(net, addr, NULL, 0))
+
+ if (ifindex)
+ dev = __dev_get_by_index(net, ifindex);
+
+ if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
return -EINVAL;
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
dev = __dev_get_by_flags(net, IFF_UP,
IFF_UP | IFF_LOOPBACK);
}
- } else
- dev = __dev_get_by_index(net, ifindex);
+ }
if (!dev) {
err = -ENODEV;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index fbf08ce3f5ab..b27333d7b099 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -801,8 +801,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
- !ipv6_chk_addr(net, &src_info->ipi6_addr,
- strict ? dev : NULL, 0) &&
+ !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+ dev, !strict, 0,
+ IFA_F_TENTATIVE) &&
!ipv6_chk_acast_addr_src(net, dev,
&src_info->ipi6_addr))
err = -EINVAL;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 18a3dfbd0300..7d8775c9570d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -236,7 +236,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
return t;
dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
+ if (dev && dev->flags & IFF_UP)
return netdev_priv(dev);
return NULL;
@@ -1472,6 +1472,8 @@ static int __net_init ip6gre_init_net(struct net *net)
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
int err;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
NET_NAME_UNKNOWN,
ip6gre_tunnel_setup);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 56c4967f1868..456fcf942f95 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
ldev = dev_get_by_index_rcu(net, p->link);
if ((ipv6_addr_is_multicast(laddr) ||
- likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+ likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE))) &&
((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
- likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+ likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+ 0, IFA_F_TENTATIVE))))
ret = 1;
}
return ret;
@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
- if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+ if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
- unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+ unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+ true, 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
else
@@ -2205,6 +2209,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
ip6n->tnls[0] = ip6n->tnls_wc;
ip6n->tnls[1] = ip6n->tnls_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 8af5eef464c1..10024eb0c521 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
int probes = atomic_read(&neigh->probes);
if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
- dev, 1,
+ dev, false, 1,
IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 1343077dde93..06561c84c0bc 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -87,6 +87,7 @@ static void __net_exit ip6table_filter_net_exit(struct net *net)
static struct pernet_operations ip6table_filter_net_ops = {
.init = ip6table_filter_net_init,
.exit = ip6table_filter_net_exit,
+ .async = true,
};
static int __init ip6table_filter_init(void)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index b0524b18c4fb..a11e25936b45 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -107,6 +107,7 @@ static void __net_exit ip6table_mangle_net_exit(struct net *net)
static struct pernet_operations ip6table_mangle_net_ops = {
.exit = ip6table_mangle_net_exit,
+ .async = true,
};
static int __init ip6table_mangle_init(void)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 47306e45a80a..4475fd300bb6 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -131,6 +131,7 @@ static void __net_exit ip6table_nat_net_exit(struct net *net)
static struct pernet_operations ip6table_nat_net_ops = {
.exit = ip6table_nat_net_exit,
+ .async = true,
};
static int __init ip6table_nat_init(void)
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 710fa0806c37..a88f3b1995b1 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -75,6 +75,7 @@ static void __net_exit ip6table_raw_net_exit(struct net *net)
static struct pernet_operations ip6table_raw_net_ops = {
.exit = ip6table_raw_net_exit,
+ .async = true,
};
static int __init ip6table_raw_init(void)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index cf26ccb04056..320048c008dc 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -74,6 +74,7 @@ static void __net_exit ip6table_security_net_exit(struct net *net)
static struct pernet_operations ip6table_security_net_ops = {
.exit = ip6table_security_net_exit,
+ .async = true,
};
static int __init ip6table_security_init(void)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 663827ee3cf8..ba54bb3bd1e4 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -401,6 +401,7 @@ static struct pernet_operations ipv6_net_ops = {
.exit = ipv6_net_exit,
.id = &conntrack6_net_id,
.size = sizeof(struct conntrack6_net),
+ .async = true,
};
static int __init nf_conntrack_l3proto_ipv6_init(void)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f0ae58424c45..939d122e71b4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1846,7 +1846,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
struct flow_keys hash_keys;
u32 mhash;
- switch (net->ipv6.sysctl.multipath_hash_policy) {
+ switch (ip6_multipath_hash_policy(net)) {
case 0:
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
@@ -2550,7 +2550,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
static int ip6_route_check_nh_onlink(struct net *net,
struct fib6_config *cfg,
- struct net_device *dev,
+ const struct net_device *dev,
struct netlink_ext_ack *extack)
{
u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
@@ -2626,6 +2626,79 @@ out:
return err;
}
+static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
+ struct net_device **_dev, struct inet6_dev **idev,
+ struct netlink_ext_ack *extack)
+{
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ int gwa_type = ipv6_addr_type(gw_addr);
+ bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
+ const struct net_device *dev = *_dev;
+ bool need_addr_check = !dev;
+ int err = -EINVAL;
+
+ /* if gw_addr is local we will fail to detect this in case
+ * address is still TENTATIVE (DAD in progress). rt6_lookup()
+ * will return already-added prefix route via interface that
+ * prefix route was assigned to, which might be non-loopback.
+ */
+ if (dev &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
+ /* IPv6 strictly inhibits using not link-local
+ * addresses as nexthop address.
+ * Otherwise, router will not able to send redirects.
+ * It is very good, but in some (rare!) circumstances
+ * (SIT, PtP, NBMA NOARP links) it is handy to allow
+ * some exceptions. --ANK
+ * We allow IPv4-mapped nexthops to support RFC4798-type
+ * addressing
+ */
+ if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
+ NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ goto out;
+ }
+
+ if (cfg->fc_flags & RTNH_F_ONLINK)
+ err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
+ else
+ err = ip6_route_check_nh(net, cfg, _dev, idev);
+
+ if (err)
+ goto out;
+ }
+
+ /* reload in case device was changed */
+ dev = *_dev;
+
+ err = -EINVAL;
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Egress device not specified");
+ goto out;
+ } else if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack,
+ "Egress device can not be loopback device for this route");
+ goto out;
+ }
+
+ /* if we did not check gw_addr above, do so now that the
+ * egress device has been resolved.
+ */
+ if (need_addr_check &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -2808,61 +2881,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
}
if (cfg->fc_flags & RTF_GATEWAY) {
- const struct in6_addr *gw_addr;
- int gwa_type;
-
- gw_addr = &cfg->fc_gateway;
- gwa_type = ipv6_addr_type(gw_addr);
-
- /* if gw_addr is local we will fail to detect this in case
- * address is still TENTATIVE (DAD in progress). rt6_lookup()
- * will return already-added prefix route via interface that
- * prefix route was assigned to, which might be non-loopback.
- */
- err = -EINVAL;
- if (ipv6_chk_addr_and_flags(net, gw_addr,
- gwa_type & IPV6_ADDR_LINKLOCAL ?
- dev : NULL, 0, 0)) {
- NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+ if (err)
goto out;
- }
- rt->rt6i_gateway = *gw_addr;
-
- if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- /* IPv6 strictly inhibits using not link-local
- addresses as nexthop address.
- Otherwise, router will not able to send redirects.
- It is very good, but in some (rare!) circumstances
- (SIT, PtP, NBMA NOARP links) it is handy to allow
- some exceptions. --ANK
- We allow IPv4-mapped nexthops to support RFC4798-type
- addressing
- */
- if (!(gwa_type & (IPV6_ADDR_UNICAST |
- IPV6_ADDR_MAPPED))) {
- NL_SET_ERR_MSG(extack,
- "Invalid gateway address");
- goto out;
- }
- if (cfg->fc_flags & RTNH_F_ONLINK) {
- err = ip6_route_check_nh_onlink(net, cfg, dev,
- extack);
- } else {
- err = ip6_route_check_nh(net, cfg, &dev, &idev);
- }
- if (err)
- goto out;
- }
- err = -EINVAL;
- if (!dev) {
- NL_SET_ERR_MSG(extack, "Egress device not specified");
- goto out;
- } else if (dev->flags & IFF_LOOPBACK) {
- NL_SET_ERR_MSG(extack,
- "Egress device can not be loopback device for this route");
- goto out;
- }
+ rt->rt6i_gateway = cfg->fc_gateway;
}
err = -ENODEV;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a9c4ac6efe22..8a4f8fddd812 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel *t = netdev_priv(dev);
- if (dev == sitn->fb_tunnel_dev) {
+ if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) {
ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
t->ip6rd.relay_prefix = 0;
t->ip6rd.prefixlen = 16;
@@ -1835,6 +1835,9 @@ static int __net_init sit_init_net(struct net *net)
sitn->tunnels[2] = sitn->tunnels_r;
sitn->tunnels[3] = sitn->tunnels_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
+
sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
NET_NAME_UNKNOWN,
ipip6_tunnel_setup);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 52e3ea0e6f50..ad30f5e31969 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1509,34 +1509,34 @@ void udp6_proc_exit(struct net *net)
/* ------------------------------------------------------------------------ */
struct proto udpv6_prot = {
- .name = "UDPv6",
- .owner = THIS_MODULE,
- .close = udp_lib_close,
- .connect = ip6_datagram_connect,
- .disconnect = udp_disconnect,
- .ioctl = udp_ioctl,
- .init = udp_init_sock,
- .destroy = udpv6_destroy_sock,
- .setsockopt = udpv6_setsockopt,
- .getsockopt = udpv6_getsockopt,
- .sendmsg = udpv6_sendmsg,
- .recvmsg = udpv6_recvmsg,
- .release_cb = ip6_datagram_release_cb,
- .hash = udp_lib_hash,
- .unhash = udp_lib_unhash,
- .rehash = udp_v6_rehash,
- .get_port = udp_v6_get_port,
- .memory_allocated = &udp_memory_allocated,
- .sysctl_mem = sysctl_udp_mem,
- .sysctl_wmem = &sysctl_udp_wmem_min,
- .sysctl_rmem = &sysctl_udp_rmem_min,
- .obj_size = sizeof(struct udp6_sock),
- .h.udp_table = &udp_table,
+ .name = "UDPv6",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udp_init_sock,
+ .destroy = udpv6_destroy_sock,
+ .setsockopt = udpv6_setsockopt,
+ .getsockopt = udpv6_getsockopt,
+ .sendmsg = udpv6_sendmsg,
+ .recvmsg = udpv6_recvmsg,
+ .release_cb = ip6_datagram_release_cb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v6_rehash,
+ .get_port = udp_v6_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp6_sock),
+ .h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udpv6_setsockopt,
- .compat_getsockopt = compat_udpv6_getsockopt,
+ .compat_setsockopt = compat_udpv6_setsockopt,
+ .compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .diag_destroy = udp_abort,
+ .diag_destroy = udp_abort,
};
static struct inet_protosw udpv6_protosw = {
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 83421c6f0bef..189a12a5e4ac 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1787,6 +1787,7 @@ static struct pernet_operations l2tp_net_ops = {
.exit = l2tp_exit_net,
.id = &l2tp_net_id,
.size = sizeof(struct l2tp_net),
+ .async = true,
};
static int __init l2tp_init(void)
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index d90928f50226..a7f7b8ff4729 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -394,8 +394,9 @@ static void llc_sap_mcast(struct llc_sap *sap,
const struct llc_addr *laddr,
struct sk_buff *skb)
{
- int i = 0, count = 256 / sizeof(struct sock *);
- struct sock *sk, *stack[count];
+ int i = 0;
+ struct sock *sk;
+ struct sock *stack[256 / sizeof(struct sock *)];
struct llc_sock *llc;
struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex);
@@ -408,7 +409,7 @@ static void llc_sap_mcast(struct llc_sap *sap,
continue;
sock_hold(sk);
- if (i < count)
+ if (i < ARRAY_SIZE(stack))
stack[i++] = sk;
else {
llc_do_mcast(sap, skb, stack, i);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d01743234cf6..9c898a3688c6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2549,11 +2549,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
fwd_skb = skb_copy_expand(skb, local->tx_headroom +
sdata->encrypt_headroom, 0, GFP_ATOMIC);
- if (!fwd_skb) {
- net_info_ratelimited("%s: failed to clone mesh frame\n",
- sdata->name);
+ if (!fwd_skb)
goto out;
- }
fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data;
fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7a4de6d618b1..d4a89a8be013 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2488,6 +2488,7 @@ static void mpls_net_exit(struct net *net)
static struct pernet_operations mpls_net_ops = {
.init = mpls_net_init,
.exit = mpls_net_exit,
+ .async = true,
};
static struct rtnl_af_ops mpls_af_ops __read_mostly = {
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index d4201665a580..05fcfb4fbe1d 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -183,7 +183,7 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info)
hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
&ncsi_genl_family, 0, NCSI_CMD_PKG_INFO);
if (!hdr) {
- kfree(skb);
+ kfree_skb(skb);
return -EMSGSIZE;
}
@@ -204,7 +204,7 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info)
err:
genlmsg_cancel(skb, hdr);
- kfree(skb);
+ kfree_skb(skb);
return rc;
}
@@ -299,6 +299,7 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
package = np;
if (!package) {
/* The user has set a package that does not exist */
+ spin_unlock_irqrestore(&ndp->lock, flags);
return -ERANGE;
}
@@ -317,6 +318,7 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
/* The user has set a channel that does not exist on this
* package
*/
+ spin_unlock_irqrestore(&ndp->lock, flags);
netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n",
channel_id);
return -ERANGE;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5f6f73cf2174..6a6cb9db030b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2289,10 +2289,12 @@ static struct pernet_operations ipvs_core_ops = {
.exit = __ip_vs_cleanup,
.id = &ip_vs_net_id,
.size = sizeof(struct netns_ipvs),
+ .async = true,
};
static struct pernet_operations ipvs_core_dev_ops = {
.exit = __ip_vs_dev_cleanup,
+ .async = true,
};
/*
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 58d5d05aec24..8b25aab41928 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -479,6 +479,7 @@ static void __ip_vs_ftp_exit(struct net *net)
static struct pernet_operations ip_vs_ftp_ops = {
.init = __ip_vs_ftp_init,
.exit = __ip_vs_ftp_exit,
+ .async = true,
};
static int __init ip_vs_ftp_init(void)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 558593e6a0a3..8e19c86d1aa6 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6596,6 +6596,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.exit = nf_tables_exit_net,
+ .async = true,
};
static int __init nf_tables_module_init(void)
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 03ead8a9e90c..84fc4954862d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -566,6 +566,7 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
static struct pernet_operations nfnetlink_net_ops = {
.init = nfnetlink_net_init,
.exit_batch = nfnetlink_net_exit_batch,
+ .async = true,
};
static int __init nfnetlink_init(void)
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 88d427f9f9e6..8d9f18bb8840 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -515,6 +515,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net)
static struct pernet_operations nfnl_acct_ops = {
.init = nfnl_acct_net_init,
.exit = nfnl_acct_net_exit,
+ .async = true,
};
static int __init nfnl_acct_init(void)
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 95b04702a655..6819300f7fb7 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -586,6 +586,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
static struct pernet_operations cttimeout_ops = {
.init = cttimeout_net_init,
.exit = cttimeout_net_exit,
+ .async = true,
};
static int __init cttimeout_init(void)
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 7b46aa4c478d..b21ef79849a1 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1108,6 +1108,7 @@ static struct pernet_operations nfnl_log_net_ops = {
.exit = nfnl_log_net_exit,
.id = &nfnl_log_net_id,
.size = sizeof(struct nfnl_log_net),
+ .async = true,
};
static int __init nfnetlink_log_init(void)
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8bba23160a68..9f572ed56208 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -833,11 +833,8 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
if (diff > skb_tailroom(e->skb)) {
nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
diff, GFP_ATOMIC);
- if (!nskb) {
- printk(KERN_WARNING "nf_queue: OOM "
- "in mangle, dropping packet\n");
+ if (!nskb)
return -ENOMEM;
- }
kfree_skb(e->skb);
e->skb = nskb;
}
@@ -1528,6 +1525,7 @@ static struct pernet_operations nfnl_queue_net_ops = {
.exit_batch = nfnl_queue_net_exit_batch,
.id = &nfnl_queue_net_id,
.size = sizeof(struct nfnl_queue_net),
+ .async = true,
};
static int __init nfnetlink_queue_init(void)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ef38e5aecd28..100191df0371 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2384,6 +2384,7 @@ static struct pernet_operations ovs_net_ops = {
.exit = ovs_exit_net,
.id = &ovs_net_id,
.size = sizeof(struct ovs_net),
+ .async = true,
};
static int __init dp_init(void)
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index b6c8524032a0..f81c1d0ddff4 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -464,10 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
return 0;
}
-static unsigned int packet_length(const struct sk_buff *skb,
- struct net_device *dev)
+static int packet_length(const struct sk_buff *skb,
+ struct net_device *dev)
{
- unsigned int length = skb->len - dev->hard_header_len;
+ int length = skb->len - dev->hard_header_len;
if (!skb_vlan_tag_present(skb) &&
eth_type_vlan(skb->protocol))
@@ -478,7 +478,7 @@ static unsigned int packet_length(const struct sk_buff *skb,
* account for 802.1ad. e.g. is_skb_forwardable().
*/
- return length;
+ return length > 0 ? length : 0;
}
void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 2da3176bf792..abef75da89a7 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -540,9 +540,9 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_connection *, void *),
+ u64 *buffer,
size_t item_len)
{
- uint64_t buffer[(item_len + 7) / 8];
struct hlist_head *head;
struct rds_connection *conn;
size_t i;
@@ -578,9 +578,9 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_conn_path *, void *),
+ u64 *buffer,
size_t item_len)
{
- u64 buffer[(item_len + 7) / 8];
struct hlist_head *head;
struct rds_connection *conn;
size_t i;
@@ -649,8 +649,11 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
+ u64 buffer[(sizeof(struct rds_info_connection) + 7) / 8];
+
rds_walk_conn_path_info(sock, len, iter, lens,
rds_conn_info_visitor,
+ buffer,
sizeof(struct rds_info_connection));
}
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 50a88f3e7e39..02deee29e7f1 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -321,8 +321,11 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
+ u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8];
+
rds_for_each_conn_info(sock, len, iter, lens,
rds_ib_conn_info_visitor,
+ buffer,
sizeof(struct rds_info_rdma_connection));
}
diff --git a/net/rds/message.c b/net/rds/message.c
index 90dcdcfe9f62..a35f76971984 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -67,7 +67,7 @@ static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie)
return true;
}
-struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
+static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
{
return container_of(znotif, struct rds_msg_zcopy_info, znotif);
}
@@ -355,9 +355,8 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
return rm;
}
-int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
+static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
{
- unsigned long sg_off;
struct scatterlist *sg;
int ret = 0;
int length = iov_iter_count(from);
@@ -370,7 +369,6 @@ int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
* now allocate and copy in the data payload.
*/
sg = rm->data.op_sg;
- sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 74cd27c661de..b04c333d9d1c 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -735,6 +735,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_connection *, void *),
+ u64 *buffer,
size_t item_len);
__printf(2, 3)
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 08230a145042..08ea9cd5c2f6 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -272,13 +272,14 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
static void rds_tcp_conn_free(void *arg)
{
struct rds_tcp_connection *tc = arg;
+ unsigned long flags;
rdsdebug("freeing tc %p\n", tc);
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irqsave(&rds_tcp_conn_lock, flags);
if (!tc->t_tcp_node_detached)
list_del(&tc->t_tcp_node);
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
kmem_cache_free(rds_tcp_conn_slab, tc);
}
@@ -308,13 +309,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
rdsdebug("rds_conn_path [%d] tc %p\n", i,
conn->c_path[i].cp_transport_data);
}
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
for (i = 0; i < RDS_MPATH_WORKERS; i++) {
tc = conn->c_path[i].cp_transport_data;
tc->t_tcp_node_detached = false;
list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
fail:
if (ret) {
for (j = 0; j < i; j++)
@@ -515,6 +516,7 @@ static struct pernet_operations rds_tcp_net_ops = {
.exit = rds_tcp_exit_net,
.id = &rds_tcp_netid,
.size = sizeof(struct rds_tcp_net),
+ .async = true,
};
static void rds_tcp_kill_sock(struct net *net)
@@ -526,7 +528,7 @@ static void rds_tcp_kill_sock(struct net *net)
rtn->rds_tcp_listen_sock = NULL;
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -539,7 +541,7 @@ static void rds_tcp_kill_sock(struct net *net)
tc->t_tcp_node_detached = true;
}
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
rds_conn_destroy(tc->t_cpath->cp_conn);
}
@@ -587,7 +589,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
- spin_lock_bh(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -597,7 +599,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
/* reconnect with new parameters */
rds_conn_path_drop(tc->t_cpath, false);
}
- spin_unlock_bh(&rds_tcp_conn_lock);
+ spin_unlock_irq(&rds_tcp_conn_lock);
}
static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 9d45d8b56744..7bff716e911e 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -272,7 +272,7 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
unsigned int *_offset, unsigned int *_len)
{
unsigned int offset = sizeof(struct rxrpc_wire_header);
- unsigned int len = *_len;
+ unsigned int len;
int ret;
u8 annotation = *_annotation;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a54fa7b8c217..57cf37145282 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -109,6 +109,42 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
}
EXPORT_SYMBOL(__tcf_idr_release);
+static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
+{
+ u32 cookie_len = 0;
+
+ if (act->act_cookie)
+ cookie_len = nla_total_size(act->act_cookie->len);
+
+ return nla_total_size(0) /* action number nested */
+ + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
+ + cookie_len /* TCA_ACT_COOKIE */
+ + nla_total_size(0) /* TCA_ACT_STATS nested */
+ /* TCA_STATS_BASIC */
+ + nla_total_size_64bit(sizeof(struct gnet_stats_basic))
+ /* TCA_STATS_QUEUE */
+ + nla_total_size_64bit(sizeof(struct gnet_stats_queue))
+ + nla_total_size(0) /* TCA_OPTIONS nested */
+ + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */
+}
+
+static size_t tcf_action_full_attrs_size(size_t sz)
+{
+ return NLMSG_HDRLEN /* struct nlmsghdr */
+ + sizeof(struct tcamsg)
+ + nla_total_size(0) /* TCA_ACT_TAB nested */
+ + sz;
+}
+
+static size_t tcf_action_fill_size(const struct tc_action *act)
+{
+ size_t sz = tcf_action_shared_attrs_size(act);
+
+ if (act->ops->get_fill_size)
+ return act->ops->get_fill_size(act) + sz;
+ return sz;
+}
+
static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -741,10 +777,12 @@ static void cleanup_a(struct list_head *actions, int ovr)
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions, struct netlink_ext_ack *extack)
+ struct list_head *actions, size_t *attr_size,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
+ size_t sz = 0;
int err;
int i;
@@ -760,11 +798,14 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
goto err;
}
act->order = i;
+ sz += tcf_action_fill_size(act);
if (ovr)
act->tcfa_refcnt++;
list_add_tail(&act->list, actions);
}
+ *attr_size = tcf_action_full_attrs_size(sz);
+
/* Remove the temp refcnt which was necessary to protect against
* destroying an existing action which was being replaced
*/
@@ -994,12 +1035,13 @@ err_out:
static int
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
- u32 portid, struct netlink_ext_ack *extack)
+ u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
int ret;
struct sk_buff *skb;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+ GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -1032,6 +1074,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
int i, ret;
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
+ size_t attr_size = 0;
LIST_HEAD(actions);
ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
@@ -1053,13 +1096,16 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
goto err;
}
act->order = i;
+ attr_size += tcf_action_fill_size(act);
list_add_tail(&act->list, &actions);
}
+ attr_size = tcf_action_full_attrs_size(attr_size);
+
if (event == RTM_GETACTION)
ret = tcf_get_notify(net, portid, n, &actions, event, extack);
else { /* delete */
- ret = tcf_del_notify(net, n, &actions, portid, extack);
+ ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack);
if (ret)
goto err;
return ret;
@@ -1072,12 +1118,13 @@ err:
static int
tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
- u32 portid, struct netlink_ext_ack *extack)
+ u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
int err = 0;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+ GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -1099,15 +1146,16 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
struct nlmsghdr *n, u32 portid, int ovr,
struct netlink_ext_ack *extack)
{
+ size_t attr_size = 0;
int ret = 0;
LIST_HEAD(actions);
ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
- extack);
+ &attr_size, extack);
if (ret)
return ret;
- return tcf_add_notify(net, n, &actions, portid, extack);
+ return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
}
static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 74563254e676..88fbb8403565 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -217,6 +217,19 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static size_t tcf_gact_get_fill_size(const struct tc_action *act)
+{
+ size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */
+
+#ifdef CONFIG_GACT_PROB
+ if (to_gact(act)->tcfg_ptype)
+ /* TCA_GACT_PROB */
+ sz += nla_total_size(sizeof(struct tc_gact_p));
+#endif
+
+ return sz;
+}
+
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
.type = TCA_ACT_GACT,
@@ -227,6 +240,7 @@ static struct tc_action_ops act_gact_ops = {
.init = tcf_gact_init,
.walk = tcf_gact_walker,
.lookup = tcf_gact_search,
+ .get_fill_size = tcf_gact_get_fill_size,
.size = sizeof(struct tcf_gact),
};
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 19f9f421d5b7..ec5fe8ec0c3e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1433,6 +1433,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
#ifdef CONFIG_NET_CLS_ACT
{
struct tc_action *act;
+ size_t attr_size = 0;
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tp, tb[exts->police],
@@ -1450,7 +1451,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
err = tcf_action_init(net, tp, tb[exts->action],
rate_tlv, NULL, ovr, TCA_ACT_BIND,
- &actions, extack);
+ &actions, &attr_size, extack);
if (err)
return err;
list_for_each_entry(act, &actions, list)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7d0ce2c40f93..d964e60c730e 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -511,6 +511,9 @@ static int fl_set_key_flags(struct nlattr **tb,
fl_set_key_flag(key, mask, flags_key, flags_mask,
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+ FLOW_DIS_FIRST_FRAG);
return 0;
}
@@ -1130,6 +1133,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
fl_get_key_flag(flags_key, flags_mask, &key, &mask,
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+ FLOW_DIS_FIRST_FRAG);
_key = cpu_to_be32(key);
_mask = cpu_to_be32(mask);
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 00667c50efa7..e64630cd3331 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -101,13 +101,14 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp)
return NULL;
INIT_LIST_HEAD(&new->key_list);
+ refcount_set(&new->refcnt, 1);
new->key_id = key_id;
return new;
}
/* Free the shared key structure */
-static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
+static void sctp_auth_shkey_destroy(struct sctp_shared_key *sh_key)
{
BUG_ON(!list_empty(&sh_key->key_list));
sctp_auth_key_put(sh_key->key);
@@ -115,6 +116,17 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
kfree(sh_key);
}
+void sctp_auth_shkey_release(struct sctp_shared_key *sh_key)
+{
+ if (refcount_dec_and_test(&sh_key->refcnt))
+ sctp_auth_shkey_destroy(sh_key);
+}
+
+void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key)
+{
+ refcount_inc(&sh_key->refcnt);
+}
+
/* Destroy the entire key list. This is done during the
* associon and endpoint free process.
*/
@@ -128,7 +140,7 @@ void sctp_auth_destroy_keys(struct list_head *keys)
key_for_each_safe(ep_key, tmp, keys) {
list_del_init(&ep_key->key_list);
- sctp_auth_shkey_free(ep_key);
+ sctp_auth_shkey_release(ep_key);
}
}
@@ -409,13 +421,19 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
sctp_auth_key_put(asoc->asoc_shared_key);
asoc->asoc_shared_key = secret;
+ asoc->shkey = ep_key;
/* Update send queue in case any chunk already in there now
* needs authenticating
*/
list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) {
- if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc))
+ if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) {
chunk->auth = 1;
+ if (!chunk->shkey) {
+ chunk->shkey = asoc->shkey;
+ sctp_auth_shkey_hold(chunk->shkey);
+ }
+ }
}
return 0;
@@ -431,8 +449,11 @@ struct sctp_shared_key *sctp_auth_get_shkey(
/* First search associations set of endpoint pair shared keys */
key_for_each(key, &asoc->endpoint_shared_keys) {
- if (key->key_id == key_id)
- return key;
+ if (key->key_id == key_id) {
+ if (!key->deactivated)
+ return key;
+ break;
+ }
}
return NULL;
@@ -703,16 +724,15 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
* after the AUTH chunk in the SCTP packet.
*/
void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
- struct sk_buff *skb,
- struct sctp_auth_chunk *auth,
- gfp_t gfp)
+ struct sk_buff *skb, struct sctp_auth_chunk *auth,
+ struct sctp_shared_key *ep_key, gfp_t gfp)
{
- struct crypto_shash *tfm;
struct sctp_auth_bytes *asoc_key;
+ struct crypto_shash *tfm;
__u16 key_id, hmac_id;
- __u8 *digest;
unsigned char *end;
int free_key = 0;
+ __u8 *digest;
/* Extract the info we need:
* - hmac id
@@ -724,12 +744,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
if (key_id == asoc->active_key_id)
asoc_key = asoc->asoc_shared_key;
else {
- struct sctp_shared_key *ep_key;
-
- ep_key = sctp_auth_get_shkey(asoc, key_id);
- if (!ep_key)
- return;
-
+ /* ep_key can't be NULL here */
asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp);
if (!asoc_key)
return;
@@ -829,7 +844,7 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
struct sctp_association *asoc,
struct sctp_authkey *auth_key)
{
- struct sctp_shared_key *cur_key = NULL;
+ struct sctp_shared_key *cur_key, *shkey;
struct sctp_auth_bytes *key;
struct list_head *sh_keys;
int replace = 0;
@@ -842,46 +857,34 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
else
sh_keys = &ep->endpoint_shared_keys;
- key_for_each(cur_key, sh_keys) {
- if (cur_key->key_id == auth_key->sca_keynumber) {
+ key_for_each(shkey, sh_keys) {
+ if (shkey->key_id == auth_key->sca_keynumber) {
replace = 1;
break;
}
}
- /* If we are not replacing a key id, we need to allocate
- * a shared key.
- */
- if (!replace) {
- cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber,
- GFP_KERNEL);
- if (!cur_key)
- return -ENOMEM;
- }
+ cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, GFP_KERNEL);
+ if (!cur_key)
+ return -ENOMEM;
/* Create a new key data based on the info passed in */
key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL);
- if (!key)
- goto nomem;
+ if (!key) {
+ kfree(cur_key);
+ return -ENOMEM;
+ }
memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
+ cur_key->key = key;
- /* If we are replacing, remove the old keys data from the
- * key id. If we are adding new key id, add it to the
- * list.
- */
- if (replace)
- sctp_auth_key_put(cur_key->key);
- else
- list_add(&cur_key->key_list, sh_keys);
+ if (replace) {
+ list_del_init(&shkey->key_list);
+ sctp_auth_shkey_release(shkey);
+ }
+ list_add(&cur_key->key_list, sh_keys);
- cur_key->key = key;
return 0;
-nomem:
- if (!replace)
- sctp_auth_shkey_free(cur_key);
-
- return -ENOMEM;
}
int sctp_auth_set_active_key(struct sctp_endpoint *ep,
@@ -905,7 +908,7 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep,
}
}
- if (!found)
+ if (!found || key->deactivated)
return -EINVAL;
if (asoc) {
@@ -952,7 +955,58 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep,
/* Delete the shared key */
list_del_init(&key->key_list);
- sctp_auth_shkey_free(key);
+ sctp_auth_shkey_release(key);
+
+ return 0;
+}
+
+int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
+ struct sctp_association *asoc, __u16 key_id)
+{
+ struct sctp_shared_key *key;
+ struct list_head *sh_keys;
+ int found = 0;
+
+ /* The key identifier MUST NOT be the current active key
+ * The key identifier MUST correst to an existing key
+ */
+ if (asoc) {
+ if (asoc->active_key_id == key_id)
+ return -EINVAL;
+
+ sh_keys = &asoc->endpoint_shared_keys;
+ } else {
+ if (ep->active_key_id == key_id)
+ return -EINVAL;
+
+ sh_keys = &ep->endpoint_shared_keys;
+ }
+
+ key_for_each(key, sh_keys) {
+ if (key->key_id == key_id) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found)
+ return -EINVAL;
+
+ /* refcnt == 1 and !list_empty mean it's not being used anywhere
+ * and deactivated will be set, so it's time to notify userland
+ * that this shkey can be freed.
+ */
+ if (asoc && !list_empty(&key->key_list) &&
+ refcount_read(&key->refcnt) == 1) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, key->key_id,
+ SCTP_AUTH_FREE_KEY, GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+
+ key->deactivated = 1;
return 0;
}
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 991a530c6b31..f889a84f264d 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -168,6 +168,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
{
size_t len, first_len, max_data, remaining;
size_t msg_len = iov_iter_count(from);
+ struct sctp_shared_key *shkey = NULL;
struct list_head *pos, *temp;
struct sctp_chunk *chunk;
struct sctp_datamsg *msg;
@@ -204,6 +205,17 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
if (hmac_desc)
max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
hmac_desc->hmac_len);
+
+ if (sinfo->sinfo_tsn &&
+ sinfo->sinfo_ssn != asoc->active_key_id) {
+ shkey = sctp_auth_get_shkey(asoc, sinfo->sinfo_ssn);
+ if (!shkey) {
+ err = -EINVAL;
+ goto errout;
+ }
+ } else {
+ shkey = asoc->shkey;
+ }
}
/* Check what's our max considering the above */
@@ -275,6 +287,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
if (err < 0)
goto errout_chunk_free;
+ chunk->shkey = shkey;
+
/* Put the chunk->skb back into the form expected by send. */
__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr -
chunk->skb->data);
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index aeea6da81441..fd2684ad94c8 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -130,11 +130,3 @@ void sctp_dbg_objcnt_init(struct net *net)
if (!ent)
pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
}
-
-/* Cleanup the objcount entry in the proc filesystem. */
-void sctp_dbg_objcnt_exit(struct net *net)
-{
- remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp);
-}
-
-
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 01a26ee051e3..d6e1c90cc09a 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -241,10 +241,13 @@ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
if (!chunk->auth)
return retval;
- auth = sctp_make_auth(asoc);
+ auth = sctp_make_auth(asoc, chunk->shkey->key_id);
if (!auth)
return retval;
+ auth->shkey = chunk->shkey;
+ sctp_auth_shkey_hold(auth->shkey);
+
retval = __sctp_packet_append_chunk(pkt, auth);
if (retval != SCTP_XMIT_OK)
@@ -490,7 +493,8 @@ merge:
}
if (auth) {
- sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp);
+ sctp_auth_calculate_hmac(tp->asoc, nskb, auth,
+ packet->auth->shkey, gfp);
/* free auth if no more chunks, or add it back */
if (list_empty(&packet->chunk_list))
sctp_chunk_free(packet->auth);
@@ -770,6 +774,16 @@ static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
enum sctp_xmit retval = SCTP_XMIT_OK;
size_t psize, pmtu, maxsize;
+ /* Don't bundle in this packet if this chunk's auth key doesn't
+ * match other chunks already enqueued on this packet. Also,
+ * don't bundle the chunk with auth key if other chunks in this
+ * packet don't have auth key.
+ */
+ if ((packet->auth && chunk->shkey != packet->auth->shkey) ||
+ (!packet->auth && chunk->shkey &&
+ chunk->chunk_hdr->type != SCTP_CID_AUTH))
+ return SCTP_XMIT_PMTU_FULL;
+
psize = packet->size;
if (packet->transport->asoc)
pmtu = packet->transport->asoc->pathmtu;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 537545ebcb0e..17d0155d9de3 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -101,25 +101,6 @@ static const struct file_operations sctp_snmp_seq_fops = {
.release = single_release_net,
};
-/* Set up the proc fs entry for 'snmp' object. */
-int __net_init sctp_snmp_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_snmp_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'snmp' object. */
-void sctp_snmp_proc_exit(struct net *net)
-{
- remove_proc_entry("snmp", net->sctp.proc_net_sctp);
-}
-
/* Dump local addresses of an association/endpoint. */
static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
{
@@ -259,25 +240,6 @@ static const struct file_operations sctp_eps_seq_fops = {
.release = seq_release_net,
};
-/* Set up the proc fs entry for 'eps' object. */
-int __net_init sctp_eps_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_eps_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'eps' object. */
-void sctp_eps_proc_exit(struct net *net)
-{
- remove_proc_entry("eps", net->sctp.proc_net_sctp);
-}
-
struct sctp_ht_iter {
struct seq_net_private p;
struct rhashtable_iter hti;
@@ -390,25 +352,6 @@ static const struct file_operations sctp_assocs_seq_fops = {
.release = seq_release_net,
};
-/* Set up the proc fs entry for 'assocs' object. */
-int __net_init sctp_assocs_proc_init(struct net *net)
-{
- struct proc_dir_entry *p;
-
- p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_assocs_seq_fops);
- if (!p)
- return -ENOMEM;
-
- return 0;
-}
-
-/* Cleanup the proc fs entry for 'assocs' object. */
-void sctp_assocs_proc_exit(struct net *net)
-{
- remove_proc_entry("assocs", net->sctp.proc_net_sctp);
-}
-
static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
{
struct sctp_association *assoc;
@@ -488,12 +431,6 @@ static const struct seq_operations sctp_remaddr_ops = {
.show = sctp_remaddr_seq_show,
};
-/* Cleanup the proc fs entry for 'remaddr' object. */
-void sctp_remaddr_proc_exit(struct net *net)
-{
- remove_proc_entry("remaddr", net->sctp.proc_net_sctp);
-}
-
static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &sctp_remaddr_ops,
@@ -507,13 +444,28 @@ static const struct file_operations sctp_remaddr_seq_fops = {
.release = seq_release_net,
};
-int __net_init sctp_remaddr_proc_init(struct net *net)
+/* Set up the proc fs entry for the SCTP protocol. */
+int __net_init sctp_proc_init(struct net *net)
{
- struct proc_dir_entry *p;
-
- p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
- &sctp_remaddr_seq_fops);
- if (!p)
+ net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
+ if (!net->sctp.proc_net_sctp)
return -ENOMEM;
+ if (!proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_snmp_seq_fops))
+ goto cleanup;
+ if (!proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_eps_seq_fops))
+ goto cleanup;
+ if (!proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_assocs_seq_fops))
+ goto cleanup;
+ if (!proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_remaddr_seq_fops))
+ goto cleanup;
return 0;
+
+cleanup:
+ remove_proc_subtree("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
+ return -ENOMEM;
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 91813e686c67..493b817f6a2a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -80,56 +80,6 @@ long sysctl_sctp_mem[3];
int sysctl_sctp_rmem[3];
int sysctl_sctp_wmem[3];
-/* Set up the proc fs entry for the SCTP protocol. */
-static int __net_init sctp_proc_init(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
- net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
- if (!net->sctp.proc_net_sctp)
- goto out_proc_net_sctp;
- if (sctp_snmp_proc_init(net))
- goto out_snmp_proc_init;
- if (sctp_eps_proc_init(net))
- goto out_eps_proc_init;
- if (sctp_assocs_proc_init(net))
- goto out_assocs_proc_init;
- if (sctp_remaddr_proc_init(net))
- goto out_remaddr_proc_init;
-
- return 0;
-
-out_remaddr_proc_init:
- sctp_assocs_proc_exit(net);
-out_assocs_proc_init:
- sctp_eps_proc_exit(net);
-out_eps_proc_init:
- sctp_snmp_proc_exit(net);
-out_snmp_proc_init:
- remove_proc_entry("sctp", net->proc_net);
- net->sctp.proc_net_sctp = NULL;
-out_proc_net_sctp:
- return -ENOMEM;
-#endif /* CONFIG_PROC_FS */
- return 0;
-}
-
-/* Clean up the proc fs entry for the SCTP protocol.
- * Note: Do not make this __exit as it is used in the init error
- * path.
- */
-static void sctp_proc_exit(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
- sctp_snmp_proc_exit(net);
- sctp_eps_proc_exit(net);
- sctp_assocs_proc_exit(net);
- sctp_remaddr_proc_exit(net);
-
- remove_proc_entry("sctp", net->proc_net);
- net->sctp.proc_net_sctp = NULL;
-#endif
-}
-
/* Private helper to extract ipv4 address and stash them in
* the protocol structure.
*/
@@ -1285,10 +1235,12 @@ static int __net_init sctp_defaults_init(struct net *net)
if (status)
goto err_init_mibs;
+#ifdef CONFIG_PROC_FS
/* Initialize proc fs directory. */
status = sctp_proc_init(net);
if (status)
goto err_init_proc;
+#endif
sctp_dbg_objcnt_init(net);
@@ -1320,9 +1272,10 @@ static void __net_exit sctp_defaults_exit(struct net *net)
sctp_free_addr_wq(net);
sctp_free_local_addr_list(net);
- sctp_dbg_objcnt_exit(net);
-
- sctp_proc_exit(net);
+#ifdef CONFIG_PROC_FS
+ remove_proc_subtree("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
+#endif
cleanup_sctp_mibs(net);
sctp_sysctl_net_unregister(net);
}
@@ -1330,6 +1283,7 @@ static void __net_exit sctp_defaults_exit(struct net *net)
static struct pernet_operations sctp_defaults_ops = {
.init = sctp_defaults_init,
.exit = sctp_defaults_exit,
+ .async = true,
};
static int __net_init sctp_ctrlsock_init(struct net *net)
@@ -1353,6 +1307,7 @@ static void __net_init sctp_ctrlsock_exit(struct net *net)
static struct pernet_operations sctp_ctrlsock_ops = {
.init = sctp_ctrlsock_init,
.exit = sctp_ctrlsock_exit,
+ .async = true,
};
/* Initialize the universe into something sensible. */
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index d01475f5f710..cc20bc39ee7c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -87,7 +87,28 @@ static void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len,
/* Control chunk destructor */
static void sctp_control_release_owner(struct sk_buff *skb)
{
- /*TODO: do memory release */
+ struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg;
+
+ if (chunk->shkey) {
+ struct sctp_shared_key *shkey = chunk->shkey;
+ struct sctp_association *asoc = chunk->asoc;
+
+ /* refcnt == 2 and !list_empty mean after this release, it's
+ * not being used anywhere, and it's time to notify userland
+ * that this shkey can be freed if it's been deactivated.
+ */
+ if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+ refcount_read(&shkey->refcnt) == 2) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+ SCTP_AUTH_FREE_KEY,
+ GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+ sctp_auth_shkey_release(chunk->shkey);
+ }
}
static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
@@ -102,7 +123,12 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
*
* For now don't do anything for now.
*/
+ if (chunk->auth) {
+ chunk->shkey = asoc->shkey;
+ sctp_auth_shkey_hold(chunk->shkey);
+ }
skb->sk = asoc ? asoc->base.sk : NULL;
+ skb_shinfo(skb)->destructor_arg = chunk;
skb->destructor = sctp_control_release_owner;
}
@@ -1271,7 +1297,8 @@ nodata:
return retval;
}
-struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
+struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc,
+ __u16 key_id)
{
struct sctp_authhdr auth_hdr;
struct sctp_hmac *hmac_desc;
@@ -1289,7 +1316,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
return NULL;
auth_hdr.hmac_id = htons(hmac_desc->hmac_id);
- auth_hdr.shkey_id = htons(asoc->active_key_id);
+ auth_hdr.shkey_id = htons(key_id);
retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
&auth_hdr);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b71e7fb0a20a..298112ca8c06 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1049,6 +1049,16 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
+static void sctp_cmd_peer_no_auth(struct sctp_cmd_seq *commands,
+ struct sctp_association *asoc)
+{
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+}
+
/* Helper function to generate an adaptation indication event */
static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
struct sctp_association *asoc)
@@ -1755,6 +1765,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
case SCTP_CMD_ADAPTATION_IND:
sctp_cmd_adaptation_ind(commands, asoc);
break;
+ case SCTP_CMD_PEER_NO_AUTH:
+ sctp_cmd_peer_no_auth(commands, asoc);
+ break;
case SCTP_CMD_ASSOC_SHKEY:
error = sctp_auth_asoc_init_active_key(asoc,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index eb7905ffe5f2..cc56a67dbb4d 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -659,7 +659,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
void *arg,
struct sctp_cmd_seq *commands)
{
- struct sctp_ulpevent *ev, *ai_ev = NULL;
+ struct sctp_ulpevent *ev, *ai_ev = NULL, *auth_ev = NULL;
struct sctp_association *new_asoc;
struct sctp_init_chunk *peer_init;
struct sctp_chunk *chunk = arg;
@@ -820,6 +820,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
goto nomem_aiev;
}
+ if (!new_asoc->peer.auth_capable) {
+ auth_ev = sctp_ulpevent_make_authkey(new_asoc, 0,
+ SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!auth_ev)
+ goto nomem_authev;
+ }
+
/* Add all the state machine commands now since we've created
* everything. This way we don't introduce memory corruptions
* during side-effect processing and correclty count established
@@ -847,8 +855,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ai_ev));
+ if (auth_ev)
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(auth_ev));
+
return SCTP_DISPOSITION_CONSUME;
+nomem_authev:
+ sctp_ulpevent_free(ai_ev);
nomem_aiev:
sctp_ulpevent_free(ev);
nomem_ev:
@@ -953,6 +967,15 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
SCTP_ULPEVENT(ev));
}
+ if (!asoc->peer.auth_capable) {
+ ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!ev)
+ goto nomem;
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(ev));
+ }
+
return SCTP_DISPOSITION_CONSUME;
nomem:
return SCTP_DISPOSITION_NOMEM;
@@ -1908,6 +1931,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
if (asoc->peer.adaptation_ind)
sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL());
+ if (!asoc->peer.auth_capable)
+ sctp_add_cmd_sf(commands, SCTP_CMD_PEER_NO_AUTH, SCTP_NULL());
+
return SCTP_DISPOSITION_CONSUME;
nomem:
@@ -1954,7 +1980,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
- struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
+ struct sctp_ulpevent *ev = NULL, *ai_ev = NULL, *auth_ev = NULL;
struct sctp_chunk *repl;
/* Clarification from Implementor's Guide:
@@ -2001,6 +2027,14 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
goto nomem;
}
+
+ if (!asoc->peer.auth_capable) {
+ auth_ev = sctp_ulpevent_make_authkey(asoc, 0,
+ SCTP_AUTH_NO_AUTH,
+ GFP_ATOMIC);
+ if (!auth_ev)
+ goto nomem;
+ }
}
repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -2015,10 +2049,15 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
if (ai_ev)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ai_ev));
+ if (auth_ev)
+ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+ SCTP_ULPEVENT(auth_ev));
return SCTP_DISPOSITION_CONSUME;
nomem:
+ if (auth_ev)
+ sctp_ulpevent_free(auth_ev);
if (ai_ev)
sctp_ulpevent_free(ai_ev);
if (ev)
@@ -4114,6 +4153,7 @@ static enum sctp_ierror sctp_sf_authenticate(
const union sctp_subtype type,
struct sctp_chunk *chunk)
{
+ struct sctp_shared_key *sh_key = NULL;
struct sctp_authhdr *auth_hdr;
__u8 *save_digest, *digest;
struct sctp_hmac *hmac;
@@ -4135,9 +4175,11 @@ static enum sctp_ierror sctp_sf_authenticate(
* configured
*/
key_id = ntohs(auth_hdr->shkey_id);
- if (key_id != asoc->active_key_id && !sctp_auth_get_shkey(asoc, key_id))
- return SCTP_IERROR_AUTH_BAD_KEYID;
-
+ if (key_id != asoc->active_key_id) {
+ sh_key = sctp_auth_get_shkey(asoc, key_id);
+ if (!sh_key)
+ return SCTP_IERROR_AUTH_BAD_KEYID;
+ }
/* Make sure that the length of the signature matches what
* we expect.
@@ -4166,7 +4208,7 @@ static enum sctp_ierror sctp_sf_authenticate(
sctp_auth_calculate_hmac(asoc, chunk->skb,
(struct sctp_auth_chunk *)chunk->chunk_hdr,
- GFP_ATOMIC);
+ sh_key, GFP_ATOMIC);
/* Discard the packet if the digests do not match */
if (memcmp(save_digest, digest, sig_len)) {
@@ -4243,7 +4285,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
struct sctp_ulpevent *ev;
ev = sctp_ulpevent_make_authkey(asoc, ntohs(auth_hdr->shkey_id),
- SCTP_AUTH_NEWKEY, GFP_ATOMIC);
+ SCTP_AUTH_NEW_KEY, GFP_ATOMIC);
if (!ev)
return -ENOMEM;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7d3476a4860d..7a10ae3c3d82 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -156,6 +156,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
/* The sndbuf space is tracked per association. */
sctp_association_hold(asoc);
+ if (chunk->shkey)
+ sctp_auth_shkey_hold(chunk->shkey);
+
skb_set_owner_w(chunk->skb, sk);
chunk->skb->destructor = sctp_wfree;
@@ -1677,7 +1680,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
struct sctp_association *asoc;
enum sctp_scope scope;
struct cmsghdr *cmsg;
- int err = -EINVAL;
+ int err;
*tp = NULL;
@@ -1761,16 +1764,20 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
memset(daddr, 0, sizeof(*daddr));
dlen = cmsg->cmsg_len - sizeof(struct cmsghdr);
if (cmsg->cmsg_type == SCTP_DSTADDRV4) {
- if (dlen < sizeof(struct in_addr))
+ if (dlen < sizeof(struct in_addr)) {
+ err = -EINVAL;
goto free;
+ }
dlen = sizeof(struct in_addr);
daddr->v4.sin_family = AF_INET;
daddr->v4.sin_port = htons(asoc->peer.port);
memcpy(&daddr->v4.sin_addr, CMSG_DATA(cmsg), dlen);
} else {
- if (dlen < sizeof(struct in6_addr))
+ if (dlen < sizeof(struct in6_addr)) {
+ err = -EINVAL;
goto free;
+ }
dlen = sizeof(struct in6_addr);
daddr->v6.sin6_family = AF_INET6;
@@ -1876,6 +1883,19 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
goto err;
}
+ if (asoc->pmtu_pending)
+ sctp_assoc_pending_pmtu(asoc);
+
+ if (sctp_wspace(asoc) < msg_len)
+ sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
+
+ if (!sctp_wspace(asoc)) {
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+ if (err)
+ goto err;
+ }
+
if (sctp_state(asoc, CLOSED)) {
err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
if (err)
@@ -1893,19 +1913,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
pr_debug("%s: we associated primitively\n", __func__);
}
- if (asoc->pmtu_pending)
- sctp_assoc_pending_pmtu(asoc);
-
- if (sctp_wspace(asoc) < msg_len)
- sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
-
- if (!sctp_wspace(asoc)) {
- timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
- if (err)
- goto err;
- }
-
datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter);
if (IS_ERR(datamsg)) {
err = PTR_ERR(datamsg);
@@ -1980,6 +1987,14 @@ static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc,
if (!cmsgs->srinfo && !cmsgs->prinfo)
sinfo->sinfo_timetolive = asoc->default_timetolive;
+
+ if (cmsgs->authinfo) {
+ /* Reuse sinfo_tsn to indicate that authinfo was set and
+ * sinfo_ssn to save the keyid on tx path.
+ */
+ sinfo->sinfo_tsn = 1;
+ sinfo->sinfo_ssn = cmsgs->authinfo->auth_keynumber;
+ }
}
static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
@@ -3632,6 +3647,33 @@ static int sctp_setsockopt_del_key(struct sock *sk,
}
/*
+ * 8.3.4 Deactivate a Shared Key (SCTP_AUTH_DEACTIVATE_KEY)
+ *
+ * This set option will deactivate a shared secret key.
+ */
+static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+ struct sctp_authkeyid val;
+ struct sctp_association *asoc;
+
+ if (!ep->auth_enable)
+ return -EACCES;
+
+ if (optlen != sizeof(struct sctp_authkeyid))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, optlen))
+ return -EFAULT;
+
+ asoc = sctp_id2assoc(sk, val.scact_assoc_id);
+ if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
+ return -EINVAL;
+
+ return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+}
+
+/*
* 8.1.23 SCTP_AUTO_ASCONF
*
* This option will enable or disable the use of the automatic generation of
@@ -4223,6 +4265,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_AUTH_DELETE_KEY:
retval = sctp_setsockopt_del_key(sk, optval, optlen);
break;
+ case SCTP_AUTH_DEACTIVATE_KEY:
+ retval = sctp_setsockopt_deactivate_key(sk, optval, optlen);
+ break;
case SCTP_AUTO_ASCONF:
retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
break;
@@ -7197,6 +7242,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_AUTH_KEY:
case SCTP_AUTH_CHUNK:
case SCTP_AUTH_DELETE_KEY:
+ case SCTP_AUTH_DEACTIVATE_KEY:
retval = -EOPNOTSUPP;
break;
case SCTP_HMAC_IDENT:
@@ -7867,6 +7913,21 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE)
cmsgs->prinfo->pr_value = 0;
break;
+ case SCTP_AUTHINFO:
+ /* SCTP Socket API Extension
+ * 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ---------------------
+ * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo
+ */
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_authinfo)))
+ return -EINVAL;
+
+ cmsgs->authinfo = CMSG_DATA(cmsg);
+ break;
case SCTP_DSTADDRV4:
case SCTP_DSTADDRV6:
/* SCTP Socket API Extension
@@ -8105,6 +8166,26 @@ static void sctp_wfree(struct sk_buff *skb)
sk->sk_wmem_queued -= skb->truesize;
sk_mem_uncharge(sk, skb->truesize);
+ if (chunk->shkey) {
+ struct sctp_shared_key *shkey = chunk->shkey;
+
+ /* refcnt == 2 and !list_empty mean after this release, it's
+ * not being used anywhere, and it's time to notify userland
+ * that this shkey can be freed if it's been deactivated.
+ */
+ if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+ refcount_read(&shkey->refcnt) == 2) {
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+ SCTP_AUTH_FREE_KEY,
+ GFP_KERNEL);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+ sctp_auth_shkey_release(chunk->shkey);
+ }
+
sock_wfree(skb);
sctp_wake_up_waiters(sk, asoc);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 2c6f4e0a9f3d..86913eb5cfa0 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -7,12 +7,11 @@
* applicable with RoCE-cards only
*
* Initial restrictions:
- * - IPv6 support postponed
* - support for alternate links postponed
* - partial support for non-blocking sockets only
* - support for urgent data postponed
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
* based on prototype from Frank Blaschka
@@ -64,6 +63,10 @@ static struct smc_hashinfo smc_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};
+static struct smc_hashinfo smc_v6_hashinfo = {
+ .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
+};
+
int smc_hash_sk(struct sock *sk)
{
struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
@@ -103,6 +106,18 @@ struct proto smc_proto = {
};
EXPORT_SYMBOL_GPL(smc_proto);
+struct proto smc_proto6 = {
+ .name = "SMC6",
+ .owner = THIS_MODULE,
+ .keepalive = smc_set_keepalive,
+ .hash = smc_hash_sk,
+ .unhash = smc_unhash_sk,
+ .obj_size = sizeof(struct smc_sock),
+ .h.smc_hash = &smc_v6_hashinfo,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
+};
+EXPORT_SYMBOL_GPL(smc_proto6);
+
static int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -159,19 +174,22 @@ static void smc_destruct(struct sock *sk)
sk_refcnt_debug_dec(sk);
}
-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
+ int protocol)
{
struct smc_sock *smc;
+ struct proto *prot;
struct sock *sk;
- sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
+ prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
+ sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
if (!sk)
return NULL;
sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
- sk->sk_protocol = SMCPROTO_SMC;
+ sk->sk_protocol = protocol;
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
@@ -198,10 +216,13 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
goto out;
rc = -EAFNOSUPPORT;
+ if (addr->sin_family != AF_INET &&
+ addr->sin_family != AF_INET6 &&
+ addr->sin_family != AF_UNSPEC)
+ goto out;
/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
- if ((addr->sin_family != AF_INET) &&
- ((addr->sin_family != AF_UNSPEC) ||
- (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
+ if (addr->sin_family == AF_UNSPEC &&
+ addr->sin_addr.s_addr != htonl(INADDR_ANY))
goto out;
lock_sock(sk);
@@ -529,7 +550,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
/* separate smc parameter checking to be safe */
if (alen < sizeof(addr->sa_family))
goto out_err;
- if (addr->sa_family != AF_INET)
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
goto out_err;
lock_sock(sk);
@@ -571,7 +592,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
int rc;
release_sock(lsk);
- new_sk = smc_sock_alloc(sock_net(lsk), NULL);
+ new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
if (!new_sk) {
rc = -ENOMEM;
lsk->sk_err = ENOMEM;
@@ -767,8 +788,6 @@ static void smc_listen_work(struct work_struct *work)
struct smc_link *link;
int reason_code = 0;
int rc = 0;
- __be32 subnet;
- u8 prefix_len;
u8 ibport;
/* check if peer is smc capable */
@@ -803,17 +822,11 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}
- /* determine subnet and mask from internal TCP socket */
- rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
- if (rc) {
- reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
- goto decline_rdma;
- }
-
pclc = (struct smc_clc_msg_proposal *)&buf;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
- if (pclc_prfx->outgoing_subnet != subnet ||
- pclc_prfx->prefix_len != prefix_len) {
+
+ rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
+ if (rc) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
@@ -1375,6 +1388,7 @@ static const struct proto_ops smc_sock_ops = {
static int smc_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
+ int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
struct smc_sock *smc;
struct sock *sk;
int rc;
@@ -1384,20 +1398,20 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
goto out;
rc = -EPROTONOSUPPORT;
- if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
+ if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
goto out;
rc = -ENOBUFS;
sock->ops = &smc_sock_ops;
- sk = smc_sock_alloc(net, sock);
+ sk = smc_sock_alloc(net, sock, protocol);
if (!sk)
goto out;
/* create internal TCP socket for CLC handshake and fallback */
smc = smc_sk(sk);
smc->use_fallback = false; /* assume rdma capability first */
- rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
- IPPROTO_TCP, &smc->clcsock);
+ rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+ &smc->clcsock);
if (rc) {
sk_common_release(sk);
goto out;
@@ -1437,16 +1451,23 @@ static int __init smc_init(void)
rc = proto_register(&smc_proto, 1);
if (rc) {
- pr_err("%s: proto_register fails with %d\n", __func__, rc);
+ pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
goto out_pnet;
}
+ rc = proto_register(&smc_proto6, 1);
+ if (rc) {
+ pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
+ goto out_proto;
+ }
+
rc = sock_register(&smc_sock_family_ops);
if (rc) {
pr_err("%s: sock_register fails with %d\n", __func__, rc);
- goto out_proto;
+ goto out_proto6;
}
INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
+ INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
rc = smc_ib_register_client();
if (rc) {
@@ -1459,6 +1480,8 @@ static int __init smc_init(void)
out_sock:
sock_unregister(PF_SMC);
+out_proto6:
+ proto_unregister(&smc_proto6);
out_proto:
proto_unregister(&smc_proto);
out_pnet:
@@ -1477,11 +1500,13 @@ static void __exit smc_exit(void)
spin_unlock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
list_del_init(&lgr->list);
+ cancel_delayed_work_sync(&lgr->free_work);
smc_lgr_free(lgr); /* free link group */
}
static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client();
sock_unregister(PF_SMC);
+ proto_unregister(&smc_proto6);
proto_unregister(&smc_proto);
smc_pnet_exit();
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 268cdf11533c..e4829a2f46ba 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -18,11 +18,13 @@
#include "smc_ib.h"
-#define SMCPROTO_SMC 0 /* SMC protocol */
+#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
+#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
#define SMC_MAX_PORTS 2 /* Max # of ports */
extern struct proto smc_proto;
+extern struct proto smc_proto6;
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 874c5a75d6dd..64fbc3230e6c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -5,7 +5,7 @@
* CLC (connection layer control) handshake over initial TCP socket to
* prepare for RDMA traffic
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
*/
@@ -15,6 +15,7 @@
#include <linux/if_ether.h>
#include <linux/sched/signal.h>
+#include <net/addrconf.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -74,15 +75,67 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
return true;
}
-/* determine subnet and mask of internal TCP socket */
-int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
- __be32 *subnet, u8 *prefix_len)
+/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
+static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
+
+ if (!in_dev)
+ return -ENODEV;
+ for_ifa(in_dev) {
+ if (!inet_ifa_match(ipv4, ifa))
+ continue;
+ prop->prefix_len = inet_mask_len(ifa->ifa_mask);
+ prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
+ /* prop->ipv6_prefixes_cnt = 0; already done by memset before */
+ return 0;
+ } endfor_ifa(in_dev);
+ return -ENOENT;
+}
+
+/* fill CLC proposal msg with ipv6 prefixes from device */
+static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
+ struct smc_clc_msg_proposal_prefix *prop,
+ struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
+ struct inet6_ifaddr *ifa;
+ int cnt = 0;
+
+ if (!in6_dev)
+ return -ENODEV;
+ /* use a maximum of 8 IPv6 prefixes from device */
+ list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+ if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+ continue;
+ ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
+ &ifa->addr, ifa->prefix_len);
+ ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
+ cnt++;
+ if (cnt == SMC_CLC_MAX_V6_PREFIX)
+ break;
+ }
+ prop->ipv6_prefixes_cnt = cnt;
+ if (cnt)
+ return 0;
+#endif
+ return -ENOENT;
+}
+
+/* retrieve and set prefixes in CLC proposal msg */
+static int smc_clc_prfx_set(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop,
+ struct smc_clc_ipv6_prefix *ipv6_prfx)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
- struct in_device *in_dev;
- struct sockaddr_in addr;
+ struct sockaddr_storage addrs;
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr;
int rc = -ENOENT;
+ memset(prop, 0, sizeof(*prop));
if (!dst) {
rc = -ENOTCONN;
goto out;
@@ -91,22 +144,97 @@ int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
rc = -ENODEV;
goto out_rel;
}
-
/* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addr);
- /* analyze IPv4 specific data of net_device belonging to TCP socket */
+ kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+ /* analyze IP specific data of net_device belonging to TCP socket */
+ addr6 = (struct sockaddr_in6 *)&addrs;
rcu_read_lock();
- in_dev = __in_dev_get_rcu(dst->dev);
+ if (addrs.ss_family == PF_INET) {
+ /* IPv4 */
+ addr = (struct sockaddr_in *)&addrs;
+ rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
+ } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
+ /* mapped IPv4 address - peer is IPv4 only */
+ rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
+ prop);
+ } else {
+ /* IPv6 */
+ rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
+ }
+ rcu_read_unlock();
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
+/* match ipv4 addrs of dev against addr in CLC proposal */
+static int smc_clc_prfx_match4_rcu(struct net_device *dev,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return -ENODEV;
for_ifa(in_dev) {
- if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
- continue;
- *prefix_len = inet_mask_len(ifa->ifa_mask);
- *subnet = ifa->ifa_address & ifa->ifa_mask;
- rc = 0;
- break;
+ if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
+ inet_ifa_match(prop->outgoing_subnet, ifa))
+ return 0;
} endfor_ifa(in_dev);
- rcu_read_unlock();
+ return -ENOENT;
+}
+
+/* match ipv6 addrs of dev against addrs in CLC proposal */
+static int smc_clc_prfx_match6_rcu(struct net_device *dev,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev = __in6_dev_get(dev);
+ struct smc_clc_ipv6_prefix *ipv6_prfx;
+ struct inet6_ifaddr *ifa;
+ int i, max;
+
+ if (!in6_dev)
+ return -ENODEV;
+ /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
+ ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
+ max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
+ list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+ if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+ continue;
+ for (i = 0; i < max; i++) {
+ if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
+ ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
+ ifa->prefix_len))
+ return 0;
+ }
+ }
+#endif
+ return -ENOENT;
+}
+
+/* check if proposed prefixes match one of our device prefixes */
+int smc_clc_prfx_match(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ int rc;
+
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+ rcu_read_lock();
+ if (!prop->ipv6_prefixes_cnt)
+ rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
+ else
+ rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
+ rcu_read_unlock();
out_rel:
dst_release(dst);
out:
@@ -232,16 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc,
struct smc_ib_device *smcibdev,
u8 ibport)
{
+ struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_msg_proposal pclc;
struct smc_clc_msg_trail trl;
+ int len, i, plen, rc;
int reason_code = 0;
- struct kvec vec[3];
+ struct kvec vec[4];
struct msghdr msg;
- int len, plen, rc;
+
+ /* retrieve ip prefixes for CLC proposal msg */
+ rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
+ if (rc)
+ return SMC_CLC_DECL_CNFERR; /* configuration error */
/* send SMC Proposal CLC message */
- plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
+ plen = sizeof(pclc) + sizeof(pclc_prfx) +
+ (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
+ sizeof(trl);
memset(&pclc, 0, sizeof(pclc));
memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
pclc.hdr.type = SMC_CLC_PROPOSAL;
@@ -252,23 +388,22 @@ int smc_clc_send_proposal(struct smc_sock *smc,
memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
pclc.iparea_offset = htons(0);
- memset(&pclc_prfx, 0, sizeof(pclc_prfx));
- /* determine subnet and mask from internal TCP socket */
- rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
- &pclc_prfx.prefix_len);
- if (rc)
- return SMC_CLC_DECL_CNFERR; /* configuration error */
- pclc_prfx.ipv6_prefixes_cnt = 0;
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memset(&msg, 0, sizeof(msg));
- vec[0].iov_base = &pclc;
- vec[0].iov_len = sizeof(pclc);
- vec[1].iov_base = &pclc_prfx;
- vec[1].iov_len = sizeof(pclc_prfx);
- vec[2].iov_base = &trl;
- vec[2].iov_len = sizeof(trl);
+ i = 0;
+ vec[i].iov_base = &pclc;
+ vec[i++].iov_len = sizeof(pclc);
+ vec[i].iov_base = &pclc_prfx;
+ vec[i++].iov_len = sizeof(pclc_prfx);
+ if (pclc_prfx.ipv6_prefixes_cnt > 0) {
+ vec[i].iov_base = &ipv6_prfx[0];
+ vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
+ sizeof(ipv6_prfx[0]);
+ }
+ vec[i].iov_base = &trl;
+ vec[i++].iov_len = sizeof(trl);
/* due to the few bytes needed for clc-handshake this cannot block */
- len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
+ len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
if (len < sizeof(pclc)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 20e048beac30..63bf1dc2c1f9 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -60,10 +60,15 @@ struct smc_clc_msg_local { /* header2 of clc messages */
u8 mac[6]; /* mac of ib_device port */
};
+#define SMC_CLC_MAX_V6_PREFIX 8
+
+/* Struct would be 4 byte aligned, but it is used in an array that is sent
+ * to peers and must conform to RFC7609, hence we need to use packed here.
+ */
struct smc_clc_ipv6_prefix {
- u8 prefix[4];
+ struct in6_addr prefix;
u8 prefix_len;
-} __packed;
+} __packed; /* format defined in RFC7609 */
struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
@@ -79,9 +84,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
} __aligned(4);
#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28
-#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix))
+#define SMC_CLC_PROPOSAL_MAX_PREFIX (SMC_CLC_MAX_V6_PREFIX * \
+ sizeof(struct smc_clc_ipv6_prefix))
#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \
SMC_CLC_PROPOSAL_MAX_OFFSET + \
+ sizeof(struct smc_clc_msg_proposal_prefix) + \
SMC_CLC_PROPOSAL_MAX_PREFIX + \
sizeof(struct smc_clc_msg_trail))
@@ -122,8 +129,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
-int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
- u8 *prefix_len);
+int smc_clc_prfx_match(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index f76f60e463cb..f44f6803f7ff 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -32,6 +32,17 @@
static u32 smc_lgr_num; /* unique link group number */
+static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
+{
+ /* client link group creation always follows the server link group
+ * creation. For client use a somewhat higher removal delay time,
+ * otherwise there is a risk of out-of-sync link groups.
+ */
+ mod_delayed_work(system_wq, &lgr->free_work,
+ lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+ SMC_LGR_FREE_DELAY_SERV);
+}
+
/* Register connection's alert token in our lookup structure.
* To use rbtrees we have to implement our own insert core.
* Requires @conns_lock
@@ -111,13 +122,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
write_unlock_bh(&lgr->conns_lock);
if (!reduced || lgr->conns_num)
return;
- /* client link group creation always follows the server link group
- * creation. For client use a somewhat higher removal delay time,
- * otherwise there is a risk of out-of-sync link groups.
- */
- mod_delayed_work(system_wq, &lgr->free_work,
- lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
- SMC_LGR_FREE_DELAY_SERV);
+ smc_lgr_schedule_free_work(lgr);
}
static void smc_lgr_free_work(struct work_struct *work)
@@ -140,7 +145,8 @@ static void smc_lgr_free_work(struct work_struct *work)
list_del_init(&lgr->list); /* remove from smc_lgr_list */
free:
spin_unlock_bh(&smc_lgr_list.lock);
- smc_lgr_free(lgr);
+ if (!delayed_work_pending(&lgr->free_work))
+ smc_lgr_free(lgr);
}
/* create a new SMC link group */
@@ -343,6 +349,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
}
write_unlock_bh(&lgr->conns_lock);
wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
+ smc_lgr_schedule_free_work(lgr);
}
/* Determine vlan of internal TCP socket.
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 2a8957bd6d38..26df554f7588 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -23,6 +23,8 @@
#include "smc_wr.h"
#include "smc.h"
+#define SMC_MAX_CQE 32766 /* max. # of completion queue elements */
+
#define SMC_QP_MIN_RNR_TIMER 5
#define SMC_QP_TIMEOUT 15 /* 4096 * 2 ** timeout usec */
#define SMC_QP_RETRY_CNT 7 /* 7: infinite */
@@ -438,9 +440,15 @@ out:
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
{
struct ib_cq_init_attr cqattr = {
- .cqe = SMC_WR_MAX_CQE, .comp_vector = 0 };
+ .cqe = SMC_MAX_CQE, .comp_vector = 0 };
+ int cqe_size_order, smc_order;
long rc;
+ /* the calculated number of cq entries fits to mlx5 cq allocation */
+ cqe_size_order = cache_line_size() == 128 ? 7 : 6;
+ smc_order = MAX_ORDER - cqe_size_order - 1;
+ if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE)
+ cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2;
smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev,
smc_wr_tx_cq_handler, NULL,
smcibdev, &cqattr);
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index ef0c3494c9cb..210bec3c3ebe 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -19,7 +19,6 @@
#include "smc.h"
#include "smc_core.h"
-#define SMC_WR_MAX_CQE 32768 /* max. # of completion queue elements */
#define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */
#define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ)
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 48fd3b5a73fb..97cd857d7f43 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -64,23 +64,6 @@ int in_own_node(struct net *net, u32 addr)
}
/**
- * addr_domain - convert 2-bit scope value to equivalent message lookup domain
- *
- * Needed when address of a named message must be looked up a second time
- * after a network hop.
- */
-u32 addr_domain(struct net *net, u32 sc)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- if (likely(sc == TIPC_NODE_SCOPE))
- return tn->own_addr;
- if (sc == TIPC_CLUSTER_SCOPE)
- return tipc_cluster_mask(tn->own_addr);
- return tipc_zone_mask(tn->own_addr);
-}
-
-/**
* tipc_addr_domain_valid - validates a network domain address
*
* Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>,
@@ -124,20 +107,6 @@ int tipc_in_scope(u32 domain, u32 addr)
return 0;
}
-/**
- * tipc_addr_scope - convert message lookup domain to a 2-bit scope value
- */
-int tipc_addr_scope(u32 domain)
-{
- if (likely(!domain))
- return TIPC_ZONE_SCOPE;
- if (tipc_node(domain))
- return TIPC_NODE_SCOPE;
- if (tipc_cluster(domain))
- return TIPC_CLUSTER_SCOPE;
- return TIPC_ZONE_SCOPE;
-}
-
char *tipc_addr_string_fill(char *string, u32 addr)
{
snprintf(string, 16, "<%u.%u.%u>",
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index bebb347803ce..2ecf5a5d40dd 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -60,6 +60,16 @@ static inline u32 tipc_cluster_mask(u32 addr)
return addr & TIPC_ZONE_CLUSTER_MASK;
}
+static inline int tipc_node2scope(u32 node)
+{
+ return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE;
+}
+
+static inline int tipc_scope2node(struct net *net, int sc)
+{
+ return sc != TIPC_NODE_SCOPE ? 0 : tipc_own_addr(net);
+}
+
u32 tipc_own_addr(struct net *net);
int in_own_cluster(struct net *net, u32 addr);
int in_own_cluster_exact(struct net *net, u32 addr);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 0b982d048fb9..04fd91bb11d7 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -105,6 +105,7 @@ static struct pernet_operations tipc_net_ops = {
.exit = tipc_exit_net,
.id = &tipc_net_id,
.size = sizeof(struct tipc_net),
+ .async = true,
};
static int __init tipc_init(void)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index ff8b071654f5..347f850dc872 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -131,6 +131,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
return &tipc_net(net)->node_list;
}
+static inline struct name_table *tipc_name_table(struct net *net)
+{
+ return tipc_net(net)->nametbl;
+}
+
static inline struct tipc_topsrv *tipc_topsrv(struct net *net)
{
return tipc_net(net)->topsrv;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 4e1c6f6450bb..b6c45dccba3d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -580,7 +580,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
msg = buf_msg(skb);
if (msg_reroute_cnt(msg))
return false;
- dnode = addr_domain(net, msg_lookup_scope(msg));
+ dnode = tipc_scope2node(net, msg_lookup_scope(msg));
dport = tipc_nametbl_translate(net, msg_nametype(msg),
msg_nameinst(msg), &dnode);
if (!dport)
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 23f8899e0f8c..28d095a7d8bb 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -56,7 +56,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
i->type = htonl(p->type);
i->lower = htonl(p->lower);
i->upper = htonl(p->upper);
- i->ref = htonl(p->ref);
+ i->port = htonl(p->port);
i->key = htonl(p->key);
}
@@ -86,25 +86,25 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
*/
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sk_buff *buf;
+ struct name_table *nt = tipc_name_table(net);
struct distr_item *item;
+ struct sk_buff *skb;
- list_add_tail_rcu(&publ->local_list,
- &tn->nametbl->publ_list[publ->scope]);
-
- if (publ->scope == TIPC_NODE_SCOPE)
+ if (publ->scope == TIPC_NODE_SCOPE) {
+ list_add_tail_rcu(&publ->binding_node, &nt->node_scope);
return NULL;
+ }
+ list_add_tail_rcu(&publ->binding_node, &nt->cluster_scope);
- buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
- if (!buf) {
+ skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
+ if (!skb) {
pr_warn("Publication distribution failure\n");
return NULL;
}
- item = (struct distr_item *)msg_data(buf_msg(buf));
+ item = (struct distr_item *)msg_data(buf_msg(skb));
publ_to_item(item, publ);
- return buf;
+ return skb;
}
/**
@@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
struct sk_buff *buf;
struct distr_item *item;
- list_del(&publ->local_list);
+ list_del(&publ->binding_node);
if (publ->scope == TIPC_NODE_SCOPE)
return NULL;
@@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
- list_for_each_entry(publ, pls, local_list) {
+ list_for_each_entry(publ, pls, binding_node) {
/* Prepare next buffer: */
if (!skb) {
skb = named_prepare_buf(net, PUBLICATION, msg_rem,
@@ -184,16 +184,13 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
*/
void tipc_named_node_up(struct net *net, u32 dnode)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct name_table *nt = tipc_name_table(net);
struct sk_buff_head head;
__skb_queue_head_init(&head);
rcu_read_lock();
- named_distribute(net, &head, dnode,
- &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
- named_distribute(net, &head, dnode,
- &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]);
+ named_distribute(net, &head, dnode, &nt->cluster_scope);
rcu_read_unlock();
tipc_node_xmit(net, &head, dnode, 0);
@@ -212,15 +209,15 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
spin_lock_bh(&tn->nametbl_lock);
p = tipc_nametbl_remove_publ(net, publ->type, publ->lower,
- publ->node, publ->ref, publ->key);
+ publ->node, publ->port, publ->key);
if (p)
- tipc_node_unsubscribe(net, &p->nodesub_list, addr);
+ tipc_node_unsubscribe(net, &p->binding_node, addr);
spin_unlock_bh(&tn->nametbl_lock);
if (p != publ) {
pr_err("Unable to remove publication from failed node\n"
- " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n",
- publ->type, publ->lower, publ->node, publ->ref,
+ " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n",
+ publ->type, publ->lower, publ->node, publ->port,
publ->key);
}
@@ -249,7 +246,7 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
{
struct publication *publ, *tmp;
- list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list)
+ list_for_each_entry_safe(publ, tmp, nsub_list, binding_node)
tipc_publ_purge(net, publ, addr);
tipc_dist_queue_purge(net, addr);
}
@@ -271,18 +268,18 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
ntohl(i->lower),
ntohl(i->upper),
TIPC_CLUSTER_SCOPE, node,
- ntohl(i->ref), ntohl(i->key));
+ ntohl(i->port), ntohl(i->key));
if (publ) {
- tipc_node_subscribe(net, &publ->nodesub_list, node);
+ tipc_node_subscribe(net, &publ->binding_node, node);
return true;
}
} else if (dtype == WITHDRAWAL) {
publ = tipc_nametbl_remove_publ(net, ntohl(i->type),
ntohl(i->lower),
- node, ntohl(i->ref),
+ node, ntohl(i->port),
ntohl(i->key));
if (publ) {
- tipc_node_unsubscribe(net, &publ->nodesub_list, node);
+ tipc_node_unsubscribe(net, &publ->binding_node, node);
kfree_rcu(publ, rcu);
return true;
}
@@ -382,16 +379,16 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
*/
void tipc_named_reinit(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
struct publication *publ;
- int scope;
spin_lock_bh(&tn->nametbl_lock);
- for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++)
- list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope],
- local_list)
- publ->node = tn->own_addr;
+ list_for_each_entry_rcu(publ, &nt->node_scope, binding_node)
+ publ->node = tn->own_addr;
+ list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
+ publ->node = tn->own_addr;
spin_unlock_bh(&tn->nametbl_lock);
}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 1264ba0af937..4753e628d7c4 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -63,7 +63,7 @@ struct distr_item {
__be32 type;
__be32 lower;
__be32 upper;
- __be32 ref;
+ __be32 port;
__be32 key;
};
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index e01c9c691ba2..bbbfc0702634 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1,7 +1,7 @@
/*
* net/tipc/name_table.c: TIPC name table code
*
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
* Copyright (c) 2004-2008, 2010-2014, Wind River Systems
* All rights reserved.
*
@@ -50,24 +50,12 @@
/**
* struct name_info - name sequence publication info
- * @node_list: circular list of publications made by own node
- * @cluster_list: circular list of publications made by own cluster
- * @zone_list: circular list of publications made by own zone
- * @node_list_size: number of entries in "node_list"
- * @cluster_list_size: number of entries in "cluster_list"
- * @zone_list_size: number of entries in "zone_list"
- *
- * Note: The zone list always contains at least one entry, since all
- * publications of the associated name sequence belong to it.
- * (The cluster and node lists may be empty.)
+ * @node_list: list of publications on own node of this <type,lower,upper>
+ * @all_publ: list of all publications of this <type,lower,upper>
*/
struct name_info {
- struct list_head node_list;
- struct list_head cluster_list;
- struct list_head zone_list;
- u32 node_list_size;
- u32 cluster_list_size;
- u32 zone_list_size;
+ struct list_head local_publ;
+ struct list_head all_publ;
};
/**
@@ -114,7 +102,7 @@ static int hash(int x)
* publ_create - create a publication structure
*/
static struct publication *publ_create(u32 type, u32 lower, u32 upper,
- u32 scope, u32 node, u32 port_ref,
+ u32 scope, u32 node, u32 port,
u32 key)
{
struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
@@ -128,9 +116,9 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
publ->upper = upper;
publ->scope = scope;
publ->node = node;
- publ->ref = port_ref;
+ publ->port = port;
publ->key = key;
- INIT_LIST_HEAD(&publ->pport_list);
+ INIT_LIST_HEAD(&publ->binding_sock);
return publ;
}
@@ -249,9 +237,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
info = sseq->info;
/* Check if an identical publication already exists */
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- if ((publ->ref == port) && (publ->key == key) &&
- (!publ->node || (publ->node == node)))
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
+ if (publ->port == port && publ->key == key &&
+ (!publ->node || publ->node == node))
return NULL;
}
} else {
@@ -290,9 +278,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
return NULL;
}
- INIT_LIST_HEAD(&info->node_list);
- INIT_LIST_HEAD(&info->cluster_list);
- INIT_LIST_HEAD(&info->zone_list);
+ INIT_LIST_HEAD(&info->local_publ);
+ INIT_LIST_HEAD(&info->all_publ);
/* Insert new sub-sequence */
sseq = &nseq->sseqs[inspos];
@@ -311,23 +298,15 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
if (!publ)
return NULL;
- list_add(&publ->zone_list, &info->zone_list);
- info->zone_list_size++;
-
- if (in_own_cluster(net, node)) {
- list_add(&publ->cluster_list, &info->cluster_list);
- info->cluster_list_size++;
- }
+ list_add(&publ->all_publ, &info->all_publ);
- if (in_own_node(net, node)) {
- list_add(&publ->node_list, &info->node_list);
- info->node_list_size++;
- }
+ if (in_own_node(net, node))
+ list_add(&publ->local_publ, &info->local_publ);
/* Any subscriptions waiting for notification? */
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
tipc_sub_report_overlap(s, publ->lower, publ->upper,
- TIPC_PUBLISHED, publ->ref,
+ TIPC_PUBLISHED, publ->port,
publ->node, publ->scope,
created_subseq);
}
@@ -348,7 +327,7 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
static struct publication *tipc_nameseq_remove_publ(struct net *net,
struct name_seq *nseq,
u32 inst, u32 node,
- u32 ref, u32 key)
+ u32 port, u32 key)
{
struct publication *publ;
struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
@@ -363,32 +342,20 @@ static struct publication *tipc_nameseq_remove_publ(struct net *net,
info = sseq->info;
/* Locate publication, if it exists */
- list_for_each_entry(publ, &info->zone_list, zone_list) {
- if ((publ->key == key) && (publ->ref == ref) &&
- (!publ->node || (publ->node == node)))
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
+ if (publ->key == key && publ->port == port &&
+ (!publ->node || publ->node == node))
goto found;
}
return NULL;
found:
- /* Remove publication from zone scope list */
- list_del(&publ->zone_list);
- info->zone_list_size--;
-
- /* Remove publication from cluster scope list, if present */
- if (in_own_cluster(net, node)) {
- list_del(&publ->cluster_list);
- info->cluster_list_size--;
- }
-
- /* Remove publication from node scope list, if present */
- if (in_own_node(net, node)) {
- list_del(&publ->node_list);
- info->node_list_size--;
- }
+ list_del(&publ->all_publ);
+ if (in_own_node(net, node))
+ list_del(&publ->local_publ);
/* Contract subseq list if no more publications for that subseq */
- if (list_empty(&info->zone_list)) {
+ if (list_empty(&info->all_publ)) {
kfree(info);
free = &nseq->sseqs[nseq->first_free--];
memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
@@ -398,8 +365,9 @@ found:
/* Notify any waiting subscriptions */
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
tipc_sub_report_overlap(s, publ->lower, publ->upper,
- TIPC_WITHDRAWN, publ->ref, publ->node,
- publ->scope, removed_subseq);
+ TIPC_WITHDRAWN, publ->port,
+ publ->node, publ->scope,
+ removed_subseq);
}
return publ;
@@ -435,11 +403,12 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
struct name_info *info = sseq->info;
int must_report = 1;
- list_for_each_entry(crs, &info->zone_list, zone_list) {
+ list_for_each_entry(crs, &info->all_publ, all_publ) {
tipc_sub_report_overlap(sub, sseq->lower,
sseq->upper,
TIPC_PUBLISHED,
- crs->ref, crs->node,
+ crs->port,
+ crs->node,
crs->scope,
must_report);
must_report = 0;
@@ -473,8 +442,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
struct name_seq *seq = nametbl_find_seq(net, type);
int index = hash(type);
- if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
- (lower > upper)) {
+ if (scope > TIPC_NODE_SCOPE || lower > upper) {
pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n",
type, lower, upper, scope);
return NULL;
@@ -493,7 +461,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
}
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
- u32 lower, u32 node, u32 ref,
+ u32 lower, u32 node, u32 port,
u32 key)
{
struct publication *publ;
@@ -503,7 +471,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
return NULL;
spin_lock_bh(&seq->lock);
- publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key);
+ publ = tipc_nameseq_remove_publ(net, seq, lower, node, port, key);
if (!seq->first_free && list_empty(&seq->subscriptions)) {
hlist_del_init_rcu(&seq->ns_list);
kfree(seq->sseqs);
@@ -536,7 +504,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
struct name_info *info;
struct publication *publ;
struct name_seq *seq;
- u32 ref = 0;
+ u32 port = 0;
u32 node = 0;
if (!tipc_in_scope(*destnode, tn->own_addr))
@@ -554,54 +522,42 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
/* Closest-First Algorithm */
if (likely(!*destnode)) {
- if (!list_empty(&info->node_list)) {
- publ = list_first_entry(&info->node_list,
- struct publication,
- node_list);
- list_move_tail(&publ->node_list,
- &info->node_list);
- } else if (!list_empty(&info->cluster_list)) {
- publ = list_first_entry(&info->cluster_list,
+ if (!list_empty(&info->local_publ)) {
+ publ = list_first_entry(&info->local_publ,
struct publication,
- cluster_list);
- list_move_tail(&publ->cluster_list,
- &info->cluster_list);
+ local_publ);
+ list_move_tail(&publ->local_publ,
+ &info->local_publ);
} else {
- publ = list_first_entry(&info->zone_list,
+ publ = list_first_entry(&info->all_publ,
struct publication,
- zone_list);
- list_move_tail(&publ->zone_list,
- &info->zone_list);
+ all_publ);
+ list_move_tail(&publ->all_publ,
+ &info->all_publ);
}
}
/* Round-Robin Algorithm */
else if (*destnode == tn->own_addr) {
- if (list_empty(&info->node_list))
+ if (list_empty(&info->local_publ))
goto no_match;
- publ = list_first_entry(&info->node_list, struct publication,
- node_list);
- list_move_tail(&publ->node_list, &info->node_list);
- } else if (in_own_cluster_exact(net, *destnode)) {
- if (list_empty(&info->cluster_list))
- goto no_match;
- publ = list_first_entry(&info->cluster_list, struct publication,
- cluster_list);
- list_move_tail(&publ->cluster_list, &info->cluster_list);
+ publ = list_first_entry(&info->local_publ, struct publication,
+ local_publ);
+ list_move_tail(&publ->local_publ, &info->local_publ);
} else {
- publ = list_first_entry(&info->zone_list, struct publication,
- zone_list);
- list_move_tail(&publ->zone_list, &info->zone_list);
+ publ = list_first_entry(&info->all_publ, struct publication,
+ all_publ);
+ list_move_tail(&publ->all_publ, &info->all_publ);
}
- ref = publ->ref;
+ port = publ->port;
node = publ->node;
no_match:
spin_unlock_bh(&seq->lock);
not_found:
rcu_read_unlock();
*destnode = node;
- return ref;
+ return port;
}
bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
@@ -623,16 +579,16 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
sseq = nameseq_find_subseq(seq, instance);
if (likely(sseq)) {
info = sseq->info;
- list_for_each_entry(publ, &info->zone_list, zone_list) {
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
if (publ->scope != scope)
continue;
- if (publ->ref == exclude && publ->node == self)
+ if (publ->port == exclude && publ->node == self)
continue;
- tipc_dest_push(dsts, publ->node, publ->ref);
+ tipc_dest_push(dsts, publ->node, publ->port);
(*dstcnt)++;
if (all)
continue;
- list_move_tail(&publ->zone_list, &info->zone_list);
+ list_move_tail(&publ->all_publ, &info->all_publ);
break;
}
}
@@ -642,15 +598,14 @@ exit:
return !list_empty(dsts);
}
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
- u32 scope, bool exact, struct list_head *dports)
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports)
{
struct sub_seq *sseq_stop;
struct name_info *info;
struct publication *p;
struct name_seq *seq;
struct sub_seq *sseq;
- int res = 0;
rcu_read_lock();
seq = nametbl_find_seq(net, type);
@@ -664,18 +619,14 @@ int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
if (sseq->lower > upper)
break;
info = sseq->info;
- list_for_each_entry(p, &info->node_list, node_list) {
+ list_for_each_entry(p, &info->local_publ, local_publ) {
if (p->scope == scope || (!exact && p->scope < scope))
- tipc_dest_push(dports, 0, p->ref);
+ tipc_dest_push(dports, 0, p->port);
}
-
- if (info->cluster_list_size != info->node_list_size)
- res = 1;
}
spin_unlock_bh(&seq->lock);
exit:
rcu_read_unlock();
- return res;
}
/* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes
@@ -700,7 +651,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
stop = seq->sseqs + seq->first_free;
for (; sseq != stop && sseq->lower <= upper; sseq++) {
info = sseq->info;
- list_for_each_entry(publ, &info->zone_list, zone_list) {
+ list_for_each_entry(publ, &info->all_publ, all_publ) {
tipc_nlist_add(nodes, publ->node);
}
}
@@ -729,10 +680,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
stop = seq->sseqs + seq->first_free;
for (; sseq != stop; sseq++) {
info = sseq->info;
- list_for_each_entry(p, &info->zone_list, zone_list) {
+ list_for_each_entry(p, &info->all_publ, all_publ) {
if (p->scope != scope)
continue;
- tipc_group_add_member(grp, p->node, p->ref, p->lower);
+ tipc_group_add_member(grp, p->node, p->port, p->lower);
}
}
spin_unlock_bh(&seq->lock);
@@ -777,7 +728,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
/**
* tipc_nametbl_withdraw - withdraw name publication from network name tables
*/
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
+int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 port,
u32 key)
{
struct publication *publ;
@@ -786,18 +737,18 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
spin_lock_bh(&tn->nametbl_lock);
publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr,
- ref, key);
+ port, key);
if (likely(publ)) {
tn->nametbl->local_publ_count--;
skb = tipc_named_withdraw(net, publ);
/* Any pending external events? */
tipc_named_process_backlog(net);
- list_del_init(&publ->pport_list);
+ list_del_init(&publ->binding_sock);
kfree_rcu(publ, rcu);
} else {
pr_err("Unable to remove local publication\n"
- "(type=%u, lower=%u, ref=%u, key=%u)\n",
- type, lower, ref, key);
+ "(type=%u, lower=%u, port=%u, key=%u)\n",
+ type, lower, port, key);
}
spin_unlock_bh(&tn->nametbl_lock);
@@ -879,9 +830,8 @@ int tipc_nametbl_init(struct net *net)
for (i = 0; i < TIPC_NAMETBL_SIZE; i++)
INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
- INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]);
+ INIT_LIST_HEAD(&tipc_nametbl->node_scope);
+ INIT_LIST_HEAD(&tipc_nametbl->cluster_scope);
tn->nametbl = tipc_nametbl;
spin_lock_init(&tn->nametbl_lock);
return 0;
@@ -901,9 +851,9 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq)
spin_lock_bh(&seq->lock);
sseq = seq->sseqs;
info = sseq->info;
- list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
+ list_for_each_entry_safe(publ, safe, &info->all_publ, all_publ) {
tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node,
- publ->ref, publ->key);
+ publ->port, publ->key);
kfree_rcu(publ, rcu);
}
hlist_del_init_rcu(&seq->ns_list);
@@ -950,17 +900,17 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
struct publication *p;
if (*last_publ) {
- list_for_each_entry(p, &sseq->info->zone_list, zone_list)
+ list_for_each_entry(p, &sseq->info->all_publ, all_publ)
if (p->key == *last_publ)
break;
if (p->key != *last_publ)
return -EPIPE;
} else {
- p = list_first_entry(&sseq->info->zone_list, struct publication,
- zone_list);
+ p = list_first_entry(&sseq->info->all_publ, struct publication,
+ all_publ);
}
- list_for_each_entry_from(p, &sseq->info->zone_list, zone_list) {
+ list_for_each_entry_from(p, &sseq->info->all_publ, all_publ) {
*last_publ = p->key;
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq,
@@ -987,7 +937,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
goto publ_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node))
goto publ_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->ref))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port))
goto publ_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key))
goto publ_msg_full;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 17652602d5e2..34a4ccb907aa 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -1,7 +1,7 @@
/*
* net/tipc/name_table.h: Include file for TIPC name table code
*
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -54,19 +54,22 @@ struct tipc_group;
* @type: name sequence type
* @lower: name sequence lower bound
* @upper: name sequence upper bound
- * @scope: scope of publication
- * @node: network address of publishing port's node
- * @ref: publishing port
- * @key: publication key
- * @nodesub_list: subscription to "node down" event (off-node publication only)
- * @local_list: adjacent entries in list of publications made by this node
- * @pport_list: adjacent entries in list of publications made by this port
- * @node_list: adjacent matching name seq publications with >= node scope
- * @cluster_list: adjacent matching name seq publications with >= cluster scope
- * @zone_list: adjacent matching name seq publications with >= zone scope
+ * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE
+ * @node: network address of publishing socket's node
+ * @port: publishing port
+ * @key: publication key, unique across the cluster
+ * @binding_node: all publications from the same node which bound this one
+ * - Remote publications: in node->publ_list
+ * Used by node/name distr to withdraw publications when node is lost
+ * - Local/node scope publications: in name_table->node_scope list
+ * - Local/cluster scope publications: in name_table->cluster_scope list
+ * @binding_sock: all publications from the same socket which bound this one
+ * Used by socket to withdraw publications when socket is unbound/released
+ * @local_publ: list of identical publications made from this node
+ * Used by closest_first and multicast receive lookup algorithms
+ * @all_publ: all publications identical to this one, whatever node and scope
+ * Used by round-robin lookup algorithm
* @rcu: RCU callback head used for deferred freeing
- *
- * Note that the node list, cluster list, and zone list are circular lists.
*/
struct publication {
u32 type;
@@ -74,34 +77,37 @@ struct publication {
u32 upper;
u32 scope;
u32 node;
- u32 ref;
+ u32 port;
u32 key;
- struct list_head nodesub_list;
- struct list_head local_list;
- struct list_head pport_list;
- struct list_head node_list;
- struct list_head cluster_list;
- struct list_head zone_list;
+ struct list_head binding_node;
+ struct list_head binding_sock;
+ struct list_head local_publ;
+ struct list_head all_publ;
struct rcu_head rcu;
};
/**
* struct name_table - table containing all existing port name publications
* @seq_hlist: name sequence hash lists
- * @publ_list: pulication lists
+ * @node_scope: all local publications with node scope
+ * - used by name_distr during re-init of name table
+ * @cluster_scope: all local publications with cluster scope
+ * - used by name_distr to send bulk updates to new nodes
+ * - used by name_distr during re-init of name table
* @local_publ_count: number of publications issued by this node
*/
struct name_table {
struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE];
- struct list_head publ_list[TIPC_PUBL_SCOPE_NUM];
+ struct list_head node_scope;
+ struct list_head cluster_scope;
u32 local_publ_count;
};
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
- u32 scope, bool exact, struct list_head *dports);
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports);
void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
u32 type, u32 domain);
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 1a2fde0d6f61..5c4c4405b78e 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -118,7 +118,7 @@ int tipc_net_start(struct net *net, u32 addr)
tipc_sk_reinit(net);
tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
- TIPC_ZONE_SCOPE, 0, tn->own_addr);
+ TIPC_CLUSTER_SCOPE, 0, tn->own_addr);
pr_info("Started in network mode\n");
pr_info("Own node address %s, network identity %u\n",
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 8b04e601311c..a4a9148d4629 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -644,7 +644,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
goto exit;
}
- res = (addr->scope > 0) ?
+ res = (addr->scope >= 0) ?
tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
@@ -1280,8 +1280,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq *seq;
struct sk_buff_head pkts;
- u32 type, inst, domain;
u32 dnode, dport;
+ u32 type, inst;
int mtu, rc;
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
@@ -1332,13 +1332,12 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (dest->addrtype == TIPC_ADDR_NAME) {
type = dest->addr.name.name.type;
inst = dest->addr.name.name.instance;
- domain = dest->addr.name.domain;
- dnode = domain;
+ dnode = dest->addr.name.domain;
msg_set_type(hdr, TIPC_NAMED_MSG);
msg_set_hdr_sz(hdr, NAMED_H_SIZE);
msg_set_nametype(hdr, type);
msg_set_nameinst(hdr, inst);
- msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
+ msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
dport = tipc_nametbl_translate(net, type, inst, &dnode);
msg_set_destnode(hdr, dnode);
msg_set_destport(hdr, dport);
@@ -2592,6 +2591,9 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
struct publication *publ;
u32 key;
+ if (scope != TIPC_NODE_SCOPE)
+ scope = TIPC_CLUSTER_SCOPE;
+
if (tipc_sk_connected(sk))
return -EINVAL;
key = tsk->portid + tsk->pub_count + 1;
@@ -2603,7 +2605,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
if (unlikely(!publ))
return -EINVAL;
- list_add(&publ->pport_list, &tsk->publications);
+ list_add(&publ->binding_sock, &tsk->publications);
tsk->pub_count++;
tsk->published = 1;
return 0;
@@ -2617,7 +2619,10 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
struct publication *safe;
int rc = -EINVAL;
- list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
+ if (scope != TIPC_NODE_SCOPE)
+ scope = TIPC_CLUSTER_SCOPE;
+
+ list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
if (seq) {
if (publ->scope != scope)
continue;
@@ -2628,12 +2633,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
if (publ->upper != seq->upper)
break;
tipc_nametbl_withdraw(net, publ->type, publ->lower,
- publ->ref, publ->key);
+ publ->port, publ->key);
rc = 0;
break;
}
tipc_nametbl_withdraw(net, publ->type, publ->lower,
- publ->ref, publ->key);
+ publ->port, publ->key);
rc = 0;
}
if (list_empty(&tsk->publications))
@@ -3287,7 +3292,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
struct publication *p;
if (*last_publ) {
- list_for_each_entry(p, &tsk->publications, pport_list) {
+ list_for_each_entry(p, &tsk->publications, binding_sock) {
if (p->key == *last_publ)
break;
}
@@ -3304,10 +3309,10 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
}
} else {
p = list_first_entry(&tsk->publications, struct publication,
- pport_list);
+ binding_sock);
}
- list_for_each_entry_from(p, &tsk->publications, pport_list) {
+ list_for_each_entry_from(p, &tsk->publications, binding_sock) {
err = __tipc_nl_add_sk_publ(skb, cb, p);
if (err) {
*last_publ = p->key;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 7f52b8eb177d..aff2e84ec761 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -3258,6 +3258,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list)
static struct pernet_operations xfrm_user_net_ops = {
.init = xfrm_user_net_init,
.exit_batch = xfrm_user_net_exit,
+ .async = true,
};
static int __init xfrm_user_init(void)