diff options
Diffstat (limited to 'net')
87 files changed, 1256 insertions, 743 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index e5b8d42b6410..d8887cc38e7b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4351,6 +4351,9 @@ int netdev_rx_handler_register(struct net_device *dev, if (netdev_is_rx_handler_busy(dev)) return -EBUSY; + if (dev->priv_flags & IFF_NO_RX_HANDLER) + return -EINVAL; + /* Note: rx_handler_data must be set before rx_handler */ rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); rcu_assign_pointer(dev->rx_handler, rx_handler); @@ -7546,10 +7549,17 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } - /* LRO feature cannot be combined with RX-FCS */ - if ((features & NETIF_F_LRO) && (features & NETIF_F_RXFCS)) { - netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n"); - features &= ~NETIF_F_LRO; + /* LRO/HW-GRO features cannot be combined with RX-FCS */ + if (features & NETIF_F_RXFCS) { + if (features & NETIF_F_LRO) { + netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n"); + features &= ~NETIF_F_LRO; + } + + if (features & NETIF_F_GRO_HW) { + netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n"); + features &= ~NETIF_F_GRO_HW; + } } return features; @@ -8008,7 +8018,8 @@ int register_netdev(struct net_device *dev) { int err; - rtnl_lock(); + if (rtnl_lock_killable()) + return -EINTR; err = register_netdevice(dev); rtnl_unlock(); return err; diff --git a/net/core/devlink.c b/net/core/devlink.c index 1b5bf0d1cee9..f23e5ed7c90f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2744,22 +2744,22 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, .doit = devlink_nl_cmd_dpipe_table_get, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, .doit = devlink_nl_cmd_dpipe_entries_get, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = 
DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, .doit = devlink_nl_cmd_dpipe_headers_get, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, @@ -2779,8 +2779,8 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_RESOURCE_DUMP, .doit = devlink_nl_cmd_resource_dump, .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_RELOAD, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3f89c76d5c24..157cd9efa4be 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1022,6 +1022,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; + /* If FLOW_RSS was requested then user-space must be using the + * new definition, as FLOW_RSS is newer. 
+ */ + if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) { + info_size = sizeof(info); + if (copy_from_user(&info, useraddr, info_size)) + return -EFAULT; + } + if (info.cmd == ETHTOOL_GRXCLSRLALL) { if (info.rule_cnt > 0) { if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) @@ -1251,9 +1260,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, user_key_size = rxfh.key_size; /* Check that reserved fields are 0 for now */ - if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] || - rxfh.rsvd8[2] || rxfh.rsvd32) + if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) return -EINVAL; + /* Most drivers don't handle rss_context, check it's 0 as well */ + if (rxfh.rss_context && !ops->get_rxfh_context) + return -EOPNOTSUPP; rxfh.indir_size = dev_indir_size; rxfh.key_size = dev_key_size; @@ -1276,7 +1287,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (user_key_size) hkey = rss_config + indir_bytes; - ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc); + if (rxfh.rss_context) + ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey, + &dev_hfunc, + rxfh.rss_context); + else + ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc); if (ret) goto out; @@ -1306,6 +1322,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, u8 *hkey = NULL; u8 *rss_config; u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); + bool delete = false; if (!ops->get_rxnfc || !ops->set_rxfh) return -EOPNOTSUPP; @@ -1319,9 +1336,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EFAULT; /* Check that reserved fields are 0 for now */ - if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] || - rxfh.rsvd8[2] || rxfh.rsvd32) + if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) return -EINVAL; + /* Most drivers don't handle rss_context, check it's 0 as well */ + if (rxfh.rss_context && !ops->set_rxfh_context) + return 
-EOPNOTSUPP; /* If either indir, hash key or function is valid, proceed further. * Must request at least one change: indir size, hash key or function. @@ -1346,7 +1365,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (ret) goto out; - /* rxfh.indir_size == 0 means reset the indir table to default. + /* rxfh.indir_size == 0 means reset the indir table to default (master + * context) or delete the context (other RSS contexts). * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged. */ if (rxfh.indir_size && @@ -1359,9 +1379,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (ret) goto out; } else if (rxfh.indir_size == 0) { - indir = (u32 *)rss_config; - for (i = 0; i < dev_indir_size; i++) - indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + if (rxfh.rss_context == 0) { + indir = (u32 *)rss_config; + for (i = 0; i < dev_indir_size; i++) + indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + } else { + delete = true; + } } if (rxfh.key_size) { @@ -1374,15 +1398,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } - ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); + if (rxfh.rss_context) + ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc, + &rxfh.rss_context, delete); + else + ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); if (ret) goto out; - /* indicate whether rxfh was set to default */ - if (rxfh.indir_size == 0) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; - else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - dev->priv_flags |= IFF_RXFH_CONFIGURED; + if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), + &rxfh.rss_context, sizeof(rxfh.rss_context))) + ret = -EFAULT; + + if (!rxfh.rss_context) { + /* indicate whether rxfh was set to default */ + if (rxfh.indir_size == 0) + dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) + dev->priv_flags |= IFF_RXFH_CONFIGURED; + } 
out: kfree(rss_config); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b8ab5c829511..545cf08cd558 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -906,13 +906,14 @@ static ssize_t pktgen_if_write(struct file *file, i += len; if (debug) { - size_t copy = min_t(size_t, count, 1023); - char tb[copy + 1]; - if (copy_from_user(tb, user_buffer, copy)) - return -EFAULT; - tb[copy] = 0; - pr_debug("%s,%lu buffer -:%s:-\n", - name, (unsigned long)count, tb); + size_t copy = min_t(size_t, count + 1, 1024); + char *tp = strndup_user(user_buffer, copy); + + if (IS_ERR(tp)) + return PTR_ERR(tp); + + pr_debug("%s,%zu buffer -:%s:-\n", name, count, tp); + kfree(tp); } if (!strcmp(name, "min_pkt_size")) { @@ -3851,6 +3852,7 @@ static struct pernet_operations pg_net_ops = { .exit = pg_net_exit, .id = &pg_net_id, .size = sizeof(struct pktgen_net), + .async = true, }; static int __init pg_init(void) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 67f375cfb982..87079eaa871b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -75,6 +75,12 @@ void rtnl_lock(void) } EXPORT_SYMBOL(rtnl_lock); +int rtnl_lock_killable(void) +{ + return mutex_lock_killable(&rtnl_mutex); +} +EXPORT_SYMBOL(rtnl_lock_killable); + static struct sk_buff *defer_kfree_skb_list; void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail) { diff --git a/net/core/sock.c b/net/core/sock.c index 4f92c2910200..f704324d1219 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1052,8 +1052,6 @@ set_rcvbuf: if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { if (sk->sk_protocol != IPPROTO_TCP) ret = -ENOTSUPP; - else if (sk->sk_state != TCP_CLOSE) - ret = -EBUSY; } else if (sk->sk_family != PF_RDS) { ret = -ENOTSUPP; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d714f65782b7..4f47f92459cc 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS; static int 
net_msg_warn; /* Unused, but still a sysctl */ +int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0; +EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &zero, }, + { + .procname = "fb_tunnels_only_for_init_net", + .data = &sysctl_fb_tunnels_only_for_init_net, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, { } }; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 602597dfc395..5fcb17cb426b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -347,8 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, struct net_device *dev; int t_hlen; - BUG_ON(!itn->fb_tunnel_dev); - dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); + dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms); if (IS_ERR(dev)) return ERR_CAST(dev); @@ -822,7 +821,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) struct net *net = t->net; struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); - BUG_ON(!itn->fb_tunnel_dev); switch (cmd) { case SIOCGETTUNNEL: if (dev == itn->fb_tunnel_dev) { @@ -847,7 +845,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) p->o_key = 0; } - t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + t = ip_tunnel_find(itn, p, itn->type); if (cmd == SIOCADDTUNNEL) { if (!t) { @@ -991,10 +989,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct ip_tunnel_parm parms; unsigned int i; + itn->rtnl_link_ops = ops; for (i = 0; i < IP_TNL_HASH_SIZE; i++) INIT_HLIST_HEAD(&itn->tunnels[i]); - if (!ops) { + if (!ops || !net_has_fallback_tunnels(net)) { + struct ip_tunnel_net *it_init_net; + + 
it_init_net = net_generic(&init_net, ip_tnl_net_id); + itn->type = it_init_net->type; itn->fb_tunnel_dev = NULL; return 0; } @@ -1012,6 +1015,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); + itn->type = itn->fb_tunnel_dev->type; } rtnl_unlock(); @@ -1019,10 +1023,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, } EXPORT_SYMBOL_GPL(ip_tunnel_init_net); -static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, +static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn, + struct list_head *head, struct rtnl_link_ops *ops) { - struct net *net = dev_net(itn->fb_tunnel_dev); struct net_device *dev, *aux; int h; @@ -1054,7 +1058,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { itn = net_generic(net, id); - ip_tunnel_destroy(itn, &list, ops); + ip_tunnel_destroy(net, itn, &list, ops); } unregister_netdevice_many(&list); rtnl_unlock(); diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 8f8713b4388f..49c2490193ae 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -65,6 +65,7 @@ static void __net_exit arptable_filter_net_exit(struct net *net) static struct pernet_operations arptable_filter_net_ops = { .exit = arptable_filter_net_exit, + .async = true, }; static int __init arptable_filter_init(void) diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index dea138ca8925..f6074059531a 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -113,6 +113,7 @@ static void __net_exit iptable_mangle_net_exit(struct net *net) static struct pernet_operations iptable_mangle_net_ops = 
{ .exit = iptable_mangle_net_exit, + .async = true, }; static int __init iptable_mangle_init(void) diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 0f7255cc65ee..b771af74be79 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -129,6 +129,7 @@ static void __net_exit iptable_nat_net_exit(struct net *net) static struct pernet_operations iptable_nat_net_ops = { .exit = iptable_nat_net_exit, + .async = true, }; static int __init iptable_nat_init(void) diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 960625aabf04..963753e50842 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -76,6 +76,7 @@ static void __net_exit iptable_raw_net_exit(struct net *net) static struct pernet_operations iptable_raw_net_ops = { .exit = iptable_raw_net_exit, + .async = true, }; static int __init iptable_raw_init(void) diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index e5379fe57b64..c40d6b3d8b6a 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -76,6 +76,7 @@ static void __net_exit iptable_security_net_exit(struct net *net) static struct pernet_operations iptable_security_net_ops = { .exit = iptable_security_net_exit, + .async = true, }; static int __init iptable_security_init(void) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index b50721d9d30e..6531f69db010 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -399,6 +399,7 @@ static struct pernet_operations ipv4_net_ops = { .exit = ipv4_net_exit, .id = &conntrack4_net_id, .size = sizeof(struct conntrack4_net), + .async = true, }; static int __init nf_conntrack_l3proto_ipv4_init(void) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 
011de9a20ec6..5b72d97693f8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -520,22 +520,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, - { - .procname = "udp_rmem_min", - .data = &sysctl_udp_rmem_min, - .maxlen = sizeof(sysctl_udp_rmem_min), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one - }, - { - .procname = "udp_wmem_min", - .data = &sysctl_udp_wmem_min, - .maxlen = sizeof(sysctl_udp_wmem_min), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one - }, { } }; @@ -1167,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &one, }, + { + .procname = "udp_rmem_min", + .data = &init_net.ipv4.sysctl_udp_rmem_min, + .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, + { + .procname = "udp_wmem_min", + .data = &init_net.ipv4.sysctl_udp_wmem_min, + .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f90ec24c2cc8..d763fae1b574 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3033,7 +3033,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) u32 rate; stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) + - 4 * nla_total_size(sizeof(u32)) + + 5 * nla_total_size(sizeof(u32)) + 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC); if (!stats) return NULL; @@ -3063,6 +3063,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits); nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); + nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); nla_put_u8(stats, TCP_NLA_CA_STATE, 
inet_csk(sk)->icsk_ca_state); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index c92014cb1e16..158d105e76da 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -731,6 +731,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) bbr->mode = BBR_DRAIN; /* drain queue we created */ bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */ bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */ + tcp_sk(sk)->snd_ssthresh = + bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT); } /* fall through to check if in-flight is already small: */ if (bbr->mode == BBR_DRAIN && tcp_packets_in_flight(tcp_sk(sk)) <= @@ -834,6 +836,7 @@ static void bbr_init(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->prior_cwnd = 0; + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; bbr->rtt_cnt = 0; bbr->next_rtt_delivered = 0; bbr->prev_ca_state = TCP_CA_Open; @@ -886,7 +889,7 @@ static u32 bbr_undo_cwnd(struct sock *sk) static u32 bbr_ssthresh(struct sock *sk) { bbr_save_cwnd(sk); - return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */ + return tcp_sk(sk)->snd_ssthresh; } static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3013404d0935..908fc02fb4f8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -122,12 +122,6 @@ EXPORT_SYMBOL(udp_table); long sysctl_udp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_udp_mem); -int sysctl_udp_rmem_min __read_mostly; -EXPORT_SYMBOL(sysctl_udp_rmem_min); - -int sysctl_udp_wmem_min __read_mostly; -EXPORT_SYMBOL(sysctl_udp_wmem_min); - atomic_long_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); @@ -2533,35 +2527,35 @@ int udp_abort(struct sock *sk, int err) EXPORT_SYMBOL_GPL(udp_abort); struct proto udp_prot = { - .name = "UDP", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .init = udp_init_sock, - .destroy = udp_destroy_sock, - .setsockopt = 
udp_setsockopt, - .getsockopt = udp_getsockopt, - .sendmsg = udp_sendmsg, - .recvmsg = udp_recvmsg, - .sendpage = udp_sendpage, - .release_cb = ip4_datagram_release_cb, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .rehash = udp_v4_rehash, - .get_port = udp_v4_get_port, - .memory_allocated = &udp_memory_allocated, - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem = &sysctl_udp_wmem_min, - .sysctl_rmem = &sysctl_udp_rmem_min, - .obj_size = sizeof(struct udp_sock), - .h.udp_table = &udp_table, + .name = "UDP", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip4_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udp_init_sock, + .destroy = udp_destroy_sock, + .setsockopt = udp_setsockopt, + .getsockopt = udp_getsockopt, + .sendmsg = udp_sendmsg, + .recvmsg = udp_recvmsg, + .sendpage = udp_sendpage, + .release_cb = ip4_datagram_release_cb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, + .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), + .obj_size = sizeof(struct udp_sock), + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udp_setsockopt, - .compat_getsockopt = compat_udp_getsockopt, + .compat_setsockopt = compat_udp_setsockopt, + .compat_getsockopt = compat_udp_getsockopt, #endif - .diag_destroy = udp_abort, + .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); @@ -2831,6 +2825,26 @@ u32 udp_flow_hashrnd(void) } EXPORT_SYMBOL(udp_flow_hashrnd); +static void __udp_sysctl_init(struct net *net) +{ + net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM; + net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM; + +#ifdef CONFIG_NET_L3_MASTER_DEV + net->ipv4.sysctl_udp_l3mdev_accept = 0; +#endif +} + +static int __net_init udp_sysctl_init(struct net *net) +{ + 
__udp_sysctl_init(net); + return 0; +} + +static struct pernet_operations __net_initdata udp_sysctl_ops = { + .init = udp_sysctl_init, +}; + void __init udp_init(void) { unsigned long limit; @@ -2843,8 +2857,7 @@ void __init udp_init(void) sysctl_udp_mem[1] = limit; sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; - sysctl_udp_rmem_min = SK_MEM_QUANTUM; - sysctl_udp_wmem_min = SK_MEM_QUANTUM; + __udp_sysctl_init(&init_net); /* 16 spinlocks per cpu */ udp_busylocks_log = ilog2(nr_cpu_ids) + 4; @@ -2854,4 +2867,7 @@ void __init udp_init(void) panic("UDP: failed to alloc udp_busylocks\n"); for (i = 0; i < (1U << udp_busylocks_log); i++) spin_lock_init(udp_busylocks + i); + + if (register_pernet_subsys(&udp_sysctl_ops)) + panic("UDP: failed to init sysctl parameters.\n"); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b5fd116c046a..6fd4bbdc444f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1851,22 +1851,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev) int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict) { - return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE); + return ipv6_chk_addr_and_flags(net, addr, dev, !dev, + strict, IFA_F_TENTATIVE); } EXPORT_SYMBOL(ipv6_chk_addr); +/* device argument is used to find the L3 domain of interest. If + * skip_dev_check is set, then the ifp device is not checked against + * the passed in dev argument. So the 2 cases for addresses checks are: + * 1. does the address exist in the L3 domain that dev is part of + * (skip_dev_check = true), or + * + * 2. 
does the address exist on the specific device + * (skip_dev_check = false) + */ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, - const struct net_device *dev, int strict, - u32 banned_flags) + const struct net_device *dev, bool skip_dev_check, + int strict, u32 banned_flags) { unsigned int hash = inet6_addr_hash(net, addr); + const struct net_device *l3mdev; struct inet6_ifaddr *ifp; u32 ifp_flags; rcu_read_lock(); + + l3mdev = l3mdev_master_dev_rcu(dev); + if (skip_dev_check) + dev = NULL; + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; + + if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev) + continue; + /* Decouple optimistic from tentative for evaluation here. * Ban optimistic addresses explicitly, when required. */ diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index c61718dba2e6..d580d4d456a5 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; - if (ipv6_chk_addr(net, addr, NULL, 0)) + + if (ifindex) + dev = __dev_get_by_index(net, ifindex); + + if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); @@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) dev = __dev_get_by_flags(net, IFF_UP, IFF_UP | IFF_LOOPBACK); } - } else - dev = __dev_get_by_index(net, ifindex); + } if (!dev) { err = -ENODEV; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index fbf08ce3f5ab..b27333d7b099 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -801,8 +801,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; if 
(!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) && - !ipv6_chk_addr(net, &src_info->ipi6_addr, - strict ? dev : NULL, 0) && + !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr, + dev, !strict, 0, + IFA_F_TENTATIVE) && !ipv6_chk_acast_addr_src(net, dev, &src_info->ipi6_addr)) err = -EINVAL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 18a3dfbd0300..7d8775c9570d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -236,7 +236,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, return t; dev = ign->fb_tunnel_dev; - if (dev->flags & IFF_UP) + if (dev && dev->flags & IFF_UP) return netdev_priv(dev); return NULL; @@ -1472,6 +1472,8 @@ static int __net_init ip6gre_init_net(struct net *net) struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); int err; + if (!net_has_fallback_tunnels(net)) + return 0; ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", NET_NAME_UNKNOWN, ip6gre_tunnel_setup); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 56c4967f1868..456fcf942f95 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t, ldev = dev_get_by_index_rcu(net, p->link); if ((ipv6_addr_is_multicast(laddr) || - likely(ipv6_chk_addr(net, laddr, ldev, 0))) && + likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false, + 0, IFA_F_TENTATIVE))) && ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) || - likely(!ipv6_chk_addr(net, raddr, NULL, 0)))) + likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true, + 0, IFA_F_TENTATIVE)))) ret = 1; } return ret; @@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (p->link) ldev = dev_get_by_index_rcu(net, p->link); - if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) + if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, + 0, IFA_F_TENTATIVE))) pr_warn("%s xmit: Local address not yet configured!\n", p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && 
!ipv6_addr_is_multicast(raddr) && - unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) + unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, + true, 0, IFA_F_TENTATIVE))) pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", p->name); else @@ -2205,6 +2209,8 @@ static int __net_init ip6_tnl_init_net(struct net *net) ip6n->tnls[0] = ip6n->tnls_wc; ip6n->tnls[1] = ip6n->tnls_r_l; + if (!net_has_fallback_tunnels(net)) + return 0; err = -ENOMEM; ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", NET_NAME_UNKNOWN, ip6_tnl_dev_setup); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8af5eef464c1..10024eb0c521 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) int probes = atomic_read(&neigh->probes); if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr, - dev, 1, + dev, false, 1, IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 1343077dde93..06561c84c0bc 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -87,6 +87,7 @@ static void __net_exit ip6table_filter_net_exit(struct net *net) static struct pernet_operations ip6table_filter_net_ops = { .init = ip6table_filter_net_init, .exit = ip6table_filter_net_exit, + .async = true, }; static int __init ip6table_filter_init(void) diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index b0524b18c4fb..a11e25936b45 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -107,6 +107,7 @@ static void __net_exit ip6table_mangle_net_exit(struct net *net) static struct pernet_operations ip6table_mangle_net_ops = { .exit = ip6table_mangle_net_exit, + .async = true, }; static int __init ip6table_mangle_init(void) diff 
--git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 47306e45a80a..4475fd300bb6 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -131,6 +131,7 @@ static void __net_exit ip6table_nat_net_exit(struct net *net) static struct pernet_operations ip6table_nat_net_ops = { .exit = ip6table_nat_net_exit, + .async = true, }; static int __init ip6table_nat_init(void) diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 710fa0806c37..a88f3b1995b1 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -75,6 +75,7 @@ static void __net_exit ip6table_raw_net_exit(struct net *net) static struct pernet_operations ip6table_raw_net_ops = { .exit = ip6table_raw_net_exit, + .async = true, }; static int __init ip6table_raw_init(void) diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index cf26ccb04056..320048c008dc 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -74,6 +74,7 @@ static void __net_exit ip6table_security_net_exit(struct net *net) static struct pernet_operations ip6table_security_net_ops = { .exit = ip6table_security_net_exit, + .async = true, }; static int __init ip6table_security_init(void) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 663827ee3cf8..ba54bb3bd1e4 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -401,6 +401,7 @@ static struct pernet_operations ipv6_net_ops = { .exit = ipv6_net_exit, .id = &conntrack6_net_id, .size = sizeof(struct conntrack6_net), + .async = true, }; static int __init nf_conntrack_l3proto_ipv6_init(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f0ae58424c45..939d122e71b4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1846,7 +1846,7 @@ u32 
rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, struct flow_keys hash_keys; u32 mhash; - switch (net->ipv6.sysctl.multipath_hash_policy) { + switch (ip6_multipath_hash_policy(net)) { case 0: memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; @@ -2550,7 +2550,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, static int ip6_route_check_nh_onlink(struct net *net, struct fib6_config *cfg, - struct net_device *dev, + const struct net_device *dev, struct netlink_ext_ack *extack) { u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; @@ -2626,6 +2626,79 @@ out: return err; } +static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, + struct net_device **_dev, struct inet6_dev **idev, + struct netlink_ext_ack *extack) +{ + const struct in6_addr *gw_addr = &cfg->fc_gateway; + int gwa_type = ipv6_addr_type(gw_addr); + bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true; + const struct net_device *dev = *_dev; + bool need_addr_check = !dev; + int err = -EINVAL; + + /* if gw_addr is local we will fail to detect this in case + * address is still TENTATIVE (DAD in progress). rt6_lookup() + * will return already-added prefix route via interface that + * prefix route was assigned to, which might be non-loopback. + */ + if (dev && + ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { + NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); + goto out; + } + + if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) { + /* IPv6 strictly inhibits using not link-local + * addresses as nexthop address. + * Otherwise, router will not able to send redirects. + * It is very good, but in some (rare!) circumstances + * (SIT, PtP, NBMA NOARP links) it is handy to allow + * some exceptions. 
--ANK + * We allow IPv4-mapped nexthops to support RFC4798-type + * addressing + */ + if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) { + NL_SET_ERR_MSG(extack, "Invalid gateway address"); + goto out; + } + + if (cfg->fc_flags & RTNH_F_ONLINK) + err = ip6_route_check_nh_onlink(net, cfg, dev, extack); + else + err = ip6_route_check_nh(net, cfg, _dev, idev); + + if (err) + goto out; + } + + /* reload in case device was changed */ + dev = *_dev; + + err = -EINVAL; + if (!dev) { + NL_SET_ERR_MSG(extack, "Egress device not specified"); + goto out; + } else if (dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, + "Egress device can not be loopback device for this route"); + goto out; + } + + /* if we did not check gw_addr above, do so now that the + * egress device has been resolved. + */ + if (need_addr_check && + ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { + NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); + goto out; + } + + err = 0; +out: + return err; +} + static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -2808,61 +2881,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, } if (cfg->fc_flags & RTF_GATEWAY) { - const struct in6_addr *gw_addr; - int gwa_type; - - gw_addr = &cfg->fc_gateway; - gwa_type = ipv6_addr_type(gw_addr); - - /* if gw_addr is local we will fail to detect this in case - * address is still TENTATIVE (DAD in progress). rt6_lookup() - * will return already-added prefix route via interface that - * prefix route was assigned to, which might be non-loopback. - */ - err = -EINVAL; - if (ipv6_chk_addr_and_flags(net, gw_addr, - gwa_type & IPV6_ADDR_LINKLOCAL ? 
- dev : NULL, 0, 0)) { - NL_SET_ERR_MSG(extack, "Invalid gateway address"); + err = ip6_validate_gw(net, cfg, &dev, &idev, extack); + if (err) goto out; - } - rt->rt6i_gateway = *gw_addr; - - if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { - /* IPv6 strictly inhibits using not link-local - addresses as nexthop address. - Otherwise, router will not able to send redirects. - It is very good, but in some (rare!) circumstances - (SIT, PtP, NBMA NOARP links) it is handy to allow - some exceptions. --ANK - We allow IPv4-mapped nexthops to support RFC4798-type - addressing - */ - if (!(gwa_type & (IPV6_ADDR_UNICAST | - IPV6_ADDR_MAPPED))) { - NL_SET_ERR_MSG(extack, - "Invalid gateway address"); - goto out; - } - if (cfg->fc_flags & RTNH_F_ONLINK) { - err = ip6_route_check_nh_onlink(net, cfg, dev, - extack); - } else { - err = ip6_route_check_nh(net, cfg, &dev, &idev); - } - if (err) - goto out; - } - err = -EINVAL; - if (!dev) { - NL_SET_ERR_MSG(extack, "Egress device not specified"); - goto out; - } else if (dev->flags & IFF_LOOPBACK) { - NL_SET_ERR_MSG(extack, - "Egress device can not be loopback device for this route"); - goto out; - } + rt->rt6i_gateway = cfg->fc_gateway; } err = -ENODEV; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a9c4ac6efe22..8a4f8fddd812 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel *t = netdev_priv(dev); - if (dev == sitn->fb_tunnel_dev) { + if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) { ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); t->ip6rd.relay_prefix = 0; t->ip6rd.prefixlen = 16; @@ -1835,6 +1835,9 @@ static int __net_init sit_init_net(struct net *net) sitn->tunnels[2] = sitn->tunnels_r; sitn->tunnels[3] = sitn->tunnels_r_l; + if (!net_has_fallback_tunnels(net)) + return 0; + sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", 
NET_NAME_UNKNOWN, ipip6_tunnel_setup); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 52e3ea0e6f50..ad30f5e31969 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1509,34 +1509,34 @@ void udp6_proc_exit(struct net *net) /* ------------------------------------------------------------------------ */ struct proto udpv6_prot = { - .name = "UDPv6", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip6_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .init = udp_init_sock, - .destroy = udpv6_destroy_sock, - .setsockopt = udpv6_setsockopt, - .getsockopt = udpv6_getsockopt, - .sendmsg = udpv6_sendmsg, - .recvmsg = udpv6_recvmsg, - .release_cb = ip6_datagram_release_cb, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .rehash = udp_v6_rehash, - .get_port = udp_v6_get_port, - .memory_allocated = &udp_memory_allocated, - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem = &sysctl_udp_wmem_min, - .sysctl_rmem = &sysctl_udp_rmem_min, - .obj_size = sizeof(struct udp6_sock), - .h.udp_table = &udp_table, + .name = "UDPv6", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udp_init_sock, + .destroy = udpv6_destroy_sock, + .setsockopt = udpv6_setsockopt, + .getsockopt = udpv6_getsockopt, + .sendmsg = udpv6_sendmsg, + .recvmsg = udpv6_recvmsg, + .release_cb = ip6_datagram_release_cb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, + .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), + .obj_size = sizeof(struct udp6_sock), + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udpv6_setsockopt, - .compat_getsockopt = compat_udpv6_getsockopt, + .compat_setsockopt = compat_udpv6_setsockopt, + 
.compat_getsockopt = compat_udpv6_getsockopt, #endif - .diag_destroy = udp_abort, + .diag_destroy = udp_abort, }; static struct inet_protosw udpv6_protosw = { diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 83421c6f0bef..189a12a5e4ac 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1787,6 +1787,7 @@ static struct pernet_operations l2tp_net_ops = { .exit = l2tp_exit_net, .id = &l2tp_net_id, .size = sizeof(struct l2tp_net), + .async = true, }; static int __init l2tp_init(void) diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index d90928f50226..a7f7b8ff4729 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -394,8 +394,9 @@ static void llc_sap_mcast(struct llc_sap *sap, const struct llc_addr *laddr, struct sk_buff *skb) { - int i = 0, count = 256 / sizeof(struct sock *); - struct sock *sk, *stack[count]; + int i = 0; + struct sock *sk; + struct sock *stack[256 / sizeof(struct sock *)]; struct llc_sock *llc; struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); @@ -408,7 +409,7 @@ static void llc_sap_mcast(struct llc_sap *sap, continue; sock_hold(sk); - if (i < count) + if (i < ARRAY_SIZE(stack)) stack[i++] = sk; else { llc_do_mcast(sap, skb, stack, i); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d01743234cf6..9c898a3688c6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2549,11 +2549,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) fwd_skb = skb_copy_expand(skb, local->tx_headroom + sdata->encrypt_headroom, 0, GFP_ATOMIC); - if (!fwd_skb) { - net_info_ratelimited("%s: failed to clone mesh frame\n", - sdata->name); + if (!fwd_skb) goto out; - } fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 7a4de6d618b1..d4a89a8be013 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2488,6 +2488,7 @@ static void mpls_net_exit(struct net *net) static struct 
pernet_operations mpls_net_ops = { .init = mpls_net_init, .exit = mpls_net_exit, + .async = true, }; static struct rtnl_af_ops mpls_af_ops __read_mostly = { diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index d4201665a580..05fcfb4fbe1d 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -183,7 +183,7 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info) hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO); if (!hdr) { - kfree(skb); + kfree_skb(skb); return -EMSGSIZE; } @@ -204,7 +204,7 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info) err: genlmsg_cancel(skb, hdr); - kfree(skb); + kfree_skb(skb); return rc; } @@ -299,6 +299,7 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info) package = np; if (!package) { /* The user has set a package that does not exist */ + spin_unlock_irqrestore(&ndp->lock, flags); return -ERANGE; } @@ -317,6 +318,7 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info) /* The user has set a channel that does not exist on this * package */ + spin_unlock_irqrestore(&ndp->lock, flags); netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n", channel_id); return -ERANGE; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5f6f73cf2174..6a6cb9db030b 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2289,10 +2289,12 @@ static struct pernet_operations ipvs_core_ops = { .exit = __ip_vs_cleanup, .id = &ip_vs_net_id, .size = sizeof(struct netns_ipvs), + .async = true, }; static struct pernet_operations ipvs_core_dev_ops = { .exit = __ip_vs_dev_cleanup, + .async = true, }; /* diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 58d5d05aec24..8b25aab41928 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -479,6 +479,7 @@ static void 
__ip_vs_ftp_exit(struct net *net) static struct pernet_operations ip_vs_ftp_ops = { .init = __ip_vs_ftp_init, .exit = __ip_vs_ftp_exit, + .async = true, }; static int __init ip_vs_ftp_init(void) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 558593e6a0a3..8e19c86d1aa6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6596,6 +6596,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .exit = nf_tables_exit_net, + .async = true, }; static int __init nf_tables_module_init(void) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 03ead8a9e90c..84fc4954862d 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -566,6 +566,7 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) static struct pernet_operations nfnetlink_net_ops = { .init = nfnetlink_net_init, .exit_batch = nfnetlink_net_exit_batch, + .async = true, }; static int __init nfnetlink_init(void) diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 88d427f9f9e6..8d9f18bb8840 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -515,6 +515,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net) static struct pernet_operations nfnl_acct_ops = { .init = nfnl_acct_net_init, .exit = nfnl_acct_net_exit, + .async = true, }; static int __init nfnl_acct_init(void) diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 95b04702a655..6819300f7fb7 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -586,6 +586,7 @@ static void __net_exit cttimeout_net_exit(struct net *net) static struct pernet_operations cttimeout_ops = { .init = cttimeout_net_init, .exit = cttimeout_net_exit, + .async = true, }; static int __init cttimeout_init(void) diff --git 
a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 7b46aa4c478d..b21ef79849a1 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1108,6 +1108,7 @@ static struct pernet_operations nfnl_log_net_ops = { .exit = nfnl_log_net_exit, .id = &nfnl_log_net_id, .size = sizeof(struct nfnl_log_net), + .async = true, }; static int __init nfnetlink_log_init(void) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8bba23160a68..9f572ed56208 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -833,11 +833,8 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) if (diff > skb_tailroom(e->skb)) { nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), diff, GFP_ATOMIC); - if (!nskb) { - printk(KERN_WARNING "nf_queue: OOM " - "in mangle, dropping packet\n"); + if (!nskb) return -ENOMEM; - } kfree_skb(e->skb); e->skb = nskb; } @@ -1528,6 +1525,7 @@ static struct pernet_operations nfnl_queue_net_ops = { .exit_batch = nfnl_queue_net_exit_batch, .id = &nfnl_queue_net_id, .size = sizeof(struct nfnl_queue_net), + .async = true, }; static int __init nfnetlink_queue_init(void) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ef38e5aecd28..100191df0371 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2384,6 +2384,7 @@ static struct pernet_operations ovs_net_ops = { .exit = ovs_exit_net, .id = &ovs_net_id, .size = sizeof(struct ovs_net), + .async = true, }; static int __init dp_init(void) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index b6c8524032a0..f81c1d0ddff4 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -464,10 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, return 0; } -static unsigned int packet_length(const struct sk_buff *skb, - struct net_device *dev) +static int packet_length(const struct sk_buff *skb, + struct net_device 
*dev) { - unsigned int length = skb->len - dev->hard_header_len; + int length = skb->len - dev->hard_header_len; if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol)) @@ -478,7 +478,7 @@ static unsigned int packet_length(const struct sk_buff *skb, * account for 802.1ad. e.g. is_skb_forwardable(). */ - return length; + return length > 0 ? length : 0; } void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) diff --git a/net/rds/connection.c b/net/rds/connection.c index 2da3176bf792..abef75da89a7 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -540,9 +540,9 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), + u64 *buffer, size_t item_len) { - uint64_t buffer[(item_len + 7) / 8]; struct hlist_head *head; struct rds_connection *conn; size_t i; @@ -578,9 +578,9 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, int (*visitor)(struct rds_conn_path *, void *), + u64 *buffer, size_t item_len) { - u64 buffer[(item_len + 7) / 8]; struct hlist_head *head; struct rds_connection *conn; size_t i; @@ -649,8 +649,11 @@ static void rds_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { + u64 buffer[(sizeof(struct rds_info_connection) + 7) / 8]; + rds_walk_conn_path_info(sock, len, iter, lens, rds_conn_info_visitor, + buffer, sizeof(struct rds_info_connection)); } diff --git a/net/rds/ib.c b/net/rds/ib.c index 50a88f3e7e39..02deee29e7f1 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -321,8 +321,11 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { + u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8]; + rds_for_each_conn_info(sock, len, iter, lens, 
rds_ib_conn_info_visitor, + buffer, sizeof(struct rds_info_rdma_connection)); } diff --git a/net/rds/message.c b/net/rds/message.c index 90dcdcfe9f62..a35f76971984 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -67,7 +67,7 @@ static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie) return true; } -struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif) +static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif) { return container_of(znotif, struct rds_msg_zcopy_info, znotif); } @@ -355,9 +355,8 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in return rm; } -int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from) +static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from) { - unsigned long sg_off; struct scatterlist *sg; int ret = 0; int length = iov_iter_count(from); @@ -370,7 +369,6 @@ int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from) * now allocate and copy in the data payload. */ sg = rm->data.op_sg; - sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. 
*/ info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) diff --git a/net/rds/rds.h b/net/rds/rds.h index 74cd27c661de..b04c333d9d1c 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -735,6 +735,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), + u64 *buffer, size_t item_len); __printf(2, 3) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 08230a145042..08ea9cd5c2f6 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -272,13 +272,14 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr) static void rds_tcp_conn_free(void *arg) { struct rds_tcp_connection *tc = arg; + unsigned long flags; rdsdebug("freeing tc %p\n", tc); - spin_lock_bh(&rds_tcp_conn_lock); + spin_lock_irqsave(&rds_tcp_conn_lock, flags); if (!tc->t_tcp_node_detached) list_del(&tc->t_tcp_node); - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); kmem_cache_free(rds_tcp_conn_slab, tc); } @@ -308,13 +309,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) rdsdebug("rds_conn_path [%d] tc %p\n", i, conn->c_path[i].cp_transport_data); } - spin_lock_bh(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); for (i = 0; i < RDS_MPATH_WORKERS; i++) { tc = conn->c_path[i].cp_transport_data; tc->t_tcp_node_detached = false; list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); } - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irq(&rds_tcp_conn_lock); fail: if (ret) { for (j = 0; j < i; j++) @@ -515,6 +516,7 @@ static struct pernet_operations rds_tcp_net_ops = { .exit = rds_tcp_exit_net, .id = &rds_tcp_netid, .size = sizeof(struct rds_tcp_net), + .async = true, }; static void rds_tcp_kill_sock(struct net *net) @@ -526,7 +528,7 @@ static void rds_tcp_kill_sock(struct net *net) rtn->rds_tcp_listen_sock = NULL; rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); - spin_lock_bh(&rds_tcp_conn_lock); + 
spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -539,7 +541,7 @@ static void rds_tcp_kill_sock(struct net *net) tc->t_tcp_node_detached = true; } } - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) rds_conn_destroy(tc->t_cpath->cp_conn); } @@ -587,7 +589,7 @@ static void rds_tcp_sysctl_reset(struct net *net) { struct rds_tcp_connection *tc, *_tc; - spin_lock_bh(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -597,7 +599,7 @@ static void rds_tcp_sysctl_reset(struct net *net) /* reconnect with new parameters */ rds_conn_path_drop(tc->t_cpath, false); } - spin_unlock_bh(&rds_tcp_conn_lock); + spin_unlock_irq(&rds_tcp_conn_lock); } static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 9d45d8b56744..7bff716e911e 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -272,7 +272,7 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, unsigned int *_offset, unsigned int *_len) { unsigned int offset = sizeof(struct rxrpc_wire_header); - unsigned int len = *_len; + unsigned int len; int ret; u8 annotation = *_annotation; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a54fa7b8c217..57cf37145282 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -109,6 +109,42 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) } EXPORT_SYMBOL(__tcf_idr_release); +static size_t tcf_action_shared_attrs_size(const struct tc_action *act) +{ + u32 cookie_len = 0; + + if (act->act_cookie) + cookie_len = nla_total_size(act->act_cookie->len); + + return nla_total_size(0) /* action number nested */ + + 
nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */ + + cookie_len /* TCA_ACT_COOKIE */ + + nla_total_size(0) /* TCA_ACT_STATS nested */ + /* TCA_STATS_BASIC */ + + nla_total_size_64bit(sizeof(struct gnet_stats_basic)) + /* TCA_STATS_QUEUE */ + + nla_total_size_64bit(sizeof(struct gnet_stats_queue)) + + nla_total_size(0) /* TCA_OPTIONS nested */ + + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */ +} + +static size_t tcf_action_full_attrs_size(size_t sz) +{ + return NLMSG_HDRLEN /* struct nlmsghdr */ + + sizeof(struct tcamsg) + + nla_total_size(0) /* TCA_ACT_TAB nested */ + + sz; +} + +static size_t tcf_action_fill_size(const struct tc_action *act) +{ + size_t sz = tcf_action_shared_attrs_size(act); + + if (act->ops->get_fill_size) + return act->ops->get_fill_size(act) + sz; + return sz; +} + static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { @@ -741,10 +777,12 @@ static void cleanup_a(struct list_head *actions, int ovr) int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct list_head *actions, struct netlink_ext_ack *extack) + struct list_head *actions, size_t *attr_size, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; + size_t sz = 0; int err; int i; @@ -760,11 +798,14 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, goto err; } act->order = i; + sz += tcf_action_fill_size(act); if (ovr) act->tcfa_refcnt++; list_add_tail(&act->list, actions); } + *attr_size = tcf_action_full_attrs_size(sz); + /* Remove the temp refcnt which was necessary to protect against * destroying an existing action which was being replaced */ @@ -994,12 +1035,13 @@ err_out: static int tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, - u32 portid, struct netlink_ext_ack *extack) + u32 portid, size_t attr_size, struct netlink_ext_ack *extack) 
{ int ret; struct sk_buff *skb; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, + GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -1032,6 +1074,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; + size_t attr_size = 0; LIST_HEAD(actions); ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); @@ -1053,13 +1096,16 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; + attr_size += tcf_action_fill_size(act); list_add_tail(&act->list, &actions); } + attr_size = tcf_action_full_attrs_size(attr_size); + if (event == RTM_GETACTION) ret = tcf_get_notify(net, portid, n, &actions, event, extack); else { /* delete */ - ret = tcf_del_notify(net, n, &actions, portid, extack); + ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack); if (ret) goto err; return ret; @@ -1072,12 +1118,13 @@ err: static int tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, - u32 portid, struct netlink_ext_ack *extack) + u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; int err = 0; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? 
NLMSG_GOODSIZE : attr_size, + GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -1099,15 +1146,16 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int ovr, struct netlink_ext_ack *extack) { + size_t attr_size = 0; int ret = 0; LIST_HEAD(actions); ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions, - extack); + &attr_size, extack); if (ret) return ret; - return tcf_add_notify(net, n, &actions, portid, extack); + return tcf_add_notify(net, n, &actions, portid, attr_size, extack); } static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 74563254e676..88fbb8403565 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -217,6 +217,19 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index, return tcf_idr_search(tn, a, index); } +static size_t tcf_gact_get_fill_size(const struct tc_action *act) +{ + size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */ + +#ifdef CONFIG_GACT_PROB + if (to_gact(act)->tcfg_ptype) + /* TCA_GACT_PROB */ + sz += nla_total_size(sizeof(struct tc_gact_p)); +#endif + + return sz; +} + static struct tc_action_ops act_gact_ops = { .kind = "gact", .type = TCA_ACT_GACT, @@ -227,6 +240,7 @@ static struct tc_action_ops act_gact_ops = { .init = tcf_gact_init, .walk = tcf_gact_walker, .lookup = tcf_gact_search, + .get_fill_size = tcf_gact_get_fill_size, .size = sizeof(struct tcf_gact), }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 19f9f421d5b7..ec5fe8ec0c3e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1433,6 +1433,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, #ifdef CONFIG_NET_CLS_ACT { struct tc_action *act; + size_t attr_size = 0; if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tp, tb[exts->police], @@ -1450,7 +1451,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, 
struct nlattr **tb, err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL, ovr, TCA_ACT_BIND, - &actions, extack); + &actions, &attr_size, extack); if (err) return err; list_for_each_entry(act, &actions, list) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 7d0ce2c40f93..d964e60c730e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -511,6 +511,9 @@ static int fl_set_key_flags(struct nlattr **tb, fl_set_key_flag(key, mask, flags_key, flags_mask, TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT); + fl_set_key_flag(key, mask, flags_key, flags_mask, + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, + FLOW_DIS_FIRST_FRAG); return 0; } @@ -1130,6 +1133,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) fl_get_key_flag(flags_key, flags_mask, &key, &mask, TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT); + fl_get_key_flag(flags_key, flags_mask, &key, &mask, + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, + FLOW_DIS_FIRST_FRAG); _key = cpu_to_be32(key); _mask = cpu_to_be32(mask); diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 00667c50efa7..e64630cd3331 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -101,13 +101,14 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp) return NULL; INIT_LIST_HEAD(&new->key_list); + refcount_set(&new->refcnt, 1); new->key_id = key_id; return new; } /* Free the shared key structure */ -static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) +static void sctp_auth_shkey_destroy(struct sctp_shared_key *sh_key) { BUG_ON(!list_empty(&sh_key->key_list)); sctp_auth_key_put(sh_key->key); @@ -115,6 +116,17 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) kfree(sh_key); } +void sctp_auth_shkey_release(struct sctp_shared_key *sh_key) +{ + if (refcount_dec_and_test(&sh_key->refcnt)) + sctp_auth_shkey_destroy(sh_key); +} + +void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key) +{ + refcount_inc(&sh_key->refcnt); +} + 
/* Destroy the entire key list. This is done during the * association and endpoint free process. */ @@ -128,7 +140,7 @@ void sctp_auth_destroy_keys(struct list_head *keys) key_for_each_safe(ep_key, tmp, keys) { list_del_init(&ep_key->key_list); - sctp_auth_shkey_free(ep_key); + sctp_auth_shkey_release(ep_key); } } @@ -409,13 +421,19 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) sctp_auth_key_put(asoc->asoc_shared_key); asoc->asoc_shared_key = secret; + asoc->shkey = ep_key; /* Update send queue in case any chunk already in there now * needs authenticating */ list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) { - if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) + if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) { chunk->auth = 1; + if (!chunk->shkey) { + chunk->shkey = asoc->shkey; + sctp_auth_shkey_hold(chunk->shkey); + } + } } return 0; @@ -431,8 +449,11 @@ struct sctp_shared_key *sctp_auth_get_shkey( /* First search associations set of endpoint pair shared keys */ key_for_each(key, &asoc->endpoint_shared_keys) { - if (key->key_id == key_id) - return key; + if (key->key_id == key_id) { + if (!key->deactivated) + return key; + break; + } } return NULL; @@ -703,16 +724,15 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc) * after the AUTH chunk in the SCTP packet. 
*/ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, - struct sk_buff *skb, - struct sctp_auth_chunk *auth, - gfp_t gfp) + struct sk_buff *skb, struct sctp_auth_chunk *auth, + struct sctp_shared_key *ep_key, gfp_t gfp) { - struct crypto_shash *tfm; struct sctp_auth_bytes *asoc_key; + struct crypto_shash *tfm; __u16 key_id, hmac_id; - __u8 *digest; unsigned char *end; int free_key = 0; + __u8 *digest; /* Extract the info we need: * - hmac id @@ -724,12 +744,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, if (key_id == asoc->active_key_id) asoc_key = asoc->asoc_shared_key; else { - struct sctp_shared_key *ep_key; - - ep_key = sctp_auth_get_shkey(asoc, key_id); - if (!ep_key) - return; - + /* ep_key can't be NULL here */ asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp); if (!asoc_key) return; @@ -829,7 +844,7 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, struct sctp_association *asoc, struct sctp_authkey *auth_key) { - struct sctp_shared_key *cur_key = NULL; + struct sctp_shared_key *cur_key, *shkey; struct sctp_auth_bytes *key; struct list_head *sh_keys; int replace = 0; @@ -842,46 +857,34 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, else sh_keys = &ep->endpoint_shared_keys; - key_for_each(cur_key, sh_keys) { - if (cur_key->key_id == auth_key->sca_keynumber) { + key_for_each(shkey, sh_keys) { + if (shkey->key_id == auth_key->sca_keynumber) { replace = 1; break; } } - /* If we are not replacing a key id, we need to allocate - * a shared key. 
- */ - if (!replace) { - cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, - GFP_KERNEL); - if (!cur_key) - return -ENOMEM; - } + cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, GFP_KERNEL); + if (!cur_key) + return -ENOMEM; /* Create a new key data based on the info passed in */ key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL); - if (!key) - goto nomem; + if (!key) { + kfree(cur_key); + return -ENOMEM; + } memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength); + cur_key->key = key; - /* If we are replacing, remove the old keys data from the - * key id. If we are adding new key id, add it to the - * list. - */ - if (replace) - sctp_auth_key_put(cur_key->key); - else - list_add(&cur_key->key_list, sh_keys); + if (replace) { + list_del_init(&shkey->key_list); + sctp_auth_shkey_release(shkey); + } + list_add(&cur_key->key_list, sh_keys); - cur_key->key = key; return 0; -nomem: - if (!replace) - sctp_auth_shkey_free(cur_key); - - return -ENOMEM; } int sctp_auth_set_active_key(struct sctp_endpoint *ep, @@ -905,7 +908,7 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep, } } - if (!found) + if (!found || key->deactivated) return -EINVAL; if (asoc) { @@ -952,7 +955,58 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep, /* Delete the shared key */ list_del_init(&key->key_list); - sctp_auth_shkey_free(key); + sctp_auth_shkey_release(key); + + return 0; +} + +int sctp_auth_deact_key_id(struct sctp_endpoint *ep, + struct sctp_association *asoc, __u16 key_id) +{ + struct sctp_shared_key *key; + struct list_head *sh_keys; + int found = 0; + + /* The key identifier MUST NOT be the current active key + * The key identifier MUST correspond to an existing key + */ + if (asoc) { + if (asoc->active_key_id == key_id) + return -EINVAL; + + sh_keys = &asoc->endpoint_shared_keys; + } else { + if (ep->active_key_id == key_id) + return -EINVAL; + + sh_keys = &ep->endpoint_shared_keys; + } + + key_for_each(key, sh_keys) { + if (key->key_id 
== key_id) { + found = 1; + break; + } + } + + if (!found) + return -EINVAL; + + /* refcnt == 1 and !list_empty mean it's not being used anywhere + * and deactivated will be set, so it's time to notify userland + * that this shkey can be freed. + */ + if (asoc && !list_empty(&key->key_list) && + refcount_read(&key->refcnt) == 1) { + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, key->key_id, + SCTP_AUTH_FREE_KEY, GFP_KERNEL); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); + } + + key->deactivated = 1; return 0; } diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 991a530c6b31..f889a84f264d 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -168,6 +168,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, { size_t len, first_len, max_data, remaining; size_t msg_len = iov_iter_count(from); + struct sctp_shared_key *shkey = NULL; struct list_head *pos, *temp; struct sctp_chunk *chunk; struct sctp_datamsg *msg; @@ -204,6 +205,17 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (hmac_desc) max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) + hmac_desc->hmac_len); + + if (sinfo->sinfo_tsn && + sinfo->sinfo_ssn != asoc->active_key_id) { + shkey = sctp_auth_get_shkey(asoc, sinfo->sinfo_ssn); + if (!shkey) { + err = -EINVAL; + goto errout; + } + } else { + shkey = asoc->shkey; + } } /* Check what's our max considering the above */ @@ -275,6 +287,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (err < 0) goto errout_chunk_free; + chunk->shkey = shkey; + /* Put the chunk->skb back into the form expected by send. 
*/ __skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr - chunk->skb->data); diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index aeea6da81441..fd2684ad94c8 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -130,11 +130,3 @@ void sctp_dbg_objcnt_init(struct net *net) if (!ent) pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } - -/* Cleanup the objcount entry in the proc filesystem. */ -void sctp_dbg_objcnt_exit(struct net *net) -{ - remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp); -} - - diff --git a/net/sctp/output.c b/net/sctp/output.c index 01a26ee051e3..d6e1c90cc09a 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -241,10 +241,13 @@ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt, if (!chunk->auth) return retval; - auth = sctp_make_auth(asoc); + auth = sctp_make_auth(asoc, chunk->shkey->key_id); if (!auth) return retval; + auth->shkey = chunk->shkey; + sctp_auth_shkey_hold(auth->shkey); + retval = __sctp_packet_append_chunk(pkt, auth); if (retval != SCTP_XMIT_OK) @@ -490,7 +493,8 @@ merge: } if (auth) { - sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp); + sctp_auth_calculate_hmac(tp->asoc, nskb, auth, + packet->auth->shkey, gfp); /* free auth if no more chunks, or add it back */ if (list_empty(&packet->chunk_list)) sctp_chunk_free(packet->auth); @@ -770,6 +774,16 @@ static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, enum sctp_xmit retval = SCTP_XMIT_OK; size_t psize, pmtu, maxsize; + /* Don't bundle in this packet if this chunk's auth key doesn't + * match other chunks already enqueued on this packet. Also, + * don't bundle the chunk with auth key if other chunks in this + * packet don't have auth key. 
+ */ + if ((packet->auth && chunk->shkey != packet->auth->shkey) || + (!packet->auth && chunk->shkey && + chunk->chunk_hdr->type != SCTP_CID_AUTH)) + return SCTP_XMIT_PMTU_FULL; + psize = packet->size; if (packet->transport->asoc) pmtu = packet->transport->asoc->pathmtu; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 537545ebcb0e..17d0155d9de3 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -101,25 +101,6 @@ static const struct file_operations sctp_snmp_seq_fops = { .release = single_release_net, }; -/* Set up the proc fs entry for 'snmp' object. */ -int __net_init sctp_snmp_proc_init(struct net *net) -{ - struct proc_dir_entry *p; - - p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_snmp_seq_fops); - if (!p) - return -ENOMEM; - - return 0; -} - -/* Cleanup the proc fs entry for 'snmp' object. */ -void sctp_snmp_proc_exit(struct net *net) -{ - remove_proc_entry("snmp", net->sctp.proc_net_sctp); -} - /* Dump local addresses of an association/endpoint. */ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb) { @@ -259,25 +240,6 @@ static const struct file_operations sctp_eps_seq_fops = { .release = seq_release_net, }; -/* Set up the proc fs entry for 'eps' object. */ -int __net_init sctp_eps_proc_init(struct net *net) -{ - struct proc_dir_entry *p; - - p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_eps_seq_fops); - if (!p) - return -ENOMEM; - - return 0; -} - -/* Cleanup the proc fs entry for 'eps' object. */ -void sctp_eps_proc_exit(struct net *net) -{ - remove_proc_entry("eps", net->sctp.proc_net_sctp); -} - struct sctp_ht_iter { struct seq_net_private p; struct rhashtable_iter hti; @@ -390,25 +352,6 @@ static const struct file_operations sctp_assocs_seq_fops = { .release = seq_release_net, }; -/* Set up the proc fs entry for 'assocs' object. 
*/ -int __net_init sctp_assocs_proc_init(struct net *net) -{ - struct proc_dir_entry *p; - - p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_assocs_seq_fops); - if (!p) - return -ENOMEM; - - return 0; -} - -/* Cleanup the proc fs entry for 'assocs' object. */ -void sctp_assocs_proc_exit(struct net *net) -{ - remove_proc_entry("assocs", net->sctp.proc_net_sctp); -} - static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { struct sctp_association *assoc; @@ -488,12 +431,6 @@ static const struct seq_operations sctp_remaddr_ops = { .show = sctp_remaddr_seq_show, }; -/* Cleanup the proc fs entry for 'remaddr' object. */ -void sctp_remaddr_proc_exit(struct net *net) -{ - remove_proc_entry("remaddr", net->sctp.proc_net_sctp); -} - static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &sctp_remaddr_ops, @@ -507,13 +444,28 @@ static const struct file_operations sctp_remaddr_seq_fops = { .release = seq_release_net, }; -int __net_init sctp_remaddr_proc_init(struct net *net) +/* Set up the proc fs entry for the SCTP protocol. 
*/ +int __net_init sctp_proc_init(struct net *net) { - struct proc_dir_entry *p; - - p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp, - &sctp_remaddr_seq_fops); - if (!p) + net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); + if (!net->sctp.proc_net_sctp) return -ENOMEM; + if (!proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_snmp_seq_fops)) + goto cleanup; + if (!proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_eps_seq_fops)) + goto cleanup; + if (!proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_assocs_seq_fops)) + goto cleanup; + if (!proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_remaddr_seq_fops)) + goto cleanup; return 0; + +cleanup: + remove_proc_subtree("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; + return -ENOMEM; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 91813e686c67..493b817f6a2a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -80,56 +80,6 @@ long sysctl_sctp_mem[3]; int sysctl_sctp_rmem[3]; int sysctl_sctp_wmem[3]; -/* Set up the proc fs entry for the SCTP protocol. */ -static int __net_init sctp_proc_init(struct net *net) -{ -#ifdef CONFIG_PROC_FS - net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); - if (!net->sctp.proc_net_sctp) - goto out_proc_net_sctp; - if (sctp_snmp_proc_init(net)) - goto out_snmp_proc_init; - if (sctp_eps_proc_init(net)) - goto out_eps_proc_init; - if (sctp_assocs_proc_init(net)) - goto out_assocs_proc_init; - if (sctp_remaddr_proc_init(net)) - goto out_remaddr_proc_init; - - return 0; - -out_remaddr_proc_init: - sctp_assocs_proc_exit(net); -out_assocs_proc_init: - sctp_eps_proc_exit(net); -out_eps_proc_init: - sctp_snmp_proc_exit(net); -out_snmp_proc_init: - remove_proc_entry("sctp", net->proc_net); - net->sctp.proc_net_sctp = NULL; -out_proc_net_sctp: - return -ENOMEM; -#endif /* CONFIG_PROC_FS */ - return 0; -} - -/* Clean up the proc fs entry for the SCTP protocol. 
- * Note: Do not make this __exit as it is used in the init error - * path. - */ -static void sctp_proc_exit(struct net *net) -{ -#ifdef CONFIG_PROC_FS - sctp_snmp_proc_exit(net); - sctp_eps_proc_exit(net); - sctp_assocs_proc_exit(net); - sctp_remaddr_proc_exit(net); - - remove_proc_entry("sctp", net->proc_net); - net->sctp.proc_net_sctp = NULL; -#endif -} - /* Private helper to extract ipv4 address and stash them in * the protocol structure. */ @@ -1285,10 +1235,12 @@ static int __net_init sctp_defaults_init(struct net *net) if (status) goto err_init_mibs; +#ifdef CONFIG_PROC_FS /* Initialize proc fs directory. */ status = sctp_proc_init(net); if (status) goto err_init_proc; +#endif sctp_dbg_objcnt_init(net); @@ -1320,9 +1272,10 @@ static void __net_exit sctp_defaults_exit(struct net *net) sctp_free_addr_wq(net); sctp_free_local_addr_list(net); - sctp_dbg_objcnt_exit(net); - - sctp_proc_exit(net); +#ifdef CONFIG_PROC_FS + remove_proc_subtree("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; +#endif cleanup_sctp_mibs(net); sctp_sysctl_net_unregister(net); } @@ -1330,6 +1283,7 @@ static void __net_exit sctp_defaults_exit(struct net *net) static struct pernet_operations sctp_defaults_ops = { .init = sctp_defaults_init, .exit = sctp_defaults_exit, + .async = true, }; static int __net_init sctp_ctrlsock_init(struct net *net) @@ -1353,6 +1307,7 @@ static void __net_init sctp_ctrlsock_exit(struct net *net) static struct pernet_operations sctp_ctrlsock_ops = { .init = sctp_ctrlsock_init, .exit = sctp_ctrlsock_exit, + .async = true, }; /* Initialize the universe into something sensible. 
*/ diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index d01475f5f710..cc20bc39ee7c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -87,7 +87,28 @@ static void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, /* Control chunk destructor */ static void sctp_control_release_owner(struct sk_buff *skb) { - /*TODO: do memory release */ + struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg; + + if (chunk->shkey) { + struct sctp_shared_key *shkey = chunk->shkey; + struct sctp_association *asoc = chunk->asoc; + + /* refcnt == 2 and !list_empty mean after this release, it's + * not being used anywhere, and it's time to notify userland + * that this shkey can be freed if it's been deactivated. + */ + if (shkey->deactivated && !list_empty(&shkey->key_list) && + refcount_read(&shkey->refcnt) == 2) { + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id, + SCTP_AUTH_FREE_KEY, + GFP_KERNEL); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); + } + sctp_auth_shkey_release(chunk->shkey); + } } static void sctp_control_set_owner_w(struct sctp_chunk *chunk) @@ -102,7 +123,12 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk) * * For now don't do anything for now. */ + if (chunk->auth) { + chunk->shkey = asoc->shkey; + sctp_auth_shkey_hold(chunk->shkey); + } skb->sk = asoc ? 
asoc->base.sk : NULL; + skb_shinfo(skb)->destructor_arg = chunk; skb->destructor = sctp_control_release_owner; } @@ -1271,7 +1297,8 @@ nodata: return retval; } -struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) +struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc, + __u16 key_id) { struct sctp_authhdr auth_hdr; struct sctp_hmac *hmac_desc; @@ -1289,7 +1316,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) return NULL; auth_hdr.hmac_id = htons(hmac_desc->hmac_id); - auth_hdr.shkey_id = htons(asoc->active_key_id); + auth_hdr.shkey_id = htons(key_id); retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr), &auth_hdr); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index b71e7fb0a20a..298112ca8c06 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1049,6 +1049,16 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands, asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } +static void sctp_cmd_peer_no_auth(struct sctp_cmd_seq *commands, + struct sctp_association *asoc) +{ + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); +} + /* Helper function to generate an adaptation indication event */ static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands, struct sctp_association *asoc) @@ -1755,6 +1765,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, case SCTP_CMD_ADAPTATION_IND: sctp_cmd_adaptation_ind(commands, asoc); break; + case SCTP_CMD_PEER_NO_AUTH: + sctp_cmd_peer_no_auth(commands, asoc); + break; case SCTP_CMD_ASSOC_SHKEY: error = sctp_auth_asoc_init_active_key(asoc, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index eb7905ffe5f2..cc56a67dbb4d 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -659,7 +659,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, void 
*arg, struct sctp_cmd_seq *commands) { - struct sctp_ulpevent *ev, *ai_ev = NULL; + struct sctp_ulpevent *ev, *ai_ev = NULL, *auth_ev = NULL; struct sctp_association *new_asoc; struct sctp_init_chunk *peer_init; struct sctp_chunk *chunk = arg; @@ -820,6 +820,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, goto nomem_aiev; } + if (!new_asoc->peer.auth_capable) { + auth_ev = sctp_ulpevent_make_authkey(new_asoc, 0, + SCTP_AUTH_NO_AUTH, + GFP_ATOMIC); + if (!auth_ev) + goto nomem_authev; + } + /* Add all the state machine commands now since we've created * everything. This way we don't introduce memory corruptions * during side-effect processing and correclty count established @@ -847,8 +855,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ai_ev)); + if (auth_ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(auth_ev)); + return SCTP_DISPOSITION_CONSUME; +nomem_authev: + sctp_ulpevent_free(ai_ev); nomem_aiev: sctp_ulpevent_free(ev); nomem_ev: @@ -953,6 +967,15 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, SCTP_ULPEVENT(ev)); } + if (!asoc->peer.auth_capable) { + ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, + GFP_ATOMIC); + if (!ev) + goto nomem; + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); + } + return SCTP_DISPOSITION_CONSUME; nomem: return SCTP_DISPOSITION_NOMEM; @@ -1908,6 +1931,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b( if (asoc->peer.adaptation_ind) sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL()); + if (!asoc->peer.auth_capable) + sctp_add_cmd_sf(commands, SCTP_CMD_PEER_NO_AUTH, SCTP_NULL()); + return SCTP_DISPOSITION_CONSUME; nomem: @@ -1954,7 +1980,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d( struct sctp_cmd_seq *commands, struct sctp_association *new_asoc) { - struct sctp_ulpevent *ev = NULL, *ai_ev = NULL; + struct sctp_ulpevent *ev = NULL, *ai_ev = NULL, 
*auth_ev = NULL; struct sctp_chunk *repl; /* Clarification from Implementor's Guide: @@ -2001,6 +2027,14 @@ static enum sctp_disposition sctp_sf_do_dupcook_d( goto nomem; } + + if (!asoc->peer.auth_capable) { + auth_ev = sctp_ulpevent_make_authkey(asoc, 0, + SCTP_AUTH_NO_AUTH, + GFP_ATOMIC); + if (!auth_ev) + goto nomem; + } } repl = sctp_make_cookie_ack(new_asoc, chunk); @@ -2015,10 +2049,15 @@ static enum sctp_disposition sctp_sf_do_dupcook_d( if (ai_ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ai_ev)); + if (auth_ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(auth_ev)); return SCTP_DISPOSITION_CONSUME; nomem: + if (auth_ev) + sctp_ulpevent_free(auth_ev); if (ai_ev) sctp_ulpevent_free(ai_ev); if (ev) @@ -4114,6 +4153,7 @@ static enum sctp_ierror sctp_sf_authenticate( const union sctp_subtype type, struct sctp_chunk *chunk) { + struct sctp_shared_key *sh_key = NULL; struct sctp_authhdr *auth_hdr; __u8 *save_digest, *digest; struct sctp_hmac *hmac; @@ -4135,9 +4175,11 @@ static enum sctp_ierror sctp_sf_authenticate( * configured */ key_id = ntohs(auth_hdr->shkey_id); - if (key_id != asoc->active_key_id && !sctp_auth_get_shkey(asoc, key_id)) - return SCTP_IERROR_AUTH_BAD_KEYID; - + if (key_id != asoc->active_key_id) { + sh_key = sctp_auth_get_shkey(asoc, key_id); + if (!sh_key) + return SCTP_IERROR_AUTH_BAD_KEYID; + } /* Make sure that the length of the signature matches what * we expect. 
@@ -4166,7 +4208,7 @@ static enum sctp_ierror sctp_sf_authenticate( sctp_auth_calculate_hmac(asoc, chunk->skb, (struct sctp_auth_chunk *)chunk->chunk_hdr, - GFP_ATOMIC); + sh_key, GFP_ATOMIC); /* Discard the packet if the digests do not match */ if (memcmp(save_digest, digest, sig_len)) { @@ -4243,7 +4285,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net, struct sctp_ulpevent *ev; ev = sctp_ulpevent_make_authkey(asoc, ntohs(auth_hdr->shkey_id), - SCTP_AUTH_NEWKEY, GFP_ATOMIC); + SCTP_AUTH_NEW_KEY, GFP_ATOMIC); if (!ev) return -ENOMEM; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7d3476a4860d..7a10ae3c3d82 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -156,6 +156,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) /* The sndbuf space is tracked per association. */ sctp_association_hold(asoc); + if (chunk->shkey) + sctp_auth_shkey_hold(chunk->shkey); + skb_set_owner_w(chunk->skb, sk); chunk->skb->destructor = sctp_wfree; @@ -1677,7 +1680,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, struct sctp_association *asoc; enum sctp_scope scope; struct cmsghdr *cmsg; - int err = -EINVAL; + int err; *tp = NULL; @@ -1761,16 +1764,20 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, memset(daddr, 0, sizeof(*daddr)); dlen = cmsg->cmsg_len - sizeof(struct cmsghdr); if (cmsg->cmsg_type == SCTP_DSTADDRV4) { - if (dlen < sizeof(struct in_addr)) + if (dlen < sizeof(struct in_addr)) { + err = -EINVAL; goto free; + } dlen = sizeof(struct in_addr); daddr->v4.sin_family = AF_INET; daddr->v4.sin_port = htons(asoc->peer.port); memcpy(&daddr->v4.sin_addr, CMSG_DATA(cmsg), dlen); } else { - if (dlen < sizeof(struct in6_addr)) + if (dlen < sizeof(struct in6_addr)) { + err = -EINVAL; goto free; + } dlen = sizeof(struct in6_addr); daddr->v6.sin6_family = AF_INET6; @@ -1876,6 +1883,19 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, goto err; } + if (asoc->pmtu_pending) + 
sctp_assoc_pending_pmtu(asoc); + + if (sctp_wspace(asoc) < msg_len) + sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); + + if (!sctp_wspace(asoc)) { + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); + if (err) + goto err; + } + if (sctp_state(asoc, CLOSED)) { err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err) @@ -1893,19 +1913,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, pr_debug("%s: we associated primitively\n", __func__); } - if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(asoc); - - if (sctp_wspace(asoc) < msg_len) - sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); - - if (!sctp_wspace(asoc)) { - timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); - if (err) - goto err; - } - datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter); if (IS_ERR(datamsg)) { err = PTR_ERR(datamsg); @@ -1980,6 +1987,14 @@ static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc, if (!cmsgs->srinfo && !cmsgs->prinfo) sinfo->sinfo_timetolive = asoc->default_timetolive; + + if (cmsgs->authinfo) { + /* Reuse sinfo_tsn to indicate that authinfo was set and + * sinfo_ssn to save the keyid on tx path. + */ + sinfo->sinfo_tsn = 1; + sinfo->sinfo_ssn = cmsgs->authinfo->auth_keynumber; + } } static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) @@ -3632,6 +3647,33 @@ static int sctp_setsockopt_del_key(struct sock *sk, } /* + * 8.3.4 Deactivate a Shared Key (SCTP_AUTH_DEACTIVATE_KEY) + * + * This set option will deactivate a shared secret key. 
+ */ +static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, + unsigned int optlen) +{ + struct sctp_endpoint *ep = sctp_sk(sk)->ep; + struct sctp_authkeyid val; + struct sctp_association *asoc; + + if (!ep->auth_enable) + return -EACCES; + + if (optlen != sizeof(struct sctp_authkeyid)) + return -EINVAL; + if (copy_from_user(&val, optval, optlen)) + return -EFAULT; + + asoc = sctp_id2assoc(sk, val.scact_assoc_id); + if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); +} + +/* * 8.1.23 SCTP_AUTO_ASCONF * * This option will enable or disable the use of the automatic generation of @@ -4223,6 +4265,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_AUTH_DELETE_KEY: retval = sctp_setsockopt_del_key(sk, optval, optlen); break; + case SCTP_AUTH_DEACTIVATE_KEY: + retval = sctp_setsockopt_deactivate_key(sk, optval, optlen); + break; case SCTP_AUTO_ASCONF: retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); break; @@ -7197,6 +7242,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_AUTH_KEY: case SCTP_AUTH_CHUNK: case SCTP_AUTH_DELETE_KEY: + case SCTP_AUTH_DEACTIVATE_KEY: retval = -EOPNOTSUPP; break; case SCTP_HMAC_IDENT: @@ -7867,6 +7913,21 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs) if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE) cmsgs->prinfo->pr_value = 0; break; + case SCTP_AUTHINFO: + /* SCTP Socket API Extension + * 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO) + * + * This cmsghdr structure specifies SCTP options for sendmsg(). 
+ * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ --------------------- + * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo + */ + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_authinfo))) + return -EINVAL; + + cmsgs->authinfo = CMSG_DATA(cmsg); + break; case SCTP_DSTADDRV4: case SCTP_DSTADDRV6: /* SCTP Socket API Extension @@ -8105,6 +8166,26 @@ static void sctp_wfree(struct sk_buff *skb) sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); + if (chunk->shkey) { + struct sctp_shared_key *shkey = chunk->shkey; + + /* refcnt == 2 and !list_empty mean after this release, it's + * not being used anywhere, and it's time to notify userland + * that this shkey can be freed if it's been deactivated. + */ + if (shkey->deactivated && !list_empty(&shkey->key_list) && + refcount_read(&shkey->refcnt) == 2) { + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id, + SCTP_AUTH_FREE_KEY, + GFP_KERNEL); + if (ev) + asoc->stream.si->enqueue_event(&asoc->ulpq, ev); + } + sctp_auth_shkey_release(chunk->shkey); + } + sock_wfree(skb); sctp_wake_up_waiters(sk, asoc); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 2c6f4e0a9f3d..86913eb5cfa0 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -7,12 +7,11 @@ * applicable with RoCE-cards only * * Initial restrictions: - * - IPv6 support postponed * - support for alternate links postponed * - partial support for non-blocking sockets only * - support for urgent data postponed * - * Copyright IBM Corp. 2016 + * Copyright IBM Corp. 
2016, 2018 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> * based on prototype from Frank Blaschka @@ -64,6 +63,10 @@ static struct smc_hashinfo smc_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock), }; +static struct smc_hashinfo smc_v6_hashinfo = { + .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock), +}; + int smc_hash_sk(struct sock *sk) { struct smc_hashinfo *h = sk->sk_prot->h.smc_hash; @@ -103,6 +106,18 @@ struct proto smc_proto = { }; EXPORT_SYMBOL_GPL(smc_proto); +struct proto smc_proto6 = { + .name = "SMC6", + .owner = THIS_MODULE, + .keepalive = smc_set_keepalive, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v6_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; +EXPORT_SYMBOL_GPL(smc_proto6); + static int smc_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -159,19 +174,22 @@ static void smc_destruct(struct sock *sk) sk_refcnt_debug_dec(sk); } -static struct sock *smc_sock_alloc(struct net *net, struct socket *sock) +static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, + int protocol) { struct smc_sock *smc; + struct proto *prot; struct sock *sk; - sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0); + prot = (protocol == SMCPROTO_SMC6) ? 
&smc_proto6 : &smc_proto; + sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0); if (!sk) return NULL; sock_init_data(sock, sk); /* sets sk_refcnt to 1 */ sk->sk_state = SMC_INIT; sk->sk_destruct = smc_destruct; - sk->sk_protocol = SMCPROTO_SMC; + sk->sk_protocol = protocol; smc = smc_sk(sk); INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_LIST_HEAD(&smc->accept_q); @@ -198,10 +216,13 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr, goto out; rc = -EAFNOSUPPORT; + if (addr->sin_family != AF_INET && + addr->sin_family != AF_INET6 && + addr->sin_family != AF_UNSPEC) + goto out; /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */ - if ((addr->sin_family != AF_INET) && - ((addr->sin_family != AF_UNSPEC) || - (addr->sin_addr.s_addr != htonl(INADDR_ANY)))) + if (addr->sin_family == AF_UNSPEC && + addr->sin_addr.s_addr != htonl(INADDR_ANY)) goto out; lock_sock(sk); @@ -529,7 +550,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, /* separate smc parameter checking to be safe */ if (alen < sizeof(addr->sa_family)) goto out_err; - if (addr->sa_family != AF_INET) + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) goto out_err; lock_sock(sk); @@ -571,7 +592,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) int rc; release_sock(lsk); - new_sk = smc_sock_alloc(sock_net(lsk), NULL); + new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol); if (!new_sk) { rc = -ENOMEM; lsk->sk_err = ENOMEM; @@ -767,8 +788,6 @@ static void smc_listen_work(struct work_struct *work) struct smc_link *link; int reason_code = 0; int rc = 0; - __be32 subnet; - u8 prefix_len; u8 ibport; /* check if peer is smc capable */ @@ -803,17 +822,11 @@ static void smc_listen_work(struct work_struct *work) goto decline_rdma; } - /* determine subnet and mask from internal TCP socket */ - rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len); - if (rc) { - reason_code = 
SMC_CLC_DECL_CNFERR; /* configuration error */ - goto decline_rdma; - } - pclc = (struct smc_clc_msg_proposal *)&buf; pclc_prfx = smc_clc_proposal_get_prefix(pclc); - if (pclc_prfx->outgoing_subnet != subnet || - pclc_prfx->prefix_len != prefix_len) { + + rc = smc_clc_prfx_match(newclcsock, pclc_prfx); + if (rc) { reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } @@ -1375,6 +1388,7 @@ static const struct proto_ops smc_sock_ops = { static int smc_create(struct net *net, struct socket *sock, int protocol, int kern) { + int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET; struct smc_sock *smc; struct sock *sk; int rc; @@ -1384,20 +1398,20 @@ static int smc_create(struct net *net, struct socket *sock, int protocol, goto out; rc = -EPROTONOSUPPORT; - if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP)) + if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6) goto out; rc = -ENOBUFS; sock->ops = &smc_sock_ops; - sk = smc_sock_alloc(net, sock); + sk = smc_sock_alloc(net, sock, protocol); if (!sk) goto out; /* create internal TCP socket for CLC handshake and fallback */ smc = smc_sk(sk); smc->use_fallback = false; /* assume rdma capability first */ - rc = sock_create_kern(net, PF_INET, SOCK_STREAM, - IPPROTO_TCP, &smc->clcsock); + rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, + &smc->clcsock); if (rc) { sk_common_release(sk); goto out; @@ -1437,16 +1451,23 @@ static int __init smc_init(void) rc = proto_register(&smc_proto, 1); if (rc) { - pr_err("%s: proto_register fails with %d\n", __func__, rc); + pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc); goto out_pnet; } + rc = proto_register(&smc_proto6, 1); + if (rc) { + pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc); + goto out_proto; + } + rc = sock_register(&smc_sock_family_ops); if (rc) { pr_err("%s: sock_register fails with %d\n", __func__, rc); - goto out_proto; + goto out_proto6; } INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); + 
INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); rc = smc_ib_register_client(); if (rc) { @@ -1459,6 +1480,8 @@ static int __init smc_init(void) out_sock: sock_unregister(PF_SMC); +out_proto6: + proto_unregister(&smc_proto6); out_proto: proto_unregister(&smc_proto); out_pnet: @@ -1477,11 +1500,13 @@ static void __exit smc_exit(void) spin_unlock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { list_del_init(&lgr->list); + cancel_delayed_work_sync(&lgr->free_work); smc_lgr_free(lgr); /* free link group */ } static_branch_disable(&tcp_have_smc); smc_ib_unregister_client(); sock_unregister(PF_SMC); + proto_unregister(&smc_proto6); proto_unregister(&smc_proto); smc_pnet_exit(); } diff --git a/net/smc/smc.h b/net/smc/smc.h index 268cdf11533c..e4829a2f46ba 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -18,11 +18,13 @@ #include "smc_ib.h" -#define SMCPROTO_SMC 0 /* SMC protocol */ +#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ +#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ #define SMC_MAX_PORTS 2 /* Max # of ports */ extern struct proto smc_proto; +extern struct proto smc_proto6; #ifdef ATOMIC64_INIT #define KERNEL_HAS_ATOMIC64 diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 874c5a75d6dd..64fbc3230e6c 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -5,7 +5,7 @@ * CLC (connection layer control) handshake over initial TCP socket to * prepare for RDMA traffic * - * Copyright IBM Corp. 2016 + * Copyright IBM Corp. 
2016, 2018 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ @@ -15,6 +15,7 @@ #include <linux/if_ether.h> #include <linux/sched/signal.h> +#include <net/addrconf.h> #include <net/sock.h> #include <net/tcp.h> @@ -74,15 +75,67 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) return true; } -/* determine subnet and mask of internal TCP socket */ -int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, - __be32 *subnet, u8 *prefix_len) +/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */ +static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4, + struct smc_clc_msg_proposal_prefix *prop) +{ + struct in_device *in_dev = __in_dev_get_rcu(dst->dev); + + if (!in_dev) + return -ENODEV; + for_ifa(in_dev) { + if (!inet_ifa_match(ipv4, ifa)) + continue; + prop->prefix_len = inet_mask_len(ifa->ifa_mask); + prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask; + /* prop->ipv6_prefixes_cnt = 0; already done by memset before */ + return 0; + } endfor_ifa(in_dev); + return -ENOENT; +} + +/* fill CLC proposal msg with ipv6 prefixes from device */ +static int smc_clc_prfx_set6_rcu(struct dst_entry *dst, + struct smc_clc_msg_proposal_prefix *prop, + struct smc_clc_ipv6_prefix *ipv6_prfx) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *in6_dev = __in6_dev_get(dst->dev); + struct inet6_ifaddr *ifa; + int cnt = 0; + + if (!in6_dev) + return -ENODEV; + /* use a maximum of 8 IPv6 prefixes from device */ + list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { + if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) + continue; + ipv6_addr_prefix(&ipv6_prfx[cnt].prefix, + &ifa->addr, ifa->prefix_len); + ipv6_prfx[cnt].prefix_len = ifa->prefix_len; + cnt++; + if (cnt == SMC_CLC_MAX_V6_PREFIX) + break; + } + prop->ipv6_prefixes_cnt = cnt; + if (cnt) + return 0; +#endif + return -ENOENT; +} + +/* retrieve and set prefixes in CLC proposal msg */ +static int smc_clc_prfx_set(struct socket *clcsock, + struct 
smc_clc_msg_proposal_prefix *prop, + struct smc_clc_ipv6_prefix *ipv6_prfx) { struct dst_entry *dst = sk_dst_get(clcsock->sk); - struct in_device *in_dev; - struct sockaddr_in addr; + struct sockaddr_storage addrs; + struct sockaddr_in6 *addr6; + struct sockaddr_in *addr; int rc = -ENOENT; + memset(prop, 0, sizeof(*prop)); if (!dst) { rc = -ENOTCONN; goto out; @@ -91,22 +144,97 @@ int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, rc = -ENODEV; goto out_rel; } - /* get address to which the internal TCP socket is bound */ - kernel_getsockname(clcsock, (struct sockaddr *)&addr); - /* analyze IPv4 specific data of net_device belonging to TCP socket */ + kernel_getsockname(clcsock, (struct sockaddr *)&addrs); + /* analyze IP specific data of net_device belonging to TCP socket */ + addr6 = (struct sockaddr_in6 *)&addrs; rcu_read_lock(); - in_dev = __in_dev_get_rcu(dst->dev); + if (addrs.ss_family == PF_INET) { + /* IPv4 */ + addr = (struct sockaddr_in *)&addrs; + rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop); + } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) { + /* mapped IPv4 address - peer is IPv4 only */ + rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3], + prop); + } else { + /* IPv6 */ + rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx); + } + rcu_read_unlock(); +out_rel: + dst_release(dst); +out: + return rc; +} + +/* match ipv4 addrs of dev against addr in CLC proposal */ +static int smc_clc_prfx_match4_rcu(struct net_device *dev, + struct smc_clc_msg_proposal_prefix *prop) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (!in_dev) + return -ENODEV; for_ifa(in_dev) { - if (!inet_ifa_match(addr.sin_addr.s_addr, ifa)) - continue; - *prefix_len = inet_mask_len(ifa->ifa_mask); - *subnet = ifa->ifa_address & ifa->ifa_mask; - rc = 0; - break; + if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) && + inet_ifa_match(prop->outgoing_subnet, ifa)) + return 0; } endfor_ifa(in_dev); - rcu_read_unlock(); + return -ENOENT; +} + +/* 
match ipv6 addrs of dev against addrs in CLC proposal */ +static int smc_clc_prfx_match6_rcu(struct net_device *dev, + struct smc_clc_msg_proposal_prefix *prop) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *in6_dev = __in6_dev_get(dev); + struct smc_clc_ipv6_prefix *ipv6_prfx; + struct inet6_ifaddr *ifa; + int i, max; + + if (!in6_dev) + return -ENODEV; + /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */ + ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop)); + max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX); + list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { + if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) + continue; + for (i = 0; i < max; i++) { + if (ifa->prefix_len == ipv6_prfx[i].prefix_len && + ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix, + ifa->prefix_len)) + return 0; + } + } +#endif + return -ENOENT; +} + +/* check if proposed prefixes match one of our device prefixes */ +int smc_clc_prfx_match(struct socket *clcsock, + struct smc_clc_msg_proposal_prefix *prop) +{ + struct dst_entry *dst = sk_dst_get(clcsock->sk); + int rc; + + if (!dst) { + rc = -ENOTCONN; + goto out; + } + if (!dst->dev) { + rc = -ENODEV; + goto out_rel; + } + rcu_read_lock(); + if (!prop->ipv6_prefixes_cnt) + rc = smc_clc_prfx_match4_rcu(dst->dev, prop); + else + rc = smc_clc_prfx_match6_rcu(dst->dev, prop); + rcu_read_unlock(); out_rel: dst_release(dst); out: @@ -232,16 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport) { + struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX]; struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_msg_proposal pclc; struct smc_clc_msg_trail trl; + int len, i, plen, rc; int reason_code = 0; - struct kvec vec[3]; + struct kvec vec[4]; struct msghdr msg; - int len, plen, rc; + + /* retrieve ip prefixes for CLC proposal msg */ + rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx); + if (rc) + return 
SMC_CLC_DECL_CNFERR; /* configuration error */ /* send SMC Proposal CLC message */ - plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl); + plen = sizeof(pclc) + sizeof(pclc_prfx) + + (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) + + sizeof(trl); memset(&pclc, 0, sizeof(pclc)); memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc.hdr.type = SMC_CLC_PROPOSAL; @@ -252,23 +388,22 @@ int smc_clc_send_proposal(struct smc_sock *smc, memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); pclc.iparea_offset = htons(0); - memset(&pclc_prfx, 0, sizeof(pclc_prfx)); - /* determine subnet and mask from internal TCP socket */ - rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, - &pclc_prfx.prefix_len); - if (rc) - return SMC_CLC_DECL_CNFERR; /* configuration error */ - pclc_prfx.ipv6_prefixes_cnt = 0; memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); - vec[0].iov_base = &pclc; - vec[0].iov_len = sizeof(pclc); - vec[1].iov_base = &pclc_prfx; - vec[1].iov_len = sizeof(pclc_prfx); - vec[2].iov_base = &trl; - vec[2].iov_len = sizeof(trl); + i = 0; + vec[i].iov_base = &pclc; + vec[i++].iov_len = sizeof(pclc); + vec[i].iov_base = &pclc_prfx; + vec[i++].iov_len = sizeof(pclc_prfx); + if (pclc_prfx.ipv6_prefixes_cnt > 0) { + vec[i].iov_base = &ipv6_prfx[0]; + vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt * + sizeof(ipv6_prfx[0]); + } + vec[i].iov_base = &trl; + vec[i++].iov_len = sizeof(trl); /* due to the few bytes needed for clc-handshake this cannot block */ - len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen); + len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen); if (len < sizeof(pclc)) { if (len >= 0) { reason_code = -ENETUNREACH; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 20e048beac30..63bf1dc2c1f9 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -60,10 +60,15 @@ struct smc_clc_msg_local { /* header2 of clc messages */ u8 mac[6]; /* mac of ib_device 
port */ }; +#define SMC_CLC_MAX_V6_PREFIX 8 + +/* Struct would be 4 byte aligned, but it is used in an array that is sent + * to peers and must conform to RFC7609, hence we need to use packed here. + */ struct smc_clc_ipv6_prefix { - u8 prefix[4]; + struct in6_addr prefix; u8 prefix_len; -} __packed; +} __packed; /* format defined in RFC7609 */ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ __be32 outgoing_subnet; /* subnet mask */ @@ -79,9 +84,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ } __aligned(4); #define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28 -#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix)) +#define SMC_CLC_PROPOSAL_MAX_PREFIX (SMC_CLC_MAX_V6_PREFIX * \ + sizeof(struct smc_clc_ipv6_prefix)) #define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \ SMC_CLC_PROPOSAL_MAX_OFFSET + \ + sizeof(struct smc_clc_msg_proposal_prefix) + \ SMC_CLC_PROPOSAL_MAX_PREFIX + \ sizeof(struct smc_clc_msg_trail)) @@ -122,8 +129,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); } -int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet, - u8 *prefix_len); +int smc_clc_prfx_match(struct socket *clcsock, + struct smc_clc_msg_proposal_prefix *prop); int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type); int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index f76f60e463cb..f44f6803f7ff 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -32,6 +32,17 @@ static u32 smc_lgr_num; /* unique link group number */ +static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) +{ + /* client link group creation always follows the server link group + * creation. For client use a somewhat higher removal delay time, + * otherwise there is a risk of out-of-sync link groups. 
+ */ + mod_delayed_work(system_wq, &lgr->free_work, + lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : + SMC_LGR_FREE_DELAY_SERV); +} + /* Register connection's alert token in our lookup structure. * To use rbtrees we have to implement our own insert core. * Requires @conns_lock @@ -111,13 +122,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) write_unlock_bh(&lgr->conns_lock); if (!reduced || lgr->conns_num) return; - /* client link group creation always follows the server link group - * creation. For client use a somewhat higher removal delay time, - * otherwise there is a risk of out-of-sync link groups. - */ - mod_delayed_work(system_wq, &lgr->free_work, - lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : - SMC_LGR_FREE_DELAY_SERV); + smc_lgr_schedule_free_work(lgr); } static void smc_lgr_free_work(struct work_struct *work) @@ -140,7 +145,8 @@ static void smc_lgr_free_work(struct work_struct *work) list_del_init(&lgr->list); /* remove from smc_lgr_list */ free: spin_unlock_bh(&smc_lgr_list.lock); - smc_lgr_free(lgr); + if (!delayed_work_pending(&lgr->free_work)) + smc_lgr_free(lgr); } /* create a new SMC link group */ @@ -343,6 +349,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr) } write_unlock_bh(&lgr->conns_lock); wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); + smc_lgr_schedule_free_work(lgr); } /* Determine vlan of internal TCP socket. diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 2a8957bd6d38..26df554f7588 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -23,6 +23,8 @@ #include "smc_wr.h" #include "smc.h" +#define SMC_MAX_CQE 32766 /* max. 
# of completion queue elements */ + #define SMC_QP_MIN_RNR_TIMER 5 #define SMC_QP_TIMEOUT 15 /* 4096 * 2 ** timeout usec */ #define SMC_QP_RETRY_CNT 7 /* 7: infinite */ @@ -438,9 +440,15 @@ out: long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) { struct ib_cq_init_attr cqattr = { - .cqe = SMC_WR_MAX_CQE, .comp_vector = 0 }; + .cqe = SMC_MAX_CQE, .comp_vector = 0 }; + int cqe_size_order, smc_order; long rc; + /* the calculated number of cq entries fits to mlx5 cq allocation */ + cqe_size_order = cache_line_size() == 128 ? 7 : 6; + smc_order = MAX_ORDER - cqe_size_order - 1; + if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE) + cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2; smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev, smc_wr_tx_cq_handler, NULL, smcibdev, &cqattr); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index ef0c3494c9cb..210bec3c3ebe 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -19,7 +19,6 @@ #include "smc.h" #include "smc_core.h" -#define SMC_WR_MAX_CQE 32768 /* max. # of completion queue elements */ #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 48fd3b5a73fb..97cd857d7f43 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -64,23 +64,6 @@ int in_own_node(struct net *net, u32 addr) } /** - * addr_domain - convert 2-bit scope value to equivalent message lookup domain - * - * Needed when address of a named message must be looked up a second time - * after a network hop. 
- */ -u32 addr_domain(struct net *net, u32 sc) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - if (likely(sc == TIPC_NODE_SCOPE)) - return tn->own_addr; - if (sc == TIPC_CLUSTER_SCOPE) - return tipc_cluster_mask(tn->own_addr); - return tipc_zone_mask(tn->own_addr); -} - -/** * tipc_addr_domain_valid - validates a network domain address * * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>, @@ -124,20 +107,6 @@ int tipc_in_scope(u32 domain, u32 addr) return 0; } -/** - * tipc_addr_scope - convert message lookup domain to a 2-bit scope value - */ -int tipc_addr_scope(u32 domain) -{ - if (likely(!domain)) - return TIPC_ZONE_SCOPE; - if (tipc_node(domain)) - return TIPC_NODE_SCOPE; - if (tipc_cluster(domain)) - return TIPC_CLUSTER_SCOPE; - return TIPC_ZONE_SCOPE; -} - char *tipc_addr_string_fill(char *string, u32 addr) { snprintf(string, 16, "<%u.%u.%u>", diff --git a/net/tipc/addr.h b/net/tipc/addr.h index bebb347803ce..2ecf5a5d40dd 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -60,6 +60,16 @@ static inline u32 tipc_cluster_mask(u32 addr) return addr & TIPC_ZONE_CLUSTER_MASK; } +static inline int tipc_node2scope(u32 node) +{ + return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE; +} + +static inline int tipc_scope2node(struct net *net, int sc) +{ + return sc != TIPC_NODE_SCOPE ? 
0 : tipc_own_addr(net); +} + u32 tipc_own_addr(struct net *net); int in_own_cluster(struct net *net, u32 addr); int in_own_cluster_exact(struct net *net, u32 addr); diff --git a/net/tipc/core.c b/net/tipc/core.c index 0b982d048fb9..04fd91bb11d7 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -105,6 +105,7 @@ static struct pernet_operations tipc_net_ops = { .exit = tipc_exit_net, .id = &tipc_net_id, .size = sizeof(struct tipc_net), + .async = true, }; static int __init tipc_init(void) diff --git a/net/tipc/core.h b/net/tipc/core.h index ff8b071654f5..347f850dc872 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -131,6 +131,11 @@ static inline struct list_head *tipc_nodes(struct net *net) return &tipc_net(net)->node_list; } +static inline struct name_table *tipc_name_table(struct net *net) +{ + return tipc_net(net)->nametbl; +} + static inline struct tipc_topsrv *tipc_topsrv(struct net *net) { return tipc_net(net)->topsrv; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 4e1c6f6450bb..b6c45dccba3d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -580,7 +580,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg = buf_msg(skb); if (msg_reroute_cnt(msg)) return false; - dnode = addr_domain(net, msg_lookup_scope(msg)); + dnode = tipc_scope2node(net, msg_lookup_scope(msg)); dport = tipc_nametbl_translate(net, msg_nametype(msg), msg_nameinst(msg), &dnode); if (!dport) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 23f8899e0f8c..28d095a7d8bb 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -56,7 +56,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p) i->type = htonl(p->type); i->lower = htonl(p->lower); i->upper = htonl(p->upper); - i->ref = htonl(p->ref); + i->port = htonl(p->port); i->key = htonl(p->key); } @@ -86,25 +86,25 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, */ struct sk_buff *tipc_named_publish(struct net *net, struct 
publication *publ) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *buf; + struct name_table *nt = tipc_name_table(net); struct distr_item *item; + struct sk_buff *skb; - list_add_tail_rcu(&publ->local_list, - &tn->nametbl->publ_list[publ->scope]); - - if (publ->scope == TIPC_NODE_SCOPE) + if (publ->scope == TIPC_NODE_SCOPE) { + list_add_tail_rcu(&publ->binding_node, &nt->node_scope); return NULL; + } + list_add_tail_rcu(&publ->binding_node, &nt->cluster_scope); - buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); - if (!buf) { + skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); + if (!skb) { pr_warn("Publication distribution failure\n"); return NULL; } - item = (struct distr_item *)msg_data(buf_msg(buf)); + item = (struct distr_item *)msg_data(buf_msg(skb)); publ_to_item(item, publ); - return buf; + return skb; } /** @@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) struct sk_buff *buf; struct distr_item *item; - list_del(&publ->local_list); + list_del(&publ->binding_node); if (publ->scope == TIPC_NODE_SCOPE) return NULL; @@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, ITEM_SIZE) * ITEM_SIZE; u32 msg_rem = msg_dsz; - list_for_each_entry(publ, pls, local_list) { + list_for_each_entry(publ, pls, binding_node) { /* Prepare next buffer: */ if (!skb) { skb = named_prepare_buf(net, PUBLICATION, msg_rem, @@ -184,16 +184,13 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, */ void tipc_named_node_up(struct net *net, u32 dnode) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *nt = tipc_name_table(net); struct sk_buff_head head; __skb_queue_head_init(&head); rcu_read_lock(); - named_distribute(net, &head, dnode, - &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]); - named_distribute(net, &head, dnode, - &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); + named_distribute(net, &head, dnode, 
&nt->cluster_scope); rcu_read_unlock(); tipc_node_xmit(net, &head, dnode, 0); @@ -212,15 +209,15 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) spin_lock_bh(&tn->nametbl_lock); p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, - publ->node, publ->ref, publ->key); + publ->node, publ->port, publ->key); if (p) - tipc_node_unsubscribe(net, &p->nodesub_list, addr); + tipc_node_unsubscribe(net, &p->binding_node, addr); spin_unlock_bh(&tn->nametbl_lock); if (p != publ) { pr_err("Unable to remove publication from failed node\n" - " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n", - publ->type, publ->lower, publ->node, publ->ref, + " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n", + publ->type, publ->lower, publ->node, publ->port, publ->key); } @@ -249,7 +246,7 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) { struct publication *publ, *tmp; - list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) + list_for_each_entry_safe(publ, tmp, nsub_list, binding_node) tipc_publ_purge(net, publ, addr); tipc_dist_queue_purge(net, addr); } @@ -271,18 +268,18 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, ntohl(i->lower), ntohl(i->upper), TIPC_CLUSTER_SCOPE, node, - ntohl(i->ref), ntohl(i->key)); + ntohl(i->port), ntohl(i->key)); if (publ) { - tipc_node_subscribe(net, &publ->nodesub_list, node); + tipc_node_subscribe(net, &publ->binding_node, node); return true; } } else if (dtype == WITHDRAWAL) { publ = tipc_nametbl_remove_publ(net, ntohl(i->type), ntohl(i->lower), - node, ntohl(i->ref), + node, ntohl(i->port), ntohl(i->key)); if (publ) { - tipc_node_unsubscribe(net, &publ->nodesub_list, node); + tipc_node_unsubscribe(net, &publ->binding_node, node); kfree_rcu(publ, rcu); return true; } @@ -382,16 +379,16 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) */ void tipc_named_reinit(struct net *net) { - struct tipc_net *tn = net_generic(net, 
tipc_net_id); + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); struct publication *publ; - int scope; spin_lock_bh(&tn->nametbl_lock); - for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++) - list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope], - local_list) - publ->node = tn->own_addr; + list_for_each_entry_rcu(publ, &nt->node_scope, binding_node) + publ->node = tn->own_addr; + list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node) + publ->node = tn->own_addr; spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 1264ba0af937..4753e628d7c4 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -63,7 +63,7 @@ struct distr_item { __be32 type; __be32 lower; __be32 upper; - __be32 ref; + __be32 port; __be32 key; }; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index e01c9c691ba2..bbbfc0702634 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1,7 +1,7 @@ /* * net/tipc/name_table.c: TIPC name table code * - * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2008, 2010-2014, Wind River Systems * All rights reserved. * @@ -50,24 +50,12 @@ /** * struct name_info - name sequence publication info - * @node_list: circular list of publications made by own node - * @cluster_list: circular list of publications made by own cluster - * @zone_list: circular list of publications made by own zone - * @node_list_size: number of entries in "node_list" - * @cluster_list_size: number of entries in "cluster_list" - * @zone_list_size: number of entries in "zone_list" - * - * Note: The zone list always contains at least one entry, since all - * publications of the associated name sequence belong to it. - * (The cluster and node lists may be empty.) 
+ * @node_list: list of publications on own node of this <type,lower,upper> + * @all_publ: list of all publications of this <type,lower,upper> */ struct name_info { - struct list_head node_list; - struct list_head cluster_list; - struct list_head zone_list; - u32 node_list_size; - u32 cluster_list_size; - u32 zone_list_size; + struct list_head local_publ; + struct list_head all_publ; }; /** @@ -114,7 +102,7 @@ static int hash(int x) * publ_create - create a publication structure */ static struct publication *publ_create(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port_ref, + u32 scope, u32 node, u32 port, u32 key) { struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC); @@ -128,9 +116,9 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper, publ->upper = upper; publ->scope = scope; publ->node = node; - publ->ref = port_ref; + publ->port = port; publ->key = key; - INIT_LIST_HEAD(&publ->pport_list); + INIT_LIST_HEAD(&publ->binding_sock); return publ; } @@ -249,9 +237,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, info = sseq->info; /* Check if an identical publication already exists */ - list_for_each_entry(publ, &info->zone_list, zone_list) { - if ((publ->ref == port) && (publ->key == key) && - (!publ->node || (publ->node == node))) + list_for_each_entry(publ, &info->all_publ, all_publ) { + if (publ->port == port && publ->key == key && + (!publ->node || publ->node == node)) return NULL; } } else { @@ -290,9 +278,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, return NULL; } - INIT_LIST_HEAD(&info->node_list); - INIT_LIST_HEAD(&info->cluster_list); - INIT_LIST_HEAD(&info->zone_list); + INIT_LIST_HEAD(&info->local_publ); + INIT_LIST_HEAD(&info->all_publ); /* Insert new sub-sequence */ sseq = &nseq->sseqs[inspos]; @@ -311,23 +298,15 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, if (!publ) return NULL; - list_add(&publ->zone_list, &info->zone_list); - 
info->zone_list_size++; - - if (in_own_cluster(net, node)) { - list_add(&publ->cluster_list, &info->cluster_list); - info->cluster_list_size++; - } + list_add(&publ->all_publ, &info->all_publ); - if (in_own_node(net, node)) { - list_add(&publ->node_list, &info->node_list); - info->node_list_size++; - } + if (in_own_node(net, node)) + list_add(&publ->local_publ, &info->local_publ); /* Any subscriptions waiting for notification? */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_sub_report_overlap(s, publ->lower, publ->upper, - TIPC_PUBLISHED, publ->ref, + TIPC_PUBLISHED, publ->port, publ->node, publ->scope, created_subseq); } @@ -348,7 +327,7 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, static struct publication *tipc_nameseq_remove_publ(struct net *net, struct name_seq *nseq, u32 inst, u32 node, - u32 ref, u32 key) + u32 port, u32 key) { struct publication *publ; struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); @@ -363,32 +342,20 @@ static struct publication *tipc_nameseq_remove_publ(struct net *net, info = sseq->info; /* Locate publication, if it exists */ - list_for_each_entry(publ, &info->zone_list, zone_list) { - if ((publ->key == key) && (publ->ref == ref) && - (!publ->node || (publ->node == node))) + list_for_each_entry(publ, &info->all_publ, all_publ) { + if (publ->key == key && publ->port == port && + (!publ->node || publ->node == node)) goto found; } return NULL; found: - /* Remove publication from zone scope list */ - list_del(&publ->zone_list); - info->zone_list_size--; - - /* Remove publication from cluster scope list, if present */ - if (in_own_cluster(net, node)) { - list_del(&publ->cluster_list); - info->cluster_list_size--; - } - - /* Remove publication from node scope list, if present */ - if (in_own_node(net, node)) { - list_del(&publ->node_list); - info->node_list_size--; - } + list_del(&publ->all_publ); + if (in_own_node(net, node)) + list_del(&publ->local_publ); /* Contract subseq 
list if no more publications for that subseq */ - if (list_empty(&info->zone_list)) { + if (list_empty(&info->all_publ)) { kfree(info); free = &nseq->sseqs[nseq->first_free--]; memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq)); @@ -398,8 +365,9 @@ found: /* Notify any waiting subscriptions */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_sub_report_overlap(s, publ->lower, publ->upper, - TIPC_WITHDRAWN, publ->ref, publ->node, - publ->scope, removed_subseq); + TIPC_WITHDRAWN, publ->port, + publ->node, publ->scope, + removed_subseq); } return publ; @@ -435,11 +403,12 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, struct name_info *info = sseq->info; int must_report = 1; - list_for_each_entry(crs, &info->zone_list, zone_list) { + list_for_each_entry(crs, &info->all_publ, all_publ) { tipc_sub_report_overlap(sub, sseq->lower, sseq->upper, TIPC_PUBLISHED, - crs->ref, crs->node, + crs->port, + crs->node, crs->scope, must_report); must_report = 0; @@ -473,8 +442,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, struct name_seq *seq = nametbl_find_seq(net, type); int index = hash(type); - if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) || - (lower > upper)) { + if (scope > TIPC_NODE_SCOPE || lower > upper) { pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n", type, lower, upper, scope); return NULL; @@ -493,7 +461,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, } struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, - u32 lower, u32 node, u32 ref, + u32 lower, u32 node, u32 port, u32 key) { struct publication *publ; @@ -503,7 +471,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, return NULL; spin_lock_bh(&seq->lock); - publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key); + publ = tipc_nameseq_remove_publ(net, seq, lower, node, port, key); if (!seq->first_free && 
list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); @@ -536,7 +504,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, struct name_info *info; struct publication *publ; struct name_seq *seq; - u32 ref = 0; + u32 port = 0; u32 node = 0; if (!tipc_in_scope(*destnode, tn->own_addr)) @@ -554,54 +522,42 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, /* Closest-First Algorithm */ if (likely(!*destnode)) { - if (!list_empty(&info->node_list)) { - publ = list_first_entry(&info->node_list, - struct publication, - node_list); - list_move_tail(&publ->node_list, - &info->node_list); - } else if (!list_empty(&info->cluster_list)) { - publ = list_first_entry(&info->cluster_list, + if (!list_empty(&info->local_publ)) { + publ = list_first_entry(&info->local_publ, struct publication, - cluster_list); - list_move_tail(&publ->cluster_list, - &info->cluster_list); + local_publ); + list_move_tail(&publ->local_publ, + &info->local_publ); } else { - publ = list_first_entry(&info->zone_list, + publ = list_first_entry(&info->all_publ, struct publication, - zone_list); - list_move_tail(&publ->zone_list, - &info->zone_list); + all_publ); + list_move_tail(&publ->all_publ, + &info->all_publ); } } /* Round-Robin Algorithm */ else if (*destnode == tn->own_addr) { - if (list_empty(&info->node_list)) + if (list_empty(&info->local_publ)) goto no_match; - publ = list_first_entry(&info->node_list, struct publication, - node_list); - list_move_tail(&publ->node_list, &info->node_list); - } else if (in_own_cluster_exact(net, *destnode)) { - if (list_empty(&info->cluster_list)) - goto no_match; - publ = list_first_entry(&info->cluster_list, struct publication, - cluster_list); - list_move_tail(&publ->cluster_list, &info->cluster_list); + publ = list_first_entry(&info->local_publ, struct publication, + local_publ); + list_move_tail(&publ->local_publ, &info->local_publ); } else { - publ = list_first_entry(&info->zone_list, 
struct publication, - zone_list); - list_move_tail(&publ->zone_list, &info->zone_list); + publ = list_first_entry(&info->all_publ, struct publication, + all_publ); + list_move_tail(&publ->all_publ, &info->all_publ); } - ref = publ->ref; + port = publ->port; node = publ->node; no_match: spin_unlock_bh(&seq->lock); not_found: rcu_read_unlock(); *destnode = node; - return ref; + return port; } bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, @@ -623,16 +579,16 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, sseq = nameseq_find_subseq(seq, instance); if (likely(sseq)) { info = sseq->info; - list_for_each_entry(publ, &info->zone_list, zone_list) { + list_for_each_entry(publ, &info->all_publ, all_publ) { if (publ->scope != scope) continue; - if (publ->ref == exclude && publ->node == self) + if (publ->port == exclude && publ->node == self) continue; - tipc_dest_push(dsts, publ->node, publ->ref); + tipc_dest_push(dsts, publ->node, publ->port); (*dstcnt)++; if (all) continue; - list_move_tail(&publ->zone_list, &info->zone_list); + list_move_tail(&publ->all_publ, &info->all_publ); break; } } @@ -642,15 +598,14 @@ exit: return !list_empty(dsts); } -int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, - u32 scope, bool exact, struct list_head *dports) +void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports) { struct sub_seq *sseq_stop; struct name_info *info; struct publication *p; struct name_seq *seq; struct sub_seq *sseq; - int res = 0; rcu_read_lock(); seq = nametbl_find_seq(net, type); @@ -664,18 +619,14 @@ int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, if (sseq->lower > upper) break; info = sseq->info; - list_for_each_entry(p, &info->node_list, node_list) { + list_for_each_entry(p, &info->local_publ, local_publ) { if (p->scope == scope || (!exact && p->scope < scope)) - 
tipc_dest_push(dports, 0, p->ref); + tipc_dest_push(dports, 0, p->port); } - - if (info->cluster_list_size != info->node_list_size) - res = 1; } spin_unlock_bh(&seq->lock); exit: rcu_read_unlock(); - return res; } /* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes @@ -700,7 +651,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, stop = seq->sseqs + seq->first_free; for (; sseq != stop && sseq->lower <= upper; sseq++) { info = sseq->info; - list_for_each_entry(publ, &info->zone_list, zone_list) { + list_for_each_entry(publ, &info->all_publ, all_publ) { tipc_nlist_add(nodes, publ->node); } } @@ -729,10 +680,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, stop = seq->sseqs + seq->first_free; for (; sseq != stop; sseq++) { info = sseq->info; - list_for_each_entry(p, &info->zone_list, zone_list) { + list_for_each_entry(p, &info->all_publ, all_publ) { if (p->scope != scope) continue; - tipc_group_add_member(grp, p->node, p->ref, p->lower); + tipc_group_add_member(grp, p->node, p->port, p->lower); } } spin_unlock_bh(&seq->lock); @@ -777,7 +728,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, /** * tipc_nametbl_withdraw - withdraw name publication from network name tables */ -int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 port, u32 key) { struct publication *publ; @@ -786,18 +737,18 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, spin_lock_bh(&tn->nametbl_lock); publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr, - ref, key); + port, key); if (likely(publ)) { tn->nametbl->local_publ_count--; skb = tipc_named_withdraw(net, publ); /* Any pending external events? 
*/ tipc_named_process_backlog(net); - list_del_init(&publ->pport_list); + list_del_init(&publ->binding_sock); kfree_rcu(publ, rcu); } else { pr_err("Unable to remove local publication\n" - "(type=%u, lower=%u, ref=%u, key=%u)\n", - type, lower, ref, key); + "(type=%u, lower=%u, port=%u, key=%u)\n", + type, lower, port, key); } spin_unlock_bh(&tn->nametbl_lock); @@ -879,9 +830,8 @@ int tipc_nametbl_init(struct net *net) for (i = 0; i < TIPC_NAMETBL_SIZE; i++) INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]); - INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); - INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); - INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]); + INIT_LIST_HEAD(&tipc_nametbl->node_scope); + INIT_LIST_HEAD(&tipc_nametbl->cluster_scope); tn->nametbl = tipc_nametbl; spin_lock_init(&tn->nametbl_lock); return 0; @@ -901,9 +851,9 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq) spin_lock_bh(&seq->lock); sseq = seq->sseqs; info = sseq->info; - list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { + list_for_each_entry_safe(publ, safe, &info->all_publ, all_publ) { tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node, - publ->ref, publ->key); + publ->port, publ->key); kfree_rcu(publ, rcu); } hlist_del_init_rcu(&seq->ns_list); @@ -950,17 +900,17 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, struct publication *p; if (*last_publ) { - list_for_each_entry(p, &sseq->info->zone_list, zone_list) + list_for_each_entry(p, &sseq->info->all_publ, all_publ) if (p->key == *last_publ) break; if (p->key != *last_publ) return -EPIPE; } else { - p = list_first_entry(&sseq->info->zone_list, struct publication, - zone_list); + p = list_first_entry(&sseq->info->all_publ, struct publication, + all_publ); } - list_for_each_entry_from(p, &sseq->info->zone_list, zone_list) { + list_for_each_entry_from(p, &sseq->info->all_publ, all_publ) { *last_publ = p->key; hdr = genlmsg_put(msg->skb, 
msg->portid, msg->seq, @@ -987,7 +937,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, goto publ_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node)) goto publ_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->ref)) + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port)) goto publ_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key)) goto publ_msg_full; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 17652602d5e2..34a4ccb907aa 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -1,7 +1,7 @@ /* * net/tipc/name_table.h: Include file for TIPC name table code * - * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2000-2006, 2014-2018, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -54,19 +54,22 @@ struct tipc_group; * @type: name sequence type * @lower: name sequence lower bound * @upper: name sequence upper bound - * @scope: scope of publication - * @node: network address of publishing port's node - * @ref: publishing port - * @key: publication key - * @nodesub_list: subscription to "node down" event (off-node publication only) - * @local_list: adjacent entries in list of publications made by this node - * @pport_list: adjacent entries in list of publications made by this port - * @node_list: adjacent matching name seq publications with >= node scope - * @cluster_list: adjacent matching name seq publications with >= cluster scope - * @zone_list: adjacent matching name seq publications with >= zone scope + * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE + * @node: network address of publishing socket's node + * @port: publishing port + * @key: publication key, unique across the cluster + * @binding_node: all publications from the same node which bound this one + * - Remote publications: in node->publ_list + * Used by node/name distr to withdraw publications when node is lost + * - Local/node scope 
publications: in name_table->node_scope list + * - Local/cluster scope publications: in name_table->cluster_scope list + * @binding_sock: all publications from the same socket which bound this one + * Used by socket to withdraw publications when socket is unbound/released + * @local_publ: list of identical publications made from this node + * Used by closest_first and multicast receive lookup algorithms + * @all_publ: all publications identical to this one, whatever node and scope + * Used by round-robin lookup algorithm * @rcu: RCU callback head used for deferred freeing - * - * Note that the node list, cluster list, and zone list are circular lists. */ struct publication { u32 type; @@ -74,34 +77,37 @@ struct publication { u32 upper; u32 scope; u32 node; - u32 ref; + u32 port; u32 key; - struct list_head nodesub_list; - struct list_head local_list; - struct list_head pport_list; - struct list_head node_list; - struct list_head cluster_list; - struct list_head zone_list; + struct list_head binding_node; + struct list_head binding_sock; + struct list_head local_publ; + struct list_head all_publ; struct rcu_head rcu; }; /** * struct name_table - table containing all existing port name publications * @seq_hlist: name sequence hash lists - * @publ_list: pulication lists + * @node_scope: all local publications with node scope + * - used by name_distr during re-init of name table + * @cluster_scope: all local publications with cluster scope + * - used by name_distr to send bulk updates to new nodes + * - used by name_distr during re-init of name table * @local_publ_count: number of publications issued by this node */ struct name_table { struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE]; - struct list_head publ_list[TIPC_PUBL_SCOPE_NUM]; + struct list_head node_scope; + struct list_head cluster_scope; u32 local_publ_count; }; int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 
*node); -int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, - u32 scope, bool exact, struct list_head *dports); +void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports); void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, u32 type, u32 domain); void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, diff --git a/net/tipc/net.c b/net/tipc/net.c index 1a2fde0d6f61..5c4c4405b78e 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -118,7 +118,7 @@ int tipc_net_start(struct net *net, u32 addr) tipc_sk_reinit(net); tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, - TIPC_ZONE_SCOPE, 0, tn->own_addr); + TIPC_CLUSTER_SCOPE, 0, tn->own_addr); pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 8b04e601311c..a4a9148d4629 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -644,7 +644,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, goto exit; } - res = (addr->scope > 0) ? + res = (addr->scope >= 0) ? 
tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); exit: @@ -1280,8 +1280,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq *seq; struct sk_buff_head pkts; - u32 type, inst, domain; u32 dnode, dport; + u32 type, inst; int mtu, rc; if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) @@ -1332,13 +1332,12 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (dest->addrtype == TIPC_ADDR_NAME) { type = dest->addr.name.name.type; inst = dest->addr.name.name.instance; - domain = dest->addr.name.domain; - dnode = domain; + dnode = dest->addr.name.domain; msg_set_type(hdr, TIPC_NAMED_MSG); msg_set_hdr_sz(hdr, NAMED_H_SIZE); msg_set_nametype(hdr, type); msg_set_nameinst(hdr, inst); - msg_set_lookup_scope(hdr, tipc_addr_scope(domain)); + msg_set_lookup_scope(hdr, tipc_node2scope(dnode)); dport = tipc_nametbl_translate(net, type, inst, &dnode); msg_set_destnode(hdr, dnode); msg_set_destport(hdr, dport); @@ -2592,6 +2591,9 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct publication *publ; u32 key; + if (scope != TIPC_NODE_SCOPE) + scope = TIPC_CLUSTER_SCOPE; + if (tipc_sk_connected(sk)) return -EINVAL; key = tsk->portid + tsk->pub_count + 1; @@ -2603,7 +2605,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, if (unlikely(!publ)) return -EINVAL; - list_add(&publ->pport_list, &tsk->publications); + list_add(&publ->binding_sock, &tsk->publications); tsk->pub_count++; tsk->published = 1; return 0; @@ -2617,7 +2619,10 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct publication *safe; int rc = -EINVAL; - list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) { + if (scope != TIPC_NODE_SCOPE) + scope = TIPC_CLUSTER_SCOPE; + + list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) { if (seq) { if (publ->scope != scope) 
continue; @@ -2628,12 +2633,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, if (publ->upper != seq->upper) break; tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->ref, publ->key); + publ->port, publ->key); rc = 0; break; } tipc_nametbl_withdraw(net, publ->type, publ->lower, - publ->ref, publ->key); + publ->port, publ->key); rc = 0; } if (list_empty(&tsk->publications)) @@ -3287,7 +3292,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, struct publication *p; if (*last_publ) { - list_for_each_entry(p, &tsk->publications, pport_list) { + list_for_each_entry(p, &tsk->publications, binding_sock) { if (p->key == *last_publ) break; } @@ -3304,10 +3309,10 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, } } else { p = list_first_entry(&tsk->publications, struct publication, - pport_list); + binding_sock); } - list_for_each_entry_from(p, &tsk->publications, pport_list) { + list_for_each_entry_from(p, &tsk->publications, binding_sock) { err = __tipc_nl_add_sk_publ(skb, cb, p); if (err) { *last_publ = p->key; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7f52b8eb177d..aff2e84ec761 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3258,6 +3258,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list) static struct pernet_operations xfrm_user_net_ops = { .init = xfrm_user_net_init, .exit_batch = xfrm_user_net_exit, + .async = true, }; static int __init xfrm_user_init(void) |