Diffstat (limited to 'net')
119 files changed, 3121 insertions, 2735 deletions
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c88bd8e8937e..a642bb829d09 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -495,7 +495,9 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
 static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
 					       struct net_bridge_port *source,
 					       const unsigned char *addr,
-					       __u16 vid)
+					       __u16 vid,
+					       unsigned char is_local,
+					       unsigned char is_static)
 {
 	struct net_bridge_fdb_entry *fdb;
 
@@ -504,8 +506,8 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
 		memcpy(fdb->addr.addr, addr, ETH_ALEN);
 		fdb->dst = source;
 		fdb->vlan_id = vid;
-		fdb->is_local = 0;
-		fdb->is_static = 0;
+		fdb->is_local = is_local;
+		fdb->is_static = is_static;
 		fdb->added_by_user = 0;
 		fdb->added_by_external_learn = 0;
 		fdb->updated = fdb->used = jiffies;
@@ -536,11 +538,10 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		fdb_delete(br, fdb);
 	}
 
-	fdb = fdb_create(head, source, addr, vid);
+	fdb = fdb_create(head, source, addr, vid, 1, 1);
 	if (!fdb)
 		return -ENOMEM;
 
-	fdb->is_local = fdb->is_static = 1;
 	fdb_add_hw_addr(br, addr);
 	fdb_notify(br, fdb, RTM_NEWNEIGH);
 	return 0;
@@ -597,7 +598,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 	} else {
 		spin_lock(&br->hash_lock);
 		if (likely(!fdb_find(head, addr, vid))) {
-			fdb = fdb_create(head, source, addr, vid);
+			fdb = fdb_create(head, source, addr, vid, 0, 0);
 			if (fdb) {
 				if (unlikely(added_by_user))
 					fdb->added_by_user = 1;
@@ -774,7 +775,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
 		if (!(flags & NLM_F_CREATE))
 			return -ENOENT;
 
-		fdb = fdb_create(head, source, addr, vid);
+		fdb = fdb_create(head, source, addr, vid, 0, 0);
 		if (!fdb)
 			return -ENOMEM;
 
@@ -1099,7 +1100,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 	head = &br->hash[br_mac_hash(addr, vid)];
 	fdb = fdb_find(head, addr, vid);
 	if (!fdb) {
-		fdb = fdb_create(head, p, addr, vid);
+		fdb = fdb_create(head, p, addr, vid, 0, 0);
 		if (!fdb) {
 			err = -ENOMEM;
 			goto err_unlock;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index a9d424e20229..fcdb86dd5a23 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -141,7 +141,7 @@ EXPORT_SYMBOL_GPL(br_deliver);
 /* called with rcu_read_lock */
 void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0)
 {
-	if (should_deliver(to, skb)) {
+	if (to && should_deliver(to, skb)) {
 		if (skb0)
 			deliver_clone(to, skb, __br_forward);
 		else
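The fdb_create() change above is a publish-after-init fix: the is_local/is_static flags are now set before the entry is linked into the hash, so lockless readers can never observe them half-initialized. A minimal userspace sketch of that pattern, with illustrative names and a plain pointer standing in for the RCU hlist publish:

    #include <stdlib.h>
    #include <string.h>

    struct fdb_entry {
    	unsigned char addr[6];
    	unsigned char is_local;
    	unsigned char is_static;
    	struct fdb_entry *next;		/* stands in for the RCU hlist linkage */
    };

    static struct fdb_entry *fdb_create_sketch(struct fdb_entry **head,
    					   const unsigned char *addr,
    					   unsigned char is_local,
    					   unsigned char is_static)
    {
    	struct fdb_entry *e = malloc(sizeof(*e));

    	if (!e)
    		return NULL;
    	memcpy(e->addr, addr, 6);
    	e->is_local = is_local;		/* set before the entry is visible */
    	e->is_static = is_static;
    	e->next = *head;		/* the kernel publishes via RCU here */
    	*head = e;
    	return e;
    }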
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 94b4de8c4646..40197ff8918a 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1214,29 +1214,10 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
 	return 0;
 }
 
-static size_t br_get_link_af_size(const struct net_device *dev)
-{
-	struct net_bridge_port *p;
-	struct net_bridge *br;
-	int num_vlans = 0;
-
-	if (br_port_exists(dev)) {
-		p = br_port_get_rtnl(dev);
-		num_vlans = br_get_num_vlan_infos(nbp_vlan_group(p),
-						  RTEXT_FILTER_BRVLAN);
-	} else if (dev->priv_flags & IFF_EBRIDGE) {
-		br = netdev_priv(dev);
-		num_vlans = br_get_num_vlan_infos(br_vlan_group(br),
-						  RTEXT_FILTER_BRVLAN);
-	}
-
-	/* Each VLAN is returned in bridge_vlan_info along with flags */
-	return num_vlans * nla_total_size(sizeof(struct bridge_vlan_info));
-}
 
 static struct rtnl_af_ops br_af_ops __read_mostly = {
 	.family			= AF_BRIDGE,
-	.get_link_af_size	= br_get_link_af_size,
+	.get_link_af_size	= br_get_link_af_size_filtered,
 };
 
 struct rtnl_link_ops br_link_ops __read_mostly = {
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 4ca449a16132..fa53d7a89f48 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -15,6 +15,7 @@
 #include <linux/kmod.h>
 #include <linux/etherdevice.h>
 #include <linux/rtnetlink.h>
+#include <net/switchdev.h>
 
 #include "br_private.h"
 #include "br_private_stp.h"
@@ -35,11 +36,22 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no)
 /* called under bridge lock */
 void br_init_port(struct net_bridge_port *p)
 {
+	struct switchdev_attr attr = {
+		.id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
+		.flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER,
+		.u.ageing_time = p->br->ageing_time,
+	};
+	int err;
+
 	p->port_id = br_make_port_id(p->priority, p->port_no);
 	br_become_designated_port(p);
 	br_set_state(p, BR_STATE_BLOCKING);
 	p->topology_change_ack = 0;
 	p->config_pending = 0;
+
+	err = switchdev_port_attr_set(p->dev, &attr);
+	if (err)
+		netdev_err(p->dev, "failed to set HW ageing time\n");
 }
 
 /* called under bridge lock */
diff --git a/net/core/dev.c b/net/core/dev.c
index 1225b4be8ed6..13f49f81ae13 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -99,6 +99,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/stat.h>
 #include <net/dst.h>
+#include <net/dst_metadata.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
@@ -682,6 +683,32 @@ int dev_get_iflink(const struct net_device *dev)
 EXPORT_SYMBOL(dev_get_iflink);
 
 /**
+ *	dev_fill_metadata_dst - Retrieve tunnel egress information.
+ *	@dev: targeted interface
+ *	@skb: The packet.
+ *
+ *	For better visibility of tunnel traffic OVS needs to retrieve
+ *	egress tunnel information for a packet. Following API allows
+ *	user to get this info.
+ */
+int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ip_tunnel_info *info;
+
+	if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
+		return -EINVAL;
+
+	info = skb_tunnel_info_unclone(skb);
+	if (!info)
+		return -ENOMEM;
+	if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
+		return -EINVAL;
+
+	return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
+}
+EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+
+/**
  *	__dev_get_by_name - find a device by its name
  *	@net: the applicable net namespace
  *	@name: name to find
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 24775953fa68..504bd17b7456 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -497,7 +497,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops)
 }
 EXPORT_SYMBOL_GPL(rtnl_af_unregister);
 
-static size_t rtnl_link_get_af_size(const struct net_device *dev)
+static size_t rtnl_link_get_af_size(const struct net_device *dev,
+				    u32 ext_filter_mask)
 {
 	struct rtnl_af_ops *af_ops;
 	size_t size;
@@ -509,7 +510,7 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev)
 		if (af_ops->get_link_af_size) {
 			/* AF_* + nested data */
 			size += nla_total_size(sizeof(struct nlattr)) +
-				af_ops->get_link_af_size(dev);
+				af_ops->get_link_af_size(dev, ext_filter_mask);
 		}
 	}
 
@@ -837,7 +838,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 			 /* IFLA_VF_STATS_BROADCAST */
 			 nla_total_size(sizeof(__u64)) +
 			 /* IFLA_VF_STATS_MULTICAST */
-			 nla_total_size(sizeof(__u64)));
+			 nla_total_size(sizeof(__u64)) +
+			 nla_total_size(sizeof(struct ifla_vf_trust)));
 		return size;
 	} else
 		return 0;
@@ -900,7 +902,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
 	       + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
 	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
-	       + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+	       + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
 	       + nla_total_size(1); /* IFLA_PROTO_DOWN */
@@ -1160,6 +1162,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			struct ifla_vf_link_state vf_linkstate;
 			struct ifla_vf_rss_query_en vf_rss_query_en;
 			struct ifla_vf_stats vf_stats;
+			struct ifla_vf_trust vf_trust;
 
 			/*
 			 * Not all SR-IOV capable drivers support the
@@ -1169,6 +1172,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			 */
 			ivi.spoofchk = -1;
 			ivi.rss_query_en = -1;
+			ivi.trusted = -1;
 			memset(ivi.mac, 0, sizeof(ivi.mac));
 			/* The default value for VF link state is "auto"
 			 * IFLA_VF_LINK_STATE_AUTO which equals zero
@@ -1182,7 +1186,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			vf_tx_rate.vf =
 				vf_spoofchk.vf =
 				vf_linkstate.vf =
-				vf_rss_query_en.vf = ivi.vf;
+				vf_rss_query_en.vf =
+				vf_trust.vf = ivi.vf;
 
 			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
 			vf_vlan.vlan = ivi.vlan;
@@ -1193,6 +1198,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			vf_spoofchk.setting = ivi.spoofchk;
 			vf_linkstate.link_state = ivi.linkstate;
 			vf_rss_query_en.setting = ivi.rss_query_en;
+			vf_trust.setting = ivi.trusted;
 			vf = nla_nest_start(skb, IFLA_VF_INFO);
 			if (!vf) {
 				nla_nest_cancel(skb, vfinfo);
@@ -1210,7 +1216,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 				    &vf_linkstate) ||
 			    nla_put(skb, IFLA_VF_RSS_QUERY_EN,
 				    sizeof(vf_rss_query_en),
-				    &vf_rss_query_en))
+				    &vf_rss_query_en) ||
+			    nla_put(skb, IFLA_VF_TRUST,
+				    sizeof(vf_trust), &vf_trust))
 				goto nla_put_failure;
 			memset(&vf_stats, 0, sizeof(vf_stats));
 			if (dev->netdev_ops->ndo_get_vf_stats)
@@ -1347,6 +1355,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_LINK_STATE]	= { .len = sizeof(struct ifla_vf_link_state) },
 	[IFLA_VF_RSS_QUERY_EN]	= { .len = sizeof(struct ifla_vf_rss_query_en) },
 	[IFLA_VF_STATS]		= { .type = NLA_NESTED },
+	[IFLA_VF_TRUST]		= { .len = sizeof(struct ifla_vf_trust) },
 };
 
 static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
@@ -1586,6 +1595,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
 			return err;
 	}
 
+	if (tb[IFLA_VF_TRUST]) {
+		struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]);
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_trust)
+			err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting);
+		if (err < 0)
+			return err;
+	}
+
 	return err;
 }
 
@@ -3443,4 +3462,3 @@ void __init rtnetlink_init(void)
 	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
 	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
 }
-
diff --git a/net/core/sock.c b/net/core/sock.c
index dcc7d62654d5..0ef30aa90132 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -422,13 +422,25 @@ static void sock_warn_obsolete_bsdism(const char *name)
 	}
 }
 
+static bool sock_needs_netstamp(const struct sock *sk)
+{
+	switch (sk->sk_family) {
+	case AF_UNSPEC:
+	case AF_UNIX:
+		return false;
+	default:
+		return true;
+	}
+}
+
 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
 
 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
 {
 	if (sk->sk_flags & flags) {
 		sk->sk_flags &= ~flags;
-		if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
+		if (sock_needs_netstamp(sk) &&
+		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
 			net_disable_timestamp();
 	}
 }
@@ -1582,7 +1594,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		if (newsk->sk_prot->sockets_allocated)
 			sk_sockets_allocated_inc(newsk);
 
-		if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+		if (sock_needs_netstamp(sk) &&
+		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
 			net_enable_timestamp();
 	}
 out:
@@ -2510,7 +2523,8 @@ void sock_enable_timestamp(struct sock *sk, int flag)
 		 * time stamping, but time stamping might have been on
 		 * already because of the other one
 		 */
-		if (!(previous_flags & SK_FLAGS_TIMESTAMP))
+		if (sock_needs_netstamp(sk) &&
+		    !(previous_flags & SK_FLAGS_TIMESTAMP))
 			net_enable_timestamp();
 	}
 }
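The point of sock_needs_netstamp() above is refcount balance: the global timestamp counter must be incremented and decremented under the same predicate, or a family that skipped the enable path (e.g. AF_UNIX) would still decrement on close. A standalone sketch of the invariant, with a plain counter standing in for the kernel's static key and illustrative names:

    #include <stdbool.h>

    static int netstamp_needed;	/* stands in for the kernel's static key */

    static bool needs_netstamp_sketch(int family)
    {
    	return family != 1;	/* 1 standing in for AF_UNIX */
    }

    static void timestamp_enable(int family)
    {
    	if (needs_netstamp_sketch(family))
    		netstamp_needed++;
    }

    static void timestamp_disable(int family)
    {
    	/* same predicate on both paths keeps the count balanced */
    	if (needs_netstamp_sketch(family))
    		netstamp_needed--;
    }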
diff --git a/net/core/tso.c b/net/core/tso.c
index 630b30b4fb53..5dca7ce8ee9f 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -1,4 +1,5 @@
 #include <linux/export.h>
+#include <linux/if_vlan.h>
 #include <net/ip.h>
 #include <net/tso.h>
 #include <asm/unaligned.h>
@@ -14,18 +15,24 @@ EXPORT_SYMBOL(tso_count_descs);
 void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
 		   int size, bool is_last)
 {
-	struct iphdr *iph;
 	struct tcphdr *tcph;
 	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 	int mac_hdr_len = skb_network_offset(skb);
 
 	memcpy(hdr, skb->data, hdr_len);
-	iph = (struct iphdr *)(hdr + mac_hdr_len);
-	iph->id = htons(tso->ip_id);
-	iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+	if (!tso->ipv6) {
+		struct iphdr *iph = (void *)(hdr + mac_hdr_len);
+
+		iph->id = htons(tso->ip_id);
+		iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+		tso->ip_id++;
+	} else {
+		struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len);
+
+		iph->payload_len = htons(size + tcp_hdrlen(skb));
+	}
 	tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
 	put_unaligned_be32(tso->tcp_seq, &tcph->seq);
-	tso->ip_id++;
 
 	if (!is_last) {
 		/* Clear all special flags for not last packet */
@@ -61,6 +68,7 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso)
 	tso->ip_id = ntohs(ip_hdr(skb)->id);
 	tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
 	tso->next_frag_idx = 0;
+	tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6);
 
 	/* Build first data */
 	tso->size = skb_headlen(skb) - hdr_len;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 923f5a180134..b0e28d24e1a7 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -278,7 +278,9 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
 struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb,
 				       struct request_sock *req,
-				       struct dst_entry *dst);
+				       struct dst_entry *dst,
+				       struct request_sock *req_unhash,
+				       bool *own_req);
 struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 			    struct request_sock *req);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 59bc180b02d8..5684e14932bd 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -393,7 +393,9 @@ static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
 struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
 				       struct sk_buff *skb,
 				       struct request_sock *req,
-				       struct dst_entry *dst)
+				       struct dst_entry *dst,
+				       struct request_sock *req_unhash,
+				       bool *own_req)
 {
 	struct inet_request_sock *ireq;
 	struct inet_sock *newinet;
@@ -426,7 +428,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
 
 	if (__inet_inherit_port(sk, newsk) < 0)
 		goto put_and_exit;
-	__inet_hash_nolisten(newsk, NULL);
+	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
 
 	return newsk;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d9cc731f2619..ef4e48ce9143 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -380,7 +380,9 @@ drop:
 static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
 					      struct sk_buff *skb,
 					      struct request_sock *req,
-					      struct dst_entry *dst)
+					      struct dst_entry *dst,
+					      struct request_sock *req_unhash,
+					      bool *own_req)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *newnp;
@@ -393,7 +395,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
 		/*
 		 *	v6 mapped
 		 */
-		newsk = dccp_v4_request_recv_sock(sk, skb, req, dst);
+		newsk = dccp_v4_request_recv_sock(sk, skb, req, dst,
+						  req_unhash, own_req);
 		if (newsk == NULL)
 			return NULL;
@@ -511,7 +514,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
 		dccp_done(newsk);
 		goto out;
 	}
-	__inet_hash(newsk, NULL);
+	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
 
 	return newsk;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index d10aace43672..1994f8af646b 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -143,6 +143,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 {
 	struct sock *child = NULL;
 	struct dccp_request_sock *dreq = dccp_rsk(req);
+	bool own_req;
 
 	/* Check for retransmitted REQUEST */
 	if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
@@ -182,14 +183,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (dccp_parse_options(sk, dreq, skb))
 		goto drop;
 
-	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-	if (child == NULL)
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
+							 req, &own_req);
+	if (!child)
 		goto listen_overflow;
 
-	inet_csk_reqsk_queue_drop(sk, req);
-	inet_csk_reqsk_queue_add(sk, req, child);
-out:
-	return child;
+	return inet_csk_complete_hashdance(sk, child, req, own_req);
+
 listen_overflow:
 	dccp_pr_debug("listen_overflow!\n");
 	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
@@ -198,7 +198,7 @@ drop:
 		req->rsk_ops->send_reset(sk, skb);
 
 	inet_csk_reqsk_queue_drop(sk, req);
-	goto out;
+	return NULL;
 }
 
 EXPORT_SYMBOL_GPL(dccp_check_req);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index b0b8da0f5af8..481754ee062a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -378,31 +378,11 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev,
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
-	unsigned char addr[ETH_ALEN] = { 0 };
-	u16 vid = 0;
-	int ret;
-
-	if (!ds->drv->port_fdb_getnext)
-		return -EOPNOTSUPP;
 
-	for (;;) {
-		bool is_static;
-
-		ret = ds->drv->port_fdb_getnext(ds, p->port, addr, &vid,
-						&is_static);
-		if (ret < 0)
-			break;
+	if (ds->drv->port_fdb_dump)
+		return ds->drv->port_fdb_dump(ds, p->port, fdb, cb);
 
-		ether_addr_copy(fdb->addr, addr);
-		fdb->vid = vid;
-		fdb->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
-
-		ret = cb(&fdb->obj);
-		if (ret < 0)
-			break;
-	}
-
-	return ret == -ENOENT ? 0 : ret;
+	return -EOPNOTSUPP;
 }
 
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 89aacb630a53..c29809f765dc 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,6 +8,7 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     inet_timewait_sock.o inet_connection_sock.o \
 	     tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
 	     tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
+	     tcp_recovery.o \
 	     tcp_offload.o datagram.o raw.o udp.o udplite.o \
 	     udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
 	     fib_frontend.o fib_semantics.o fib_trie.o \
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 735008472844..cebd9d31e65a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1644,7 +1644,8 @@ errout:
 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
 }
 
-static size_t inet_get_link_af_size(const struct net_device *dev)
+static size_t inet_get_link_af_size(const struct net_device *dev,
+				    u32 ext_filter_mask)
 {
 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
@@ -2398,4 +2399,3 @@ void __init devinet_init(void)
 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
 		      inet_netconf_dump_devconf, NULL);
 }
-
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d7c2bb0c4f65..e786873c89f2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -867,9 +867,10 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
-		fib_magic(RTM_NEWROUTE,
-			  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
-			  prefix, ifa->ifa_prefixlen, prim);
+		if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
+			fib_magic(RTM_NEWROUTE,
+				  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+				  prefix, ifa->ifa_prefixlen, prim);
 
 		/* Add network specific broadcasts, when it takes a sense */
 		if (ifa->ifa_prefixlen < 31) {
@@ -914,9 +915,10 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 		}
 	} else if (!ipv4_is_zeronet(any) &&
 		   (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
-		fib_magic(RTM_DELROUTE,
-			  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
-			  any, ifa->ifa_prefixlen, prim);
+		if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
+			fib_magic(RTM_DELROUTE,
+				  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+				  any, ifa->ifa_prefixlen, prim);
 		subnet = 1;
 	}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8430bc8ccd58..1feb15f23de8 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -523,15 +523,15 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue,
 			       struct request_sock *req)
 {
 	struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
-	spinlock_t *lock;
-	bool found;
+	bool found = false;
 
-	lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
-
-	spin_lock(lock);
-	found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
-	spin_unlock(lock);
+	if (sk_hashed(req_to_sk(req))) {
+		spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
+
+		spin_lock(lock);
+		found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
+		spin_unlock(lock);
+	}
 	if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
 		reqsk_put(req);
 	return found;
@@ -811,6 +811,25 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
 }
 EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
 
+struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
+					 struct request_sock *req, bool own_req)
+{
+	if (own_req) {
+		inet_csk_reqsk_queue_drop(sk, req);
+		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
+		inet_csk_reqsk_queue_add(sk, req, child);
+		/* Warning: caller must not call reqsk_put(req);
+		 * child stole last reference on it.
+		 */
+		return child;
+	}
+	/* Too bad, another child took ownership of the request, undo. */
+	bh_unlock_sock(child);
+	sock_put(child);
+	return NULL;
+}
+EXPORT_SYMBOL(inet_csk_complete_hashdance);
+
 /*
  * This routine closes sockets which have been at least partially
  * opened, but not yet accepted.
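inet_csk_complete_hashdance() resolves a race where two contexts try to promote the same request to a full socket: the ehash insert acts as the arbiter, and only the caller whose insert succeeded (own_req) keeps the child, while the loser drops its reference. A userspace sketch of just that arbitration, with an atomic flag standing in for the ehash insert and illustrative names throughout:

    #include <stdatomic.h>
    #include <stddef.h>

    struct request_sketch {
    	atomic_flag hashed;	/* initialise with ATOMIC_FLAG_INIT */
    };

    struct child_sketch {
    	int refcnt;
    };

    /* true only for the first caller, like the inet_ehash_nolisten() result */
    static _Bool ehash_insert_sketch(struct request_sketch *req)
    {
    	return !atomic_flag_test_and_set(&req->hashed);
    }

    static struct child_sketch *complete_hashdance_sketch(struct request_sketch *req,
    						      struct child_sketch *child)
    {
    	if (ehash_insert_sketch(req))
    		return child;	/* own_req: we keep the child */
    	child->refcnt--;	/* lost the race: drop our reference */
    	return NULL;
    }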
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 958728a22001..ccc5980797fc 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -407,13 +407,13 @@ static u32 inet_sk_port_offset(const struct sock *sk)
 /* insert a socket into ehash, and eventually remove another one
  * (The another one can be a SYN_RECV or TIMEWAIT
  */
-int inet_ehash_insert(struct sock *sk, struct sock *osk)
+bool inet_ehash_insert(struct sock *sk, struct sock *osk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct hlist_nulls_head *list;
 	struct inet_ehash_bucket *head;
 	spinlock_t *lock;
-	int ret = 0;
+	bool ret = true;
 
 	WARN_ON_ONCE(!sk_unhashed(sk));
@@ -423,30 +423,41 @@ int inet_ehash_insert(struct sock *sk, struct sock *osk)
 	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
 	spin_lock(lock);
-	__sk_nulls_add_node_rcu(sk, list);
 	if (osk) {
-		WARN_ON(sk->sk_hash != osk->sk_hash);
-		sk_nulls_del_node_init_rcu(osk);
+		WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
+		ret = sk_nulls_del_node_init_rcu(osk);
 	}
+	if (ret)
+		__sk_nulls_add_node_rcu(sk, list);
 	spin_unlock(lock);
 	return ret;
 }
 
-void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
 {
-	inet_ehash_insert(sk, osk);
-	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	bool ok = inet_ehash_insert(sk, osk);
+
+	if (ok) {
+		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	} else {
+		percpu_counter_inc(sk->sk_prot->orphan_count);
+		sk->sk_state = TCP_CLOSE;
+		sock_set_flag(sk, SOCK_DEAD);
+		inet_csk_destroy_sock(sk);
+	}
+	return ok;
 }
-EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
+EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
 
 void __inet_hash(struct sock *sk, struct sock *osk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct inet_listen_hashbucket *ilb;
 
-	if (sk->sk_state != TCP_LISTEN)
-		return __inet_hash_nolisten(sk, osk);
-
+	if (sk->sk_state != TCP_LISTEN) {
+		inet_ehash_nolisten(sk, osk);
+		return;
+	}
 	WARN_ON(!sk_unhashed(sk));
 	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
@@ -567,7 +578,7 @@ ok:
 		inet_bind_hash(sk, tb, port);
 		if (sk_unhashed(sk)) {
 			inet_sk(sk)->inet_sport = htons(port);
-			__inet_hash_nolisten(sk, (struct sock *)tw);
+			inet_ehash_nolisten(sk, (struct sock *)tw);
 		}
 		if (tw)
 			inet_twsk_bind_unhash(tw, hinfo);
@@ -584,7 +595,7 @@ ok:
 	tb = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__inet_hash_nolisten(sk, NULL);
+		inet_ehash_nolisten(sk, NULL);
 		spin_unlock_bh(&head->lock);
 		return 0;
 	} else {
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bd0679d90519..614521437e30 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -498,10 +498,26 @@ static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
 					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
 
+static struct rtable *gre_get_rt(struct sk_buff *skb,
+				 struct net_device *dev,
+				 struct flowi4 *fl,
+				 const struct ip_tunnel_key *key)
+{
+	struct net *net = dev_net(dev);
+
+	memset(fl, 0, sizeof(*fl));
+	fl->daddr = key->u.ipv4.dst;
+	fl->saddr = key->u.ipv4.src;
+	fl->flowi4_tos = RT_TOS(key->tos);
+	fl->flowi4_mark = skb->mark;
+	fl->flowi4_proto = IPPROTO_GRE;
+
+	return ip_route_output_key(net, fl);
+}
+
 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel_info *tun_info;
-	struct net *net = dev_net(dev);
 	const struct ip_tunnel_key *key;
 	struct flowi4 fl;
 	struct rtable *rt;
@@ -516,14 +532,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto err_free_skb;
 
 	key = &tun_info->key;
-	memset(&fl, 0, sizeof(fl));
-	fl.daddr = key->u.ipv4.dst;
-	fl.saddr = key->u.ipv4.src;
-	fl.flowi4_tos = RT_TOS(key->tos);
-	fl.flowi4_mark = skb->mark;
-	fl.flowi4_proto = IPPROTO_GRE;
-
-	rt = ip_route_output_key(net, &fl);
+	rt = gre_get_rt(skb, dev, &fl, key);
 	if (IS_ERR(rt))
 		goto err_free_skb;
@@ -566,6 +575,24 @@ err_free_skb:
 	dev->stats.tx_dropped++;
 }
 
+static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ip_tunnel_info *info = skb_tunnel_info(skb);
+	struct rtable *rt;
+	struct flowi4 fl4;
+
+	if (ip_tunnel_info_af(info) != AF_INET)
+		return -EINVAL;
+
+	rt = gre_get_rt(skb, dev, &fl4, &info->key);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+
+	ip_rt_put(rt);
+	info->key.u.ipv4.src = fl4.saddr;
+	return 0;
+}
+
 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 			      struct net_device *dev)
 {
@@ -1023,6 +1050,7 @@ static const struct net_device_ops gre_tap_netdev_ops = {
 	.ndo_change_mtu		= ip_tunnel_change_mtu,
 	.ndo_get_stats64	= ip_tunnel_get_stats64,
 	.ndo_get_iflink		= ip_tunnel_get_iflink,
+	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
 };
 
 static void ipgre_tap_setup(struct net_device *dev)
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 690d27d3f2f9..a35584176535 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -75,6 +75,7 @@ endif # NF_TABLES
 
 config NF_DUP_IPV4
 	tristate "Netfilter IPv4 packet duplication to alternate destination"
+	depends on !NF_CONNTRACK || NF_CONNTRACK
 	help
 	  This option enables the nf_dup_ipv4 core, which duplicates an
 	  IPv4 packet to be rerouted to another destination.
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 74dd6671b66d..78cc64eddfc1 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -60,9 +60,7 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
 	if (FIB_RES_DEV(res) == dev)
 		dev_match = true;
 #endif
-	if (dev_match || flags & XT_RPFILTER_LOOSE)
-		return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
-	return dev_match;
+	return dev_match || flags & XT_RPFILTER_LOOSE;
 }
 
 static bool rpfilter_is_local(const struct sk_buff *skb)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 4c0892badb8b..4cbe9f0a4281 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -221,8 +221,10 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sock *child;
+	bool own_req;
 
-	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
+	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
+						 NULL, &own_req);
 	if (child) {
 		atomic_set(&req->rsk_refcnt, 1);
 		sock_rps_save_rxhash(child, skb);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 894da3a70aff..25300c5e283b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -496,6 +496,13 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
+		.procname	= "tcp_recovery",
+		.data		= &sysctl_tcp_recovery,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
 		.procname	= "tcp_reordering",
 		.data		= &sysctl_tcp_reordering,
 		.maxlen		= sizeof(int),
@@ -577,6 +584,13 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
+		.procname	= "tcp_min_rtt_wlen",
+		.data		= &sysctl_tcp_min_rtt_wlen,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
 		.procname	= "tcp_low_latency",
 		.data		= &sysctl_tcp_low_latency,
 		.maxlen		= sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ac1bdbb50352..0cfa7c0c1e80 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -388,6 +388,7 @@ void tcp_init_sock(struct sock *sk)
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
 	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
+	tp->rtt_min[0].rtt = ~0U;
 
 	/* So many TCP implementations out there (incorrectly) count the
 	 * initial SYN frame in their delayed-ACK and congestion control
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 7092a61c4dc8..7e538f71f5fb 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -209,7 +209,7 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
 
 		/* alpha = (1 - g) * alpha + g * F */
-		alpha -= alpha >> dctcp_shift_g;
+		alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
 		if (bytes_ecn) {
 			/* If dctcp_shift_g == 1, a 32bit value would overflow
 			 * after 8 Mbytes.
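The tcp_dctcp.c hunk implements the EWMA alpha = (1 - g) * alpha + g * F in fixed point with g = 1/2^dctcp_shift_g. With the old "alpha -= alpha >> dctcp_shift_g", alpha stops decaying once it drops below 2^dctcp_shift_g, because the shifted term rounds to zero; min_not_zero() then subtracts alpha itself, so it can still reach zero on mark-free windows. A small userspace demo of the fixed-point decay (the macro below is an illustrative stand-in for the kernel helper):

    #include <stdint.h>
    #include <stdio.h>

    /* like the kernel's min_not_zero(): skip a zero operand */
    #define MIN_NOT_ZERO(a, b) ((b) ? ((a) < (b) ? (a) : (b)) : (a))

    int main(void)
    {
    	uint32_t alpha = 10;	/* small residual alpha, fixed-point units */
    	unsigned int shift = 4;	/* g = 1/16, so alpha >> shift is 0 here */

    	for (int i = 0; i < 60 && alpha; i++)
    		alpha -= MIN_NOT_ZERO(alpha, alpha >> shift);
    	/* with the old code alpha would stay at 10 forever */
    	printf("alpha after decay: %u\n", alpha);	/* prints 0 */
    	return 0;
    }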
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 93396bf7b475..55be6ac70cff 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -133,12 +133,14 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
 	struct sock *child;
 	u32 end_seq;
+	bool own_req;
 
 	req->num_retrans = 0;
 	req->num_timeout = 0;
 	req->sk = NULL;
 
-	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
+							 NULL, &own_req);
 	if (!child)
 		return NULL;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 944eaca69115..fdd88c3803a6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -95,6 +95,7 @@ int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly = 2;
+int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
 
 int sysctl_tcp_thin_dupack __read_mostly;
@@ -880,6 +881,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 
 	if (metric > 0)
 		tcp_disable_early_retrans(tp);
+	tp->rack.reord = 1;
 }
 
 /* This must be called before lost_out is incremented */
@@ -905,8 +907,7 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
 	}
 }
 
-static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
-					    struct sk_buff *skb)
+void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	tcp_verify_retransmit_hint(tp, skb);
 
@@ -1047,70 +1048,6 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
 	return !before(start_seq, end_seq - tp->max_window);
 }
 
-/* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
- * Event "B". Later note: FACK people cheated me again 8), we have to account
- * for reordering! Ugly, but should help.
- *
- * Search retransmitted skbs from write_queue that were sent when snd_nxt was
- * less than what is now known to be received by the other end (derived from
- * highest SACK block). Also calculate the lowest snd_nxt among the remaining
- * retransmitted skbs to avoid some costly processing per ACKs.
- */
-static void tcp_mark_lost_retrans(struct sock *sk, int *flag)
-{
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-	int cnt = 0;
-	u32 new_low_seq = tp->snd_nxt;
-	u32 received_upto = tcp_highest_sack_seq(tp);
-
-	if (!tcp_is_fack(tp) || !tp->retrans_out ||
-	    !after(received_upto, tp->lost_retrans_low) ||
-	    icsk->icsk_ca_state != TCP_CA_Recovery)
-		return;
-
-	tcp_for_write_queue(skb, sk) {
-		u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
-
-		if (skb == tcp_send_head(sk))
-			break;
-		if (cnt == tp->retrans_out)
-			break;
-		if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
-			continue;
-
-		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
-			continue;
-
-		/* TODO: We would like to get rid of tcp_is_fack(tp) only
-		 * constraint here (see above) but figuring out that at
-		 * least tp->reordering SACK blocks reside between ack_seq
-		 * and received_upto is not easy task to do cheaply with
-		 * the available datastructures.
-		 *
-		 * Whether FACK should check here for tp->reordering segs
-		 * in-between one could argue for either way (it would be
-		 * rather simple to implement as we could count fack_count
-		 * during the walk and do tp->fackets_out - fack_count).
-		 */
-		if (after(received_upto, ack_seq)) {
-			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-			tp->retrans_out -= tcp_skb_pcount(skb);
-			*flag |= FLAG_LOST_RETRANS;
-			tcp_skb_mark_lost_uncond_verify(tp, skb);
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPLOSTRETRANSMIT);
-		} else {
-			if (before(ack_seq, new_low_seq))
-				new_low_seq = ack_seq;
-			cnt += tcp_skb_pcount(skb);
-		}
-	}
-
-	if (tp->retrans_out)
-		tp->lost_retrans_low = new_low_seq;
-}
-
 static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 			    struct tcp_sack_block_wire *sp, int num_sacks,
 			    u32 prior_snd_una)
@@ -1236,6 +1173,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
 		return sacked;
 
 	if (!(sacked & TCPCB_SACKED_ACKED)) {
+		tcp_rack_advance(tp, xmit_time, sacked);
+
 		if (sacked & TCPCB_SACKED_RETRANS) {
 			/* If the segment is not tagged as lost,
 			 * we do not clear RETRANS, believing
@@ -1837,7 +1776,6 @@ advance_sp:
 	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
 		tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
 
-	tcp_mark_lost_retrans(sk, &state->flag);
 	tcp_verify_left_out(tp);
 out:
@@ -2314,14 +2252,29 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
+static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
+{
+	return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+	       before(tp->rx_opt.rcv_tsecr, when);
+}
+
+/* skb is spurious retransmitted if the returned timestamp echo
+ * reply is prior to the skb transmission time
+ */
+static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
+				     const struct sk_buff *skb)
+{
+	return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
+	       tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
+}
+
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
 static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
 {
 	return !tp->retrans_stamp ||
-	       (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
-		before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
+	       tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
 }
 
 /* Undo procedures. */
@@ -2853,6 +2806,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 		}
 	}
 
+	/* Use RACK to detect loss */
+	if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS &&
+	    tcp_rack_mark_lost(sk))
+		flag |= FLAG_LOST_RETRANS;
+
 	/* E. Process state. */
 	switch (icsk->icsk_ca_state) {
 	case TCP_CA_Recovery:
@@ -2915,8 +2873,69 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 		tcp_xmit_retransmit_queue(sk);
 }
 
+/* Kathleen Nichols' algorithm for tracking the minimum value of
+ * a data stream over some fixed time interval. (E.g., the minimum
+ * RTT over the past five minutes.) It uses constant space and constant
+ * time per update yet almost always delivers the same minimum as an
+ * implementation that has to keep all the data in the window.
+ *
+ * The algorithm keeps track of the best, 2nd best & 3rd best min
+ * values, maintaining an invariant that the measurement time of the
+ * n'th best >= n-1'th best. It also makes sure that the three values
+ * are widely separated in the time window since that bounds the worse
+ * case error when that data is monotonically increasing over the window.
+ *
+ * Upon getting a new min, we can forget everything earlier because it
+ * has no value - the new min is <= everything else in the window by
+ * definition and it's the most recent. So we restart fresh on every new min
+ * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd
+ * best.
+ */
+static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
+{
+	const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
+	struct rtt_meas *m = tcp_sk(sk)->rtt_min;
+	struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now };
+	u32 elapsed;
+
+	/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
+	if (unlikely(rttm.rtt <= m[0].rtt))
+		m[0] = m[1] = m[2] = rttm;
+	else if (rttm.rtt <= m[1].rtt)
+		m[1] = m[2] = rttm;
+	else if (rttm.rtt <= m[2].rtt)
+		m[2] = rttm;
+
+	elapsed = now - m[0].ts;
+	if (unlikely(elapsed > wlen)) {
+		/* Passed entire window without a new min so make 2nd choice
+		 * the new min & 3rd choice the new 2nd. So forth and so on.
+		 */
+		m[0] = m[1];
+		m[1] = m[2];
+		m[2] = rttm;
+		if (now - m[0].ts > wlen) {
+			m[0] = m[1];
+			m[1] = rttm;
+			if (now - m[0].ts > wlen)
+				m[0] = rttm;
+		}
+	} else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
+		/* Passed a quarter of the window without a new min so
+		 * take 2nd choice from the 2nd quarter of the window.
+		 */
+		m[2] = m[1] = rttm;
+	} else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
+		/* Passed half the window without a new min so take the 3rd
+		 * choice from the last half of the window.
+		 */
+		m[2] = rttm;
+	}
+}
+
 static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      long seq_rtt_us, long sack_rtt_us)
+				      long seq_rtt_us, long sack_rtt_us,
+				      long ca_rtt_us)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2925,9 +2944,6 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * Karn's algorithm forbids taking RTT if some retransmitted data
 	 * is acked (RFC6298).
 	 */
-	if (flag & FLAG_RETRANS_DATA_ACKED)
-		seq_rtt_us = -1L;
-
 	if (seq_rtt_us < 0)
 		seq_rtt_us = sack_rtt_us;
 
@@ -2939,11 +2955,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 */
 	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 	    flag & FLAG_ACKED)
-		seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr);
-
+		seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp -
+							  tp->rx_opt.rcv_tsecr);
 	if (seq_rtt_us < 0)
 		return false;
 
+	/* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is
+	 * always taken together with ACK, SACK, or TS-opts. Any negative
+	 * values will be skipped with the seq_rtt_us < 0 check above.
+	 */
+	tcp_update_rtt_min(sk, ca_rtt_us);
 	tcp_rtt_estimator(sk, seq_rtt_us);
 	tcp_set_rto(sk);
 
@@ -2964,7 +2985,7 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
 		rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack);
 	}
 
-	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L);
+	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us);
 }
 
@@ -3131,6 +3152,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 			if (sacked & TCPCB_SACKED_ACKED)
 				tp->sacked_out -= acked_pcount;
+			else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb))
+				tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
 			if (sacked & TCPCB_LOST)
 				tp->lost_out -= acked_pcount;
 
@@ -3169,7 +3192,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		flag |= FLAG_SACK_RENEGING;
 
 	skb_mstamp_get(&now);
-	if (likely(first_ackt.v64)) {
+	if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
 		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
 		ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
 	}
@@ -3178,7 +3201,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
 	}
 
-	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
+	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
+					ca_rtt_us);
 
 	if (flag & FLAG_ACKED) {
 		tcp_rearm_rto(sk);
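tcp_update_rtt_min() above is self-contained enough to lift out of the kernel; a plain-integer version of the same three-candidate windowed-min filter, useful for exercising the aging logic in userspace (names are illustrative, timestamps and window are in the same arbitrary time units):

    #include <stdint.h>

    struct meas { uint32_t v, ts; };

    static void win_min_update(struct meas m[3], uint32_t v, uint32_t now,
    			   uint32_t wlen)
    {
    	struct meas s = { v ? v : 1, now };
    	uint32_t elapsed;

    	/* the new sample may replace the 1st, 2nd or 3rd choice */
    	if (s.v <= m[0].v)
    		m[0] = m[1] = m[2] = s;	/* new global min: restart fresh */
    	else if (s.v <= m[1].v)
    		m[1] = m[2] = s;
    	else if (s.v <= m[2].v)
    		m[2] = s;

    	elapsed = now - m[0].ts;
    	if (elapsed > wlen) {		/* min aged out: promote the backups */
    		m[0] = m[1];
    		m[1] = m[2];
    		m[2] = s;
    		if (now - m[0].ts > wlen) {
    			m[0] = m[1];
    			m[1] = s;
    			if (now - m[0].ts > wlen)
    				m[0] = s;
    		}
    	} else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
    		m[2] = m[1] = s;	/* refresh 2nd choice each quarter */
    	} else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
    		m[2] = s;		/* refresh 3rd choice at half window */
    	}
    }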
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 30dd45c1f568..1c2648bbac4b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1247,7 +1247,9 @@ EXPORT_SYMBOL(tcp_v4_conn_request);
  */
 struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req,
-				  struct dst_entry *dst)
+				  struct dst_entry *dst,
+				  struct request_sock *req_unhash,
+				  bool *own_req)
 {
 	struct inet_request_sock *ireq;
 	struct inet_sock *newinet;
@@ -1323,7 +1325,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 
 	if (__inet_inherit_port(sk, newsk) < 0)
 		goto put_and_exit;
-	__inet_hash_nolisten(newsk, NULL);
+	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
 
 	return newsk;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 41828bdc5d32..3575dd1e5b67 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -470,6 +470,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 
 		newtp->srtt_us = 0;
 		newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
+		newtp->rtt_min[0].rtt = ~0U;
 		newicsk->icsk_rto = TCP_TIMEOUT_INIT;
 
 		newtp->packets_out = 0;
@@ -547,6 +548,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		tcp_ecn_openreq_child(newtp, req);
 		newtp->fastopen_rsk = NULL;
 		newtp->syn_data_acked = 0;
+		newtp->rack.mstamp.v64 = 0;
+		newtp->rack.advanced = 0;
 
 		newtp->saved_syn = req->saved_syn;
 		req->saved_syn = NULL;
@@ -577,6 +580,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	const struct tcphdr *th = tcp_hdr(skb);
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	bool paws_reject = false;
+	bool own_req;
 
 	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(struct tcphdr)>>2)) {
@@ -764,18 +768,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	 * ESTABLISHED STATE. If it will be dropped after
 	 * socket is created, wait for troubles.
 	 */
-	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
+							 req, &own_req);
 	if (!child)
 		goto listen_overflow;
 
 	sock_rps_save_rxhash(child, skb);
 	tcp_synack_rtt_meas(child, req);
-	inet_csk_reqsk_queue_drop(sk, req);
-	inet_csk_reqsk_queue_add(sk, req, child);
-	/* Warning: caller must not call reqsk_put(req);
-	 * child stole last reference on it.
-	 */
-	return child;
+	return inet_csk_complete_hashdance(sk, child, req, own_req);
 
 listen_overflow:
 	if (!sysctl_tcp_abort_on_overflow) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 19adedb8c5cc..f4f9793eb025 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2655,8 +2655,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 			net_dbg_ratelimited("retrans_out leaked\n");
 		}
 #endif
-		if (!tp->retrans_out)
-			tp->lost_retrans_low = tp->snd_nxt;
 		TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
 		tp->retrans_out += tcp_skb_pcount(skb);
 
@@ -2664,10 +2662,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		if (!tp->retrans_stamp)
 			tp->retrans_stamp = tcp_skb_timestamp(skb);
 
-		/* snd_nxt is stored to detect loss of retransmitted segment,
-		 * see tcp_input.c tcp_sacktag_write_queue().
-		 */
-		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
 	} else if (err != -EBUSY) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
 	}
@@ -3416,7 +3410,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
 	 */
 	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
 	skb_mstamp_get(&skb->skb_mstamp);
-	NET_INC_STATS_BH(sock_net(sk), mib);
+	NET_INC_STATS(sock_net(sk), mib);
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
new file mode 100644
index 000000000000..5353085fd0b2
--- /dev/null
+++ b/net/ipv4/tcp_recovery.c
@@ -0,0 +1,109 @@
+#include <linux/tcp.h>
+#include <net/tcp.h>
+
+int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOST_RETRANS;
+
+/* Marks a packet lost, if some packet sent later has been (s)acked.
+ * The underlying idea is similar to the traditional dupthresh and FACK
+ * but they look at different metrics:
+ *
+ * dupthresh: 3 OOO packets delivered (packet count)
+ * FACK: sequence delta to highest sacked sequence (sequence space)
+ * RACK: sent time delta to the latest delivered packet (time domain)
+ *
+ * The advantage of RACK is it applies to both original and retransmitted
+ * packet and therefore is robust against tail losses. Another advantage
+ * is being more resilient to reordering by simply allowing some
+ * "settling delay", instead of tweaking the dupthresh.
+ *
+ * The current version is only used after recovery starts but can be
+ * easily extended to detect the first loss.
+ */
+int tcp_rack_mark_lost(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	u32 reo_wnd, prior_retrans = tp->retrans_out;
+
+	if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced)
+		return 0;
+
+	/* Reset the advanced flag to avoid unnecessary queue scanning */
+	tp->rack.advanced = 0;
+
+	/* To be more reordering resilient, allow min_rtt/4 settling delay
+	 * (lower-bounded to 1000uS). We use min_rtt instead of the smoothed
+	 * RTT because reordering is often a path property and less related
+	 * to queuing or delayed ACKs.
+	 *
+	 * TODO: measure and adapt to the observed reordering delay, and
+	 * use a timer to retransmit like the delayed early retransmit.
+	 */
+	reo_wnd = 1000;
+	if (tp->rack.reord && tcp_min_rtt(tp) != ~0U)
+		reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
+
+	tcp_for_write_queue(skb, sk) {
+		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+
+		if (skb == tcp_send_head(sk))
+			break;
+
+		/* Skip ones already (s)acked */
+		if (!after(scb->end_seq, tp->snd_una) ||
+		    scb->sacked & TCPCB_SACKED_ACKED)
+			continue;
+
+		if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) {
+
+			if (skb_mstamp_us_delta(&tp->rack.mstamp,
+						&skb->skb_mstamp) <= reo_wnd)
+				continue;
+
+			/* skb is lost if packet sent later is sacked */
+			tcp_skb_mark_lost_uncond_verify(tp, skb);
+			if (scb->sacked & TCPCB_SACKED_RETRANS) {
+				scb->sacked &= ~TCPCB_SACKED_RETRANS;
+				tp->retrans_out -= tcp_skb_pcount(skb);
+				NET_INC_STATS_BH(sock_net(sk),
+						 LINUX_MIB_TCPLOSTRETRANSMIT);
+			}
+		} else if (!(scb->sacked & TCPCB_RETRANS)) {
+			/* Original data are sent sequentially so stop early
+			 * b/c the rest are all sent after rack_sent
+			 */
+			break;
+		}
+	}
+	return prior_retrans - tp->retrans_out;
+}
+
+/* Record the most recently (re)sent time among the (s)acked packets */
+void tcp_rack_advance(struct tcp_sock *tp,
+		      const struct skb_mstamp *xmit_time, u8 sacked)
+{
+	if (tp->rack.mstamp.v64 &&
+	    !skb_mstamp_after(xmit_time, &tp->rack.mstamp))
+		return;
+
+	if (sacked & TCPCB_RETRANS) {
+		struct skb_mstamp now;
+
+		/* If the sacked packet was retransmitted, it's ambiguous
+		 * whether the retransmission or the original (or the prior
+		 * retransmission) was sacked.
+		 *
+		 * If the original is lost, there is no ambiguity. Otherwise
+		 * we assume the original can be delayed up to aRTT + min_rtt.
+		 * the aRTT term is bounded by the fast recovery or timeout,
+		 * so it's at least one RTT (i.e., retransmission is at least
+		 * an RTT later).
+		 */
+		skb_mstamp_get(&now);
+		if (skb_mstamp_us_delta(&now, xmit_time) < tcp_min_rtt(tp))
+			return;
+	}
+
+	tp->rack.mstamp = *xmit_time;
+	tp->rack.advanced = 1;
+}
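Reduced to its core, the RACK test in tcp_rack_mark_lost() declares an outstanding packet lost once some packet sent later has already been delivered and the send-time gap exceeds the settling window: min_rtt/4, floored at 1000us (and in the patch the min_rtt/4 widening only applies once reordering has been observed). A userspace sketch of just that predicate, with illustrative types rather than kernel API:

    #include <stdbool.h>
    #include <stdint.h>

    struct pkt {
    	uint64_t sent_us;	/* (re)transmit timestamp, microseconds */
    	bool delivered;		/* (s)acked */
    };

    static bool rack_lost(const struct pkt *p, uint64_t latest_delivered_sent_us,
    		      uint64_t min_rtt_us)
    {
    	uint64_t reo_wnd = min_rtt_us / 4;	/* settling delay */

    	if (reo_wnd < 1000)
    		reo_wnd = 1000;			/* lower bound, as in the patch */
    	return !p->delivered &&
    	       latest_delivered_sent_us > p->sent_us &&
    	       latest_delivered_sent_us - p->sent_us > reo_wnd;
    }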
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 9f298d0dc9a1..7ee6518afa86 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,6 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 
 	mtu = dst_mtu(skb_dst(skb));
 	if (skb->len > mtu) {
+		skb->protocol = htons(ETH_P_IP);
+
 		if (skb->sk)
 			xfrm_local_error(skb, mtu);
 		else
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d135350495e8..d72fa90d6feb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3147,6 +3147,32 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		}
 		break;
 
+	case NETDEV_CHANGEMTU:
+		/* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
+		if (dev->mtu < IPV6_MIN_MTU) {
+			addrconf_ifdown(dev, 1);
+			break;
+		}
+
+		if (idev) {
+			rt6_mtu_change(dev, dev->mtu);
+			idev->cnf.mtu6 = dev->mtu;
+			break;
+		}
+
+		/* allocate new idev */
+		idev = ipv6_add_dev(dev);
+		if (IS_ERR(idev))
+			break;
+
+		/* device is still not ready */
+		if (!(idev->if_flags & IF_READY))
+			break;
+
+		run_pending = 1;
+
+		/* fall through */
+
 	case NETDEV_UP:
 	case NETDEV_CHANGE:
 		if (dev->flags & IFF_SLAVE)
@@ -3170,7 +3196,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 				idev->if_flags |= IF_READY;
 				run_pending = 1;
 			}
-		} else {
+		} else if (event == NETDEV_CHANGE) {
 			if (!addrconf_qdisc_ok(dev)) {
 				/* device is still not ready.
 				 */
 				break;
 			}
@@ -3235,24 +3261,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		}
 		break;
 
-	case NETDEV_CHANGEMTU:
-		if (idev && dev->mtu >= IPV6_MIN_MTU) {
-			rt6_mtu_change(dev, dev->mtu);
-			idev->cnf.mtu6 = dev->mtu;
-			break;
-		}
-
-		if (!idev && dev->mtu >= IPV6_MIN_MTU) {
-			idev = ipv6_add_dev(dev);
-			if (!IS_ERR(idev))
-				break;
-		}
-
-		/*
-		 * if MTU under IPV6_MIN_MTU.
-		 * Stop IPv6 on this interface.
-		 */
-
 	case NETDEV_DOWN:
 	case NETDEV_UNREGISTER:
 		/*
@@ -4788,7 +4796,8 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static size_t inet6_get_link_af_size(const struct net_device *dev)
+static size_t inet6_get_link_af_size(const struct net_device *dev,
+				     u32 ext_filter_mask)
 {
 	if (!__in6_dev_get(dev))
 		return 0;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 9f777ec59a59..ed33abf57abd 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -32,6 +32,7 @@ struct fib6_rule {
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
+	struct rt6_info *rt;
 	struct fib_lookup_arg arg = {
 		.lookup_ptr = lookup,
 		.flags = FIB_LOOKUP_NOREF,
@@ -40,11 +41,21 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 	fib_rules_lookup(net->ipv6.fib6_rules_ops,
 			 flowi6_to_flowi(fl6), flags, &arg);
 
-	if (arg.result)
-		return arg.result;
+	rt = arg.result;
 
-	dst_hold(&net->ipv6.ip6_null_entry->dst);
-	return &net->ipv6.ip6_null_entry->dst;
+	if (!rt) {
+		dst_hold(&net->ipv6.ip6_null_entry->dst);
+		return &net->ipv6.ip6_null_entry->dst;
+	}
+
+	if (rt->rt6i_flags & RTF_REJECT &&
+	    rt->dst.error == -EAGAIN) {
+		ip6_rt_put(rt);
+		rt = net->ipv6.ip6_null_entry;
+		dst_hold(&rt->dst);
+	}
+
+	return &rt->dst;
 }
 
 static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index efb1c00f2270..36c5a98b0472 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -453,7 +453,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	 *	and anycast addresses will be checked later.
 	 */
 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
-		net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n");
+		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
+				    &hdr->saddr, &hdr->daddr);
 		return;
 	}
 
@@ -461,7 +462,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	 *	Never answer to a ICMP packet.
 	 */
 	if (is_ineligible(skb)) {
-		net_dbg_ratelimited("icmp6_send: no reply to icmp error\n");
+		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
+				    &hdr->saddr, &hdr->daddr);
 		return;
 	}
 
@@ -513,7 +515,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	len = skb->len - msg.offset;
 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
 	if (len < 0) {
-		net_dbg_ratelimited("icmp: len problem\n");
+		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
+				    &hdr->saddr, &hdr->daddr);
 		goto out_dst_release;
 	}
 
@@ -785,7 +788,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		if (type & ICMPV6_INFOMSG_MASK)
 			break;
 
-		net_dbg_ratelimited("icmpv6: msg of unknown type\n");
+		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
+				    saddr, daddr);
 
 		/*
 		 * error of unknown type.
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 09fddf70cca4..0c7e276c230e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -286,7 +286,17 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
-	return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+	struct rt6_info *rt;
+
+	rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+	if (rt->rt6i_flags & RTF_REJECT &&
+	    rt->dst.error == -EAGAIN) {
+		ip6_rt_put(rt);
+		rt = net->ipv6.ip6_null_entry;
+		dst_hold(&rt->dst);
+	}
+
+	return &rt->dst;
 }
 
 static void __net_init fib6_tables_init(struct net *net)
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 08b62047c67f..eeca943f12dc 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -264,6 +264,9 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 	struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
 	int err = -ENOSYS;
 
+	if (skb->encapsulation)
+		skb_set_inner_network_header(skb, nhoff);
+
 	iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
 
 	rcu_read_lock();
@@ -280,6 +283,13 @@ out_unlock:
 	return err;
 }
 
+static int sit_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	skb->encapsulation = 1;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_SIT;
+	return ipv6_gro_complete(skb, nhoff);
+}
+
 static struct packet_offload ipv6_packet_offload __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IPV6),
 	.callbacks = {
@@ -292,6 +302,8 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
 static const struct net_offload sit_offload = {
 	.callbacks = {
 		.gso_segment	= ipv6_gso_segment,
+		.gro_receive    = ipv6_gro_receive,
+		.gro_complete   = sit_gro_complete,
 	},
 };
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0c89671e0767..c2650688aca7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -28,6 +28,7 @@
 
 #include <linux/errno.h>
 #include <linux/kernel.h>
+#include <linux/overflow-arith.h>
 #include <linux/string.h>
 #include <linux/socket.h>
 #include <linux/net.h>
@@ -596,7 +597,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		if (np->frag_size)
 			mtu = np->frag_size;
 	}
-	mtu -= hlen + sizeof(struct frag_hdr);
+
+	if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) ||
+	    mtu <= 7)
+		goto fail_toobig;
 
 	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
 				    &ipv6_hdr(skb)->saddr);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 96833e4b3193..f6a024e141e5 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -58,6 +58,7 @@ endif # NF_TABLES
 
 config NF_DUP_IPV6
 	tristate "Netfilter IPv6 packet duplication to alternate destination"
+	depends on !NF_CONNTRACK || NF_CONNTRACK
 	help
 	  This option enables the nf_dup_ipv6 core, which duplicates an
 	  IPv6 packet to be rerouted to another destination.
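The ip6_fragment() hunk replaces an unchecked "mtu -= hlen + sizeof(struct frag_hdr)" with overflow_usub(): with unsigned arithmetic the subtraction silently wraps to a huge value whenever the link MTU is smaller than the headers. A portable sketch of such a checked subtraction; the kernel helper lives in <linux/overflow-arith.h>, and this stand-in is illustrative only:

    #include <stdbool.h>

    static bool usub_overflow(unsigned int a, unsigned int b, unsigned int *res)
    {
    	if (a < b)
    		return true;	/* a - b would wrap around */
    	*res = a - b;
    	return false;
    }

    /* usage mirroring the patch:
     *	if (usub_overflow(mtu, hlen + sizeof(struct frag_hdr), &mtu) ||
     *	    mtu <= 7)
     *		goto fail_toobig;
     */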
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d0619632723a..2701cb3d88e9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1171,6 +1171,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
 {
 	struct dst_entry *dst;
 	int flags = 0;
+	bool any_src;
 
 	dst = l3mdev_rt6_dst_by_oif(net, fl6);
 	if (dst)
@@ -1178,11 +1179,12 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
 
 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
+	any_src = ipv6_addr_any(&fl6->saddr);
 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
-	    fl6->flowi6_oif)
+	    (fl6->flowi6_oif && any_src))
 		flags |= RT6_LOOKUP_F_IFACE;
 
-	if (!ipv6_addr_any(&fl6->saddr))
+	if (!any_src)
 		flags |= RT6_LOOKUP_F_HAS_SADDR;
 	else if (sk)
 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f495d189f5e0..714bc5ad096e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -965,7 +965,9 @@ drop:
 
 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 					 struct request_sock *req,
-					 struct dst_entry *dst)
+					 struct dst_entry *dst,
+					 struct request_sock *req_unhash,
+					 bool *own_req)
 {
 	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *newnp;
@@ -984,7 +986,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 		/*
 		 *	v6 mapped
 		 */
-		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
+		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
+					     req_unhash, own_req);
 
 		if (!newsk)
 			return NULL;
@@ -1145,7 +1148,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 		tcp_done(newsk);
 		goto out;
 	}
-	__inet_hash(newsk, NULL);
+	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
 
 	return newsk;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 9db067a11b52..4d09ce6fa90e 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -79,6 +79,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 
 	if (!skb->ignore_df && skb->len > mtu) {
 		skb->dev = dst->dev;
+		skb->protocol = htons(ETH_P_IPV6);
 
 		if (xfrm6_local_dontfrag(skb))
 			xfrm6_local_rxpmtu(skb, mtu);
@@ -143,6 +144,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	struct dst_entry *dst = skb_dst(skb);
 	struct xfrm_state *x = dst->xfrm;
 	int mtu;
+	bool toobig;
 
 #ifdef CONFIG_NETFILTER
 	if (!x) {
@@ -151,25 +153,29 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	}
 #endif
 
+	if (x->props.mode != XFRM_MODE_TUNNEL)
+		goto skip_frag;
+
 	if (skb->protocol == htons(ETH_P_IPV6))
 		mtu = ip6_skb_dst_mtu(skb);
 	else
 		mtu = dst_mtu(skb_dst(skb));
 
-	if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
+	toobig = skb->len > mtu && !skb_is_gso(skb);
+
+	if (toobig && xfrm6_local_dontfrag(skb)) {
 		xfrm6_local_rxpmtu(skb, mtu);
 		return -EMSGSIZE;
-	} else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
+	} else if (!skb->ignore_df && toobig && skb->sk) {
 		xfrm_local_error(skb, mtu);
 		return -EMSGSIZE;
 	}
 
-	if (x->props.mode == XFRM_MODE_TUNNEL &&
-	    ((skb->len > mtu && !skb_is_gso(skb)) ||
-	     dst_allfrag(skb_dst(skb)))) {
+	if (toobig || dst_allfrag(skb_dst(skb)))
 		return ip6_fragment(net, sk, skb,
 				    __xfrm6_output_finish);
-	}
+
+skip_frag:
 	return x->outer_mode->afinfo->output_finish(sk, skb);
 }
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 08c9c93f3527..2cc5840f943d 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -177,7 +177,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
reverse) return; case IPPROTO_ICMPV6: - if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { + if (!onlyproto && (nh + offset + 2 < skb->data || + pskb_may_pull(skb, nh + offset + 2 - skb->data))) { u8 *icmp; nh = skb_network_header(skb); @@ -191,7 +192,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: offset += ipv6_optlen(exthdr); - if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { + if (!onlyproto && (nh + offset + 3 < skb->data || + pskb_may_pull(skb, nh + offset + 3 - skb->data))) { struct ip6_mh *mh; nh = skb_network_header(skb); diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index a26c401ef4a4..43964594aa12 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -1839,7 +1839,7 @@ static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off) for (element = hashbin_get_first(iter->hashbin); element != NULL; element = hashbin_get_next(iter->hashbin)) { - if (!off || *off-- == 0) { + if (!off || (*off)-- == 0) { /* NB: hashbin left locked */ return element; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 83a70688784b..f9c9ecb0cdd3 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -261,7 +261,7 @@ static int pfkey_broadcast(struct sk_buff *skb, err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); - /* Error is cleare after succecful sending to at least one + /* Error is cleared after successful sending to at least one * registered KM */ if ((broadcast_flags & BROADCAST_REGISTERED) && err) err = err2; diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 783e891b7525..f9137a8341f4 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -27,7 +27,6 @@ mac80211-y := \ key.o \ util.o \ wme.o \ - event.o \ chan.o \ trace.o mlme.o \ tdls.o \ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 68e551e263c6..713cdbf6fb3c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -17,7 +17,6 @@ #include <net/cfg80211.h> #include "ieee80211_i.h" #include "driver-ops.h" -#include "cfg.h" #include "rate.h" #include "mesh.h" #include "wme.h" @@ -469,45 +468,6 @@ void sta_set_rate_info_tx(struct sta_info *sta, rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } -void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) -{ - rinfo->flags = 0; - - if (sta->last_rx_rate_flag & RX_FLAG_HT) { - rinfo->flags |= RATE_INFO_FLAGS_MCS; - rinfo->mcs = sta->last_rx_rate_idx; - } else if (sta->last_rx_rate_flag & RX_FLAG_VHT) { - rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS; - rinfo->nss = sta->last_rx_rate_vht_nss; - rinfo->mcs = sta->last_rx_rate_idx; - } else { - struct ieee80211_supported_band *sband; - int shift = ieee80211_vif_get_shift(&sta->sdata->vif); - u16 brate; - - sband = sta->local->hw.wiphy->bands[ - ieee80211_get_sdata_band(sta->sdata)]; - brate = sband->bitrates[sta->last_rx_rate_idx].bitrate; - rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); - } - - if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI) - rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; - - if (sta->last_rx_rate_flag & RX_FLAG_5MHZ) - rinfo->bw = RATE_INFO_BW_5; - else if (sta->last_rx_rate_flag & RX_FLAG_10MHZ) - rinfo->bw = RATE_INFO_BW_10; - else if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) - rinfo->bw = RATE_INFO_BW_40; - else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) - rinfo->bw = RATE_INFO_BW_80; - else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) - rinfo->bw = RATE_INFO_BW_160; - else - rinfo->bw = RATE_INFO_BW_20; -} - static int 
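
The one-character net/irda/irlmp.c fix above is a classic C precedence bug: postfix `--` binds tighter than unary `*`, so `*off-- == 0` decremented the pointer rather than the offset it points to. A small self-contained demo of the difference (array and values are made up for illustration):

```c
#include <stdio.h>

int main(void)
{
	long counts[2] = { 5, 99 };
	long *off = &counts[1];

	/* Postfix -- binds tighter than unary *, so this parses as
	 * *(off--): it reads counts[1] and steps the POINTER back to
	 * counts[0]; no stored count is ever decremented. */
	long got = *off--;	/* got == 99, counts unchanged */

	/* The fixed form decrements the value the pointer refers to. */
	long want = (*off)--;	/* want == 5, counts[0] becomes 4 */

	printf("buggy: read %ld, counts[1]=%ld\n", got, counts[1]);
	printf("fixed: read %ld, counts[0]=%ld\n", want, counts[0]);
	return 0;
}
```

In the loop being patched, the buggy form walked the loff_t pointer backwards on every element instead of counting the caller's offset down to zero.
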
ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo) { @@ -1138,6 +1098,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, } if (mask & BIT(NL80211_STA_FLAG_MFP)) { + sta->sta.mfp = !!(set & BIT(NL80211_STA_FLAG_MFP)); if (set & BIT(NL80211_STA_FLAG_MFP)) set_sta_flag(sta, WLAN_STA_MFP); else @@ -1427,7 +1388,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (sdata->vif.type == NL80211_IFTYPE_STATION && params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); } @@ -2462,7 +2423,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); return 0; diff --git a/net/mac80211/cfg.h b/net/mac80211/cfg.h deleted file mode 100644 index 2d51f62dc76c..000000000000 --- a/net/mac80211/cfg.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * mac80211 configuration hooks for cfg80211 - */ -#ifndef __CFG_H -#define __CFG_H - -extern const struct cfg80211_ops mac80211_config_ops; - -#endif /* __CFG_H */ diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 06d52935036d..a39512f09f9e 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -50,7 +50,6 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_OPS(name) STA_FILE(aid, sta.aid, D); -STA_FILE(last_ack_signal, last_ack_signal, D); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -366,11 +365,10 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(agg_status); DEBUGFS_ADD(ht_capa); DEBUGFS_ADD(vht_capa); - DEBUGFS_ADD(last_ack_signal); - DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates); - DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments); - DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count); + DEBUGFS_ADD_COUNTER(rx_duplicates, rx_stats.num_duplicates); + DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments); + DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered); if (sizeof(sta->driver_buffered_tids) == sizeof(u32)) debugfs_create_x32("driver_buffered_tids", 0400, diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index 188faab11c24..9cc986deda61 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -40,7 +40,7 @@ static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { "rx_duplicates", "rx_fragments", "rx_dropped", "tx_packets", "tx_bytes", "tx_filtered", "tx_retry_failed", "tx_retries", - "beacon_loss", "sta_state", "txrate", "rxrate", "signal", + "sta_state", "txrate", "rxrate", "signal", "channel", "noise", "ch_time", "ch_time_busy", "ch_time_ext_busy", "ch_time_rx", "ch_time_tx" }; @@ -77,20 +77,19 @@ static void ieee80211_get_stats(struct net_device *dev, memset(data, 0, sizeof(u64) * STA_STATS_LEN); -#define ADD_STA_STATS(sta) \ - do { \ - data[i++] += sta->rx_packets; \ - data[i++] += sta->rx_bytes; \ - data[i++] += sta->num_duplicates; \ - data[i++] += sta->rx_fragments; \ - data[i++] += sta->rx_dropped; \ - \ - data[i++] += sinfo.tx_packets; \ - data[i++] += sinfo.tx_bytes; \ - data[i++] += sta->tx_filtered_count; \ - data[i++] += sta->tx_retry_failed; \ - data[i++] += sta->tx_retry_count; \ - data[i++] += sta->beacon_loss_count; \ +#define ADD_STA_STATS(sta) \ + do { \ + 
data[i++] += sta->rx_stats.packets; \ + data[i++] += sta->rx_stats.bytes; \ + data[i++] += sta->rx_stats.num_duplicates; \ + data[i++] += sta->rx_stats.fragments; \ + data[i++] += sta->rx_stats.dropped; \ + \ + data[i++] += sinfo.tx_packets; \ + data[i++] += sinfo.tx_bytes; \ + data[i++] += sta->status_stats.filtered; \ + data[i++] += sta->status_stats.retry_failed; \ + data[i++] += sta->status_stats.retry_count; \ } while (0) /* For Managed stations, find the single station based on BSSID diff --git a/net/mac80211/event.c b/net/mac80211/event.c deleted file mode 100644 index 01ae759518f6..000000000000 --- a/net/mac80211/event.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * mac80211 - events - */ -#include <net/cfg80211.h> -#include "ieee80211_i.h" - -/* - * Indicate a failed Michael MIC to userspace. If the caller knows the TSC of - * the frame that generated the MIC failure (i.e., if it was provided by the - * driver or is still in the frame), it should provide that information. - */ -void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, - struct ieee80211_hdr *hdr, const u8 *tsc, - gfp_t gfp) -{ - cfg80211_michael_mic_failure(sdata->dev, hdr->addr2, - (hdr->addr1[0] & 0x01) ? - NL80211_KEYTYPE_GROUP : - NL80211_KEYTYPE_PAIRWISE, - keyidx, tsc, gfp); -} diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 7f72bc9bae2e..2001555d49cb 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -229,7 +229,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def chandef; struct ieee80211_channel *chan; struct beacon_data *presp; - enum nl80211_bss_scan_width scan_width; + struct cfg80211_inform_bss bss_meta = {}; bool have_higher_than_11mbit; bool radar_required; int err; @@ -383,10 +383,11 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); - scan_width = cfg80211_chandef_to_scan_width(&chandef); - bss = cfg80211_inform_bss_width_frame(local->hw.wiphy, chan, - scan_width, mgmt, - presp->head_len, 0, GFP_KERNEL); + bss_meta.chan = chan; + bss_meta.scan_width = cfg80211_chandef_to_scan_width(&chandef); + bss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta, mgmt, + presp->head_len, GFP_KERNEL); + cfg80211_put_bss(local->hw.wiphy, bss); netif_carrier_on(sdata->dev); cfg80211_ibss_joined(sdata->dev, ifibss->bssid, chan, GFP_KERNEL); @@ -646,7 +647,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, return NULL; } - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; @@ -668,7 +669,8 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) list_for_each_entry_rcu(sta, &local->sta_list, list) { if (sta->sdata == sdata && - time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL, + time_after(sta->rx_stats.last_rx + + IEEE80211_IBSS_MERGE_INTERVAL, jiffies)) { active++; break; @@ -1234,7 +1236,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, if (!sta) return; - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; /* make sure mandatory rates are always added */ sband = 
local->hw.wiphy->bands[band]; @@ -1252,7 +1254,7 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT; - unsigned long exp_rsn_time = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT; + unsigned long exp_rsn = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT; mutex_lock(&local->sta_mtx); @@ -1260,8 +1262,8 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) if (sdata != sta->sdata) continue; - if (time_after(jiffies, sta->last_rx + exp_time) || - (time_after(jiffies, sta->last_rx + exp_rsn_time) && + if (time_after(jiffies, sta->rx_stats.last_rx + exp_time) || + (time_after(jiffies, sta->rx_stats.last_rx + exp_rsn) && sta->sta_state != IEEE80211_STA_AUTHORIZED)) { sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n", sta->sta_state != IEEE80211_STA_AUTHORIZED ? diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f9605f13def9..62f2a97cd2a6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -34,6 +34,8 @@ #include "sta_info.h" #include "debug.h" +extern const struct cfg80211_ops mac80211_config_ops; + struct ieee80211_local; /* Maximum number of broadcast/multicast frames to buffer when some of the @@ -501,6 +503,9 @@ struct ieee80211_if_managed { */ unsigned int count_beacon_signal; + /* Number of times beacon loss was invoked. */ + unsigned int beacon_loss_count; + /* * Last Beacon frame signal strength average (ave_beacon_signal / 16) * that triggered a cqm event. 0 indicates that no event has been @@ -1305,7 +1310,6 @@ struct ieee80211_local { struct work_struct dynamic_ps_enable_work; struct work_struct dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; - struct notifier_block network_latency_notifier; struct notifier_block ifa_notifier; struct notifier_block ifa6_notifier; @@ -1491,10 +1495,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_disassoc_request *req); void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); -void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency); +void ieee80211_recalc_ps(struct ieee80211_local *local); void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata); -int ieee80211_max_network_latency(struct notifier_block *nb, - unsigned long data, void *dummy); int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -1766,9 +1768,6 @@ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */ int ieee80211_frame_duration(enum ieee80211_band band, size_t len, int rate, int erp, int short_preamble, int shift); -void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, - struct ieee80211_hdr *hdr, const u8 *tsc, - gfp_t gfp); void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, bool bss_notify); void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 42d7f0f65bd6..f848c75518a2 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -709,7 +709,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (hw_reconf_flags) ieee80211_hw_config(local, hw_reconf_flags); - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); if (sdata->vif.type == NL80211_IFTYPE_MONITOR || 
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { @@ -1016,7 +1016,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, drv_remove_interface(local, sdata); } - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); if (cancel_scan) flush_delayed_work(&local->scan_work); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 9b813a2f3a75..273c96de4910 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -20,7 +20,6 @@ #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/bitmap.h> -#include <linux/pm_qos.h> #include <linux/inetdevice.h> #include <net/net_namespace.h> #include <net/cfg80211.h> @@ -32,7 +31,6 @@ #include "mesh.h" #include "wep.h" #include "led.h" -#include "cfg.h" #include "debugfs.h" void ieee80211_configure_filter(struct ieee80211_local *local) @@ -1083,13 +1081,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) rtnl_unlock(); - local->network_latency_notifier.notifier_call = - ieee80211_max_network_latency; - result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, - &local->network_latency_notifier); - if (result) - goto fail_pm_qos; - #ifdef CONFIG_INET local->ifa_notifier.notifier_call = ieee80211_ifa_changed; result = register_inetaddr_notifier(&local->ifa_notifier); @@ -1114,10 +1105,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) #endif #if defined(CONFIG_INET) || defined(CONFIG_IPV6) fail_ifa: - pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, - &local->network_latency_notifier); #endif - fail_pm_qos: rtnl_lock(); rate_control_deinitialize(local); ieee80211_remove_interfaces(local); @@ -1143,8 +1131,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) tasklet_kill(&local->tx_pending_tasklet); tasklet_kill(&local->tasklet); - pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, - &local->network_latency_notifier); #ifdef CONFIG_INET unregister_inetaddr_notifier(&local->ifa_notifier); #endif diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index d80e0a4c16cf..c6be0b4f4058 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -329,7 +329,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, if (sta->mesh->fail_avg >= 100) return MAX_METRIC; - sta_set_rate_info_tx(sta, &sta->last_tx_rate, &rinfo); + sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); rate = cfg80211_calculate_bitrate(&rinfo); if (WARN_ON(!rate)) return MAX_METRIC; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index a360b24b7df8..c1f889270484 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -60,7 +60,9 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata, { s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold; return rssi_threshold == 0 || - (sta && (s8) -ewma_signal_read(&sta->avg_signal) > rssi_threshold); + (sta && + (s8)-ewma_signal_read(&sta->rx_stats.avg_signal) > + rssi_threshold); } /** @@ -390,7 +392,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates); spin_lock_bh(&sta->mesh->plink_lock); - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; /* rates and capabilities don't change during peering */ if (sta->mesh->plink_state == NL80211_PLINK_ESTAB && diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 56ef9a8e151c..ded4b976bb48 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -20,7 +20,6 @@ #include <linux/etherdevice.h> #include <linux/moduleparam.h> #include <linux/rtnetlink.h> -#include 
<linux/pm_qos.h> #include <linux/crc32.h> #include <linux/slab.h> #include <linux/export.h> @@ -1476,7 +1475,7 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) } /* need to hold RTNL or interface lock */ -void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) +void ieee80211_recalc_ps(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata, *found = NULL; int count = 0; @@ -1505,48 +1504,23 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) } if (count == 1 && ieee80211_powersave_allowed(found)) { + u8 dtimper = found->u.mgd.dtim_period; s32 beaconint_us; - if (latency < 0) - latency = pm_qos_request(PM_QOS_NETWORK_LATENCY); - beaconint_us = ieee80211_tu_to_usec( found->vif.bss_conf.beacon_int); timeout = local->dynamic_ps_forced_timeout; - if (timeout < 0) { - /* - * Go to full PSM if the user configures a very low - * latency requirement. - * The 2000 second value is there for compatibility - * until the PM_QOS_NETWORK_LATENCY is configured - * with real values. - */ - if (latency > (1900 * USEC_PER_MSEC) && - latency != (2000 * USEC_PER_SEC)) - timeout = 0; - else - timeout = 100; - } + if (timeout < 0) + timeout = 100; local->hw.conf.dynamic_ps_timeout = timeout; - if (beaconint_us > latency) { - local->ps_sdata = NULL; - } else { - int maxslp = 1; - u8 dtimper = found->u.mgd.dtim_period; - - /* If the TIM IE is invalid, pretend the value is 1 */ - if (!dtimper) - dtimper = 1; - else if (dtimper > 1) - maxslp = min_t(int, dtimper, - latency / beaconint_us); - - local->hw.conf.max_sleep_period = maxslp; - local->hw.conf.ps_dtim_period = dtimper; - local->ps_sdata = found; - } + /* If the TIM IE is invalid, pretend the value is 1 */ + if (!dtimper) + dtimper = 1; + + local->hw.conf.ps_dtim_period = dtimper; + local->ps_sdata = found; } else { local->ps_sdata = NULL; } @@ -1997,7 +1971,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, bss_info_changed); mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); mutex_unlock(&local->iflist_mtx); ieee80211_recalc_smps(sdata); @@ -2165,7 +2139,7 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) __ieee80211_stop_poll(sdata); mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); mutex_unlock(&local->iflist_mtx); if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) @@ -2341,7 +2315,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, goto out; mutex_lock(&sdata->local->iflist_mtx); - ieee80211_recalc_ps(sdata->local, -1); + ieee80211_recalc_ps(sdata->local); mutex_unlock(&sdata->local->iflist_mtx); ifmgd->probe_send_count = 0; @@ -2446,15 +2420,9 @@ static void ieee80211_beacon_connection_loss_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.beacon_connection_loss_work); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct sta_info *sta; - if (ifmgd->associated) { - rcu_read_lock(); - sta = sta_info_get(sdata, ifmgd->bssid); - if (sta) - sta->beacon_loss_count++; - rcu_read_unlock(); - } + if (ifmgd->associated) + ifmgd->beacon_loss_count++; if (ifmgd->connection_loss) { sdata_info(sdata, "Connection to AP %pM lost\n", @@ -3044,8 +3012,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, rate_control_rate_init(sta); - if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) + if (ifmgd->flags & 
IEEE80211_STA_MFP_ENABLED) { set_sta_flag(sta, WLAN_STA_MFP); + sta->sta.mfp = true; + } else { + sta->sta.mfp = false; + } sta->sta.wme = elems.wmm_param && local->hw.queues >= IEEE80211_NUM_ACS; @@ -3544,7 +3516,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ifmgd->have_beacon = true; mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); mutex_unlock(&local->iflist_mtx); ieee80211_recalc_ps_vif(sdata); @@ -4148,21 +4120,6 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) rcu_read_unlock(); } -int ieee80211_max_network_latency(struct notifier_block *nb, - unsigned long data, void *dummy) -{ - s32 latency_usec = (s32) data; - struct ieee80211_local *local = - container_of(nb, struct ieee80211_local, - network_latency_notifier); - - mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, latency_usec); - mutex_unlock(&local->iflist_mtx); - - return NOTIFY_OK; -} - static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss) { diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 573b81a1fb2d..0be0aadfc559 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -75,7 +75,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, if (!sta) return; - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; /* Add only mandatory rates for now */ sband = local->hw.wiphy->bands[band]; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5bc0b88d9eb1..8bae5de0dc44 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1113,16 +1113,16 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) is_multicast_ether_addr(hdr->addr1)) return RX_CONTINUE; - if (rx->sta) { - if (unlikely(ieee80211_has_retry(hdr->frame_control) && - rx->sta->last_seq_ctrl[rx->seqno_idx] == - hdr->seq_ctrl)) { - I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); - rx->sta->num_duplicates++; - return RX_DROP_UNUSABLE; - } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { - rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; - } + if (!rx->sta) + return RX_CONTINUE; + + if (unlikely(ieee80211_has_retry(hdr->frame_control) && + rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { + I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); + rx->sta->rx_stats.num_duplicates++; + return RX_DROP_UNUSABLE; + } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { + rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; } return RX_CONTINUE; @@ -1396,51 +1396,56 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) NL80211_IFTYPE_ADHOC); if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) { - sta->last_rx_rate_idx = status->rate_idx; - sta->last_rx_rate_flag = status->flag; - sta->last_rx_rate_vht_flag = status->vht_flag; - sta->last_rx_rate_vht_nss = status->vht_nss; + sta->rx_stats.last_rate_idx = + status->rate_idx; + sta->rx_stats.last_rate_flag = + status->flag; + sta->rx_stats.last_rate_vht_flag = + status->vht_flag; + sta->rx_stats.last_rate_vht_nss = + status->vht_nss; } } } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; } else if (!is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to * match the current local configuration when processed. 
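
The ieee80211_rx_h_check_dup() rewrite above flattens the nesting but keeps the rule intact: a frame is dropped as a duplicate only when its Retry bit is set and its sequence control matches the last one cached for that queue; otherwise, unless the frame is a further subframe of the same A-MSDU, the cache is refreshed. A minimal sketch with stand-in types:

```c
#include <stdbool.h>
#include <stdint.h>

/* Per-peer duplicate-detection cache, one slot per TID plus one for
 * non-QoS frames, mirroring the idea behind sta->last_seq_ctrl[]
 * (simplified to host-order u16). */
struct dup_cache {
	uint16_t last_seq_ctrl[17];
};

/* Returns true if the frame should be dropped as a duplicate.
 * 'retry' is the IEEE 802.11 Retry bit; 'amsdu_more' marks additional
 * subframes of one A-MSDU, which must not refresh the cache. */
static bool rx_is_duplicate(struct dup_cache *c, int queue,
			    uint16_t seq_ctrl, bool retry, bool amsdu_more)
{
	if (retry && c->last_seq_ctrl[queue] == seq_ctrl)
		return true;		/* retransmission we already saw */

	if (!amsdu_more)
		c->last_seq_ctrl[queue] = seq_ctrl;

	return false;
}
```
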
*/ - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control)) { - sta->last_rx_rate_idx = status->rate_idx; - sta->last_rx_rate_flag = status->flag; - sta->last_rx_rate_vht_flag = status->vht_flag; - sta->last_rx_rate_vht_nss = status->vht_nss; + sta->rx_stats.last_rate_idx = status->rate_idx; + sta->rx_stats.last_rate_flag = status->flag; + sta->rx_stats.last_rate_vht_flag = status->vht_flag; + sta->rx_stats.last_rate_vht_nss = status->vht_nss; } } if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_sta_rx_notify(rx->sdata, hdr); - sta->rx_fragments++; - sta->rx_bytes += rx->skb->len; + sta->rx_stats.fragments++; + sta->rx_stats.bytes += rx->skb->len; if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { - sta->last_signal = status->signal; - ewma_signal_add(&sta->avg_signal, -status->signal); + sta->rx_stats.last_signal = status->signal; + ewma_signal_add(&sta->rx_stats.avg_signal, -status->signal); } if (status->chains) { - sta->chains = status->chains; + sta->rx_stats.chains = status->chains; for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { int signal = status->chain_signal[i]; if (!(status->chains & BIT(i))) continue; - sta->chain_signal_last[i] = signal; - ewma_signal_add(&sta->chain_signal_avg[i], -signal); + sta->rx_stats.chain_signal_last[i] = signal; + ewma_signal_add(&sta->rx_stats.chain_signal_avg[i], + -signal); } } @@ -1500,7 +1505,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Update counter and free packet here to avoid * counting this as a dropped packed. */ - sta->rx_packets++; + sta->rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -1922,7 +1927,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) ieee80211_led_rx(rx->local); out_no_led: if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; return RX_CONTINUE; } @@ -2376,7 +2381,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) * for non-QoS-data frames. Here we know it's a data * frame, so count MSDUs. 
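
Note the negation in the hunks above: received signal strength is a negative dBm value, but the kernel's EWMA helpers operate on unsigned quantities, so the RX path stores -signal and readers such as sta_set_sinfo() negate again on the way out. A loose userspace analogue of DECLARE_EWMA(signal, 1024, 8) — not the kernel macro's exact arithmetic, just the same fixed-point idea:

```c
#include <stdio.h>

/* Loose analogue of DECLARE_EWMA(signal, 1024, 8): values are scaled
 * by 1024 internally and each new sample contributes 1/8 of the
 * average; internal == 0 means "no samples yet". */
struct ewma_signal {
	unsigned long internal;
};

#define EWMA_PRECISION	1024UL
#define EWMA_WEIGHT	8UL

static void ewma_signal_add(struct ewma_signal *e, unsigned long val)
{
	unsigned long scaled = val * EWMA_PRECISION;

	e->internal = e->internal ?
		(e->internal * (EWMA_WEIGHT - 1) + scaled) / EWMA_WEIGHT :
		scaled;
}

static unsigned long ewma_signal_read(const struct ewma_signal *e)
{
	return e->internal / EWMA_PRECISION;
}

int main(void)
{
	struct ewma_signal avg = { 0 };
	int signal_dbm[] = { -55, -60, -58 };	/* as seen in rx status */

	for (unsigned int i = 0; i < 3; i++)
		ewma_signal_add(&avg, -signal_dbm[i]);	/* store negated */

	/* Read back and negate again, as sta_set_sinfo() does. */
	printf("signal_avg = %d dBm\n", (int)-(long)ewma_signal_read(&avg));
	return 0;
}
```
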
*/ - rx->sta->rx_msdu[rx->seqno_idx]++; + rx->sta->rx_stats.msdu[rx->seqno_idx]++; } /* @@ -2413,7 +2418,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) skb_queue_tail(&local->skb_queue_tdls_chsw, rx->skb); schedule_work(&local->tdls_chsw_work); if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; return RX_QUEUED; } @@ -2875,7 +2880,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) handled: if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; @@ -2884,7 +2889,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) skb_queue_tail(&sdata->skb_queue, rx->skb); ieee80211_queue_work(&local->hw, &sdata->work); if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; return RX_QUEUED; } @@ -2911,7 +2916,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig, rx->skb->data, rx->skb->len, 0)) { if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -3030,7 +3035,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) skb_queue_tail(&sdata->skb_queue, rx->skb); ieee80211_queue_work(&rx->local->hw, &sdata->work); if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; return RX_QUEUED; } @@ -3112,7 +3117,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, case RX_DROP_MONITOR: I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); if (rx->sta) - rx->sta->rx_dropped++; + rx->sta->rx_stats.dropped++; /* fall through */ case RX_CONTINUE: { struct ieee80211_rate *rate = NULL; @@ -3132,7 +3137,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, case RX_DROP_UNUSABLE: I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); if (rx->sta) - rx->sta->rx_dropped++; + rx->sta->rx_stats.dropped++; dev_kfree_skb(rx->skb); break; case RX_QUEUED: diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 11d0901ebb7b..b64fd2b2d95a 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -16,7 +16,6 @@ #include <linux/if_arp.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> -#include <linux/pm_qos.h> #include <net/sch_generic.h> #include <linux/slab.h> #include <linux/export.h> @@ -67,24 +66,23 @@ ieee80211_bss_info_update(struct ieee80211_local *local, struct cfg80211_bss *cbss; struct ieee80211_bss *bss; int clen, srlen; - enum nl80211_bss_scan_width scan_width; - s32 signal = 0; + struct cfg80211_inform_bss bss_meta = {}; bool signal_valid; if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) - signal = rx_status->signal * 100; + bss_meta.signal = rx_status->signal * 100; else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) - signal = (rx_status->signal * 100) / local->hw.max_signal; + bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal; - scan_width = NL80211_BSS_CHAN_WIDTH_20; + bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_20; if (rx_status->flag & RX_FLAG_5MHZ) - scan_width = NL80211_BSS_CHAN_WIDTH_5; + bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_5; if (rx_status->flag & RX_FLAG_10MHZ) - scan_width = NL80211_BSS_CHAN_WIDTH_10; + bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10; - cbss = cfg80211_inform_bss_width_frame(local->hw.wiphy, channel, - scan_width, mgmt, len, signal, - GFP_ATOMIC); + bss_meta.chan = channel; + cbss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta, + mgmt, len, GFP_ATOMIC); if (!cbss) return NULL; /* In case the signal is invalid update the status */ diff --git 
a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index c3644458e2ee..f91d1873218c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -331,7 +331,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; sta->sdata = sdata; - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; sta->sta_state = IEEE80211_STA_NONE; @@ -339,9 +339,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->reserved_tid = IEEE80211_TID_UNRESERVED; sta->last_connected = ktime_get_seconds(); - ewma_signal_init(&sta->avg_signal); - for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++) - ewma_signal_init(&sta->chain_signal_avg[i]); + ewma_signal_init(&sta->rx_stats.avg_signal); + for (i = 0; i < ARRAY_SIZE(sta->rx_stats.chain_signal_avg); i++) + ewma_signal_init(&sta->rx_stats.chain_signal_avg[i]); if (local->ops->wake_tx_queue) { void *txq_data; @@ -1066,7 +1066,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, if (sdata != sta->sdata) continue; - if (time_after(jiffies, sta->last_rx + exp_time)) { + if (time_after(jiffies, sta->rx_stats.last_rx + exp_time)) { sta_dbg(sta->sdata, "expiring inactive STA %pM\n", sta->sta.addr); @@ -1806,6 +1806,45 @@ u8 sta_info_tx_streams(struct sta_info *sta) >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1; } +static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) +{ + rinfo->flags = 0; + + if (sta->rx_stats.last_rate_flag & RX_FLAG_HT) { + rinfo->flags |= RATE_INFO_FLAGS_MCS; + rinfo->mcs = sta->rx_stats.last_rate_idx; + } else if (sta->rx_stats.last_rate_flag & RX_FLAG_VHT) { + rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS; + rinfo->nss = sta->rx_stats.last_rate_vht_nss; + rinfo->mcs = sta->rx_stats.last_rate_idx; + } else { + struct ieee80211_supported_band *sband; + int shift = ieee80211_vif_get_shift(&sta->sdata->vif); + u16 brate; + + sband = sta->local->hw.wiphy->bands[ + ieee80211_get_sdata_band(sta->sdata)]; + brate = sband->bitrates[sta->rx_stats.last_rate_idx].bitrate; + rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); + } + + if (sta->rx_stats.last_rate_flag & RX_FLAG_SHORT_GI) + rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; + + if (sta->rx_stats.last_rate_flag & RX_FLAG_5MHZ) + rinfo->bw = RATE_INFO_BW_5; + else if (sta->rx_stats.last_rate_flag & RX_FLAG_10MHZ) + rinfo->bw = RATE_INFO_BW_10; + else if (sta->rx_stats.last_rate_flag & RX_FLAG_40MHZ) + rinfo->bw = RATE_INFO_BW_40; + else if (sta->rx_stats.last_rate_vht_flag & RX_VHT_FLAG_80MHZ) + rinfo->bw = RATE_INFO_BW_80; + else if (sta->rx_stats.last_rate_vht_flag & RX_VHT_FLAG_160MHZ) + rinfo->bw = RATE_INFO_BW_160; + else + rinfo->bw = RATE_INFO_BW_20; +} + void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -1832,50 +1871,54 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) BIT(NL80211_STA_INFO_STA_FLAGS) | BIT(NL80211_STA_INFO_BSS_PARAM) | BIT(NL80211_STA_INFO_CONNECTED_TIME) | - BIT(NL80211_STA_INFO_RX_DROP_MISC) | - BIT(NL80211_STA_INFO_BEACON_LOSS); + BIT(NL80211_STA_INFO_RX_DROP_MISC); + + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + sinfo->beacon_loss_count = sdata->u.mgd.beacon_loss_count; + sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_LOSS); + } sinfo->connected_time = ktime_get_seconds() - sta->last_connected; - sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); + sinfo->inactive_time = + jiffies_to_msecs(jiffies - sta->rx_stats.last_rx); 
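
The sta_info reshuffle visible here (and in sta_info.h further down) groups counters by the one context allowed to write them — RX path, TX path, TX-status path — which is what lets the comments claim "no locking requirements": every field has a single writer, and readers like sta_set_sinfo() merely tolerate slightly stale values. A compressed sketch of the pattern, with the field set trimmed for brevity:

```c
/* Single-writer statistics layout: each sub-struct is written from
 * exactly one context, so plain loads and stores suffice; concurrent
 * readers may observe slightly stale but per-field consistent values. */
struct sta_stats_sketch {
	struct {			/* written only from the RX path */
		unsigned long packets;
		unsigned long last_rx;	/* jiffies of last received frame */
		int last_signal;
	} rx_stats;

	struct {			/* written only from TX status */
		unsigned long retry_count;
		unsigned long filtered;
	} status_stats;

	struct {			/* written only from the TX path */
		unsigned long packets;
	} tx_stats;
};
```

The same reasoning explains moving beacon_loss_count out of sta_info: beacon loss is a property of the managed interface, so it now lives in ieee80211_if_managed where only the MLME work writes it.
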
if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) | BIT(NL80211_STA_INFO_TX_BYTES)))) { sinfo->tx_bytes = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) - sinfo->tx_bytes += sta->tx_bytes[ac]; + sinfo->tx_bytes += sta->tx_stats.bytes[ac]; sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64); } if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) { sinfo->tx_packets = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) - sinfo->tx_packets += sta->tx_packets[ac]; + sinfo->tx_packets += sta->tx_stats.packets[ac]; sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); } if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) | BIT(NL80211_STA_INFO_RX_BYTES)))) { - sinfo->rx_bytes = sta->rx_bytes; + sinfo->rx_bytes = sta->rx_stats.bytes; sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); } if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) { - sinfo->rx_packets = sta->rx_packets; + sinfo->rx_packets = sta->rx_stats.packets; sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); } if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) { - sinfo->tx_retries = sta->tx_retry_count; + sinfo->tx_retries = sta->status_stats.retry_count; sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES); } if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) { - sinfo->tx_failed = sta->tx_retry_failed; + sinfo->tx_failed = sta->status_stats.retry_failed; sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED); } - sinfo->rx_dropped_misc = sta->rx_dropped; - sinfo->beacon_loss_count = sta->beacon_loss_count; + sinfo->rx_dropped_misc = sta->rx_stats.dropped; if (sdata->vif.type == NL80211_IFTYPE_STATION && !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { @@ -1887,33 +1930,35 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { - sinfo->signal = (s8)sta->last_signal; + sinfo->signal = (s8)sta->rx_stats.last_signal; sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = - (s8) -ewma_signal_read(&sta->avg_signal); + -ewma_signal_read(&sta->rx_stats.avg_signal); sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); } } - if (sta->chains && + if (sta->rx_stats.chains && !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); - sinfo->chains = sta->chains; + sinfo->chains = sta->rx_stats.chains; for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { - sinfo->chain_signal[i] = sta->chain_signal_last[i]; + sinfo->chain_signal[i] = + sta->rx_stats.chain_signal_last[i]; sinfo->chain_signal_avg[i] = - (s8) -ewma_signal_read(&sta->chain_signal_avg[i]); + -ewma_signal_read(&sta->rx_stats.chain_signal_avg[i]); } } if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) { - sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); + sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, + &sinfo->txrate); sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); } @@ -1928,12 +1973,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); - tidstats->rx_msdu = sta->rx_msdu[i]; + tidstats->rx_msdu = sta->rx_stats.msdu[i]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { tidstats->filled |= 
BIT(NL80211_TID_STATS_TX_MSDU); - tidstats->tx_msdu = sta->tx_msdu[i]; + tidstats->tx_msdu = sta->tx_stats.msdu[i]; } if (!(tidstats->filled & @@ -1941,7 +1986,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); - tidstats->tx_msdu_retries = sta->tx_msdu_retries[i]; + tidstats->tx_msdu_retries = + sta->status_stats.msdu_retries[i]; } if (!(tidstats->filled & @@ -1949,7 +1995,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED); - tidstats->tx_msdu_failed = sta->tx_msdu_failed[i]; + tidstats->tx_msdu_failed = + sta->status_stats.msdu_failed[i]; } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index d5ded8749ac4..2cafb21b422f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -344,12 +344,6 @@ DECLARE_EWMA(signal, 1024, 8) * @rate_ctrl_lock: spinlock used to protect rate control data * (data inside the algorithm, so serializes calls there) * @rate_ctrl_priv: rate control private per-STA pointer - * @last_tx_rate: rate used for last transmit, to report to userspace as - * "the" transmit rate - * @last_rx_rate_idx: rx status rate index of the last data packet - * @last_rx_rate_flag: rx status flag of the last data packet - * @last_rx_rate_vht_flag: rx status vht flag of the last data packet - * @last_rx_rate_vht_nss: rx status nss of last data packet * @lock: used for locking all fields that require locking, see comments * in the header file. * @drv_deliver_wk: used for delivering frames after driver PS unblocking @@ -364,23 +358,9 @@ DECLARE_EWMA(signal, 1024, 8) * the station when it leaves powersave or polls for frames * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on - * @rx_packets: Number of MSDUs received from this STA - * @rx_bytes: Number of bytes received from this STA - * @last_rx: time (in jiffies) when last frame was received from this STA * @last_connected: time (in seconds) when a station got connected - * @num_duplicates: number of duplicate frames received from this STA - * @rx_fragments: number of received MPDUs - * @rx_dropped: number of dropped MPDUs from this STA - * @last_signal: signal of last received frame from this STA - * @avg_signal: moving average of signal of received frames from this STA - * @last_ack_signal: signal of last received Ack frame from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per TID * plus one for non-QoS frames) - * @tx_filtered_count: number of frames the hardware filtered for this STA - * @tx_retry_failed: number of frames that failed retry - * @tx_retry_count: total number of retries for frames to this STA - * @tx_packets: number of RX/TX MSDUs - * @tx_bytes: number of bytes transmitted to this STA * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers @@ -388,32 +368,22 @@ DECLARE_EWMA(signal, 1024, 8) * @debugfs: debug filesystem info * @dead: set to true when sta is unlinked * @uploaded: set to true when sta is uploaded to the driver - * @lost_packets: number of consecutive lost packets * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * 
@beacon_loss_count: number of times beacon loss has triggered * @rcu_head: RCU head used for freeing this station struct * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, * taken from HT/VHT capabilities or VHT operating mode notification - * @chains: chains ever used for RX from this station - * @chain_signal_last: last signal (per chain) - * @chain_signal_avg: signal average (per chain) * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for * AP only. * @cipher_scheme: optional cipher scheme for this station - * @last_tdls_pkt_time: holds the time in jiffies of last TDLS pkt ACKed * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) - * @tx_msdu: MSDUs transmitted to this station, using IEEE80211_NUM_TID - * entry for non-QoS frames - * @tx_msdu_retries: MSDU retries for transmissions to to this station, - * using IEEE80211_NUM_TID entry for non-QoS frames - * @tx_msdu_failed: MSDU failures for transmissions to to this station, - * using IEEE80211_NUM_TID entry for non-QoS frames - * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID - * entry for non-QoS frames * @fast_tx: TX fastpath information * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to * the BSS one. + * @tx_stats: TX statistics + * @rx_stats: RX statistics + * @status_stats: TX status statistics */ struct sta_info { /* General information, mostly static */ @@ -457,42 +427,49 @@ struct sta_info { unsigned long driver_buffered_tids; unsigned long txq_buffered_tids; - /* Updated from RX path only, no locking requirements */ - unsigned long rx_packets; - u64 rx_bytes; - unsigned long last_rx; long last_connected; - unsigned long num_duplicates; - unsigned long rx_fragments; - unsigned long rx_dropped; - int last_signal; - struct ewma_signal avg_signal; - int last_ack_signal; - u8 chains; - s8 chain_signal_last[IEEE80211_MAX_CHAINS]; - struct ewma_signal chain_signal_avg[IEEE80211_MAX_CHAINS]; + /* Updated from RX path only, no locking requirements */ + struct { + unsigned long packets; + u64 bytes; + unsigned long last_rx; + unsigned long num_duplicates; + unsigned long fragments; + unsigned long dropped; + int last_signal; + struct ewma_signal avg_signal; + u8 chains; + s8 chain_signal_last[IEEE80211_MAX_CHAINS]; + struct ewma_signal chain_signal_avg[IEEE80211_MAX_CHAINS]; + int last_rate_idx; + u32 last_rate_flag; + u32 last_rate_vht_flag; + u8 last_rate_vht_nss; + u64 msdu[IEEE80211_NUM_TIDS + 1]; + } rx_stats; /* Plus 1 for non-QoS frames */ __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; /* Updated from TX status path only, no locking requirements */ - unsigned long tx_filtered_count; - unsigned long tx_retry_failed, tx_retry_count; + struct { + unsigned long filtered; + unsigned long retry_failed, retry_count; + unsigned int lost_packets; + unsigned long last_tdls_pkt_time; + u64 msdu_retries[IEEE80211_NUM_TIDS + 1]; + u64 msdu_failed[IEEE80211_NUM_TIDS + 1]; + } status_stats; /* Updated from TX path only, no locking requirements */ - u64 tx_packets[IEEE80211_NUM_ACS]; - u64 tx_bytes[IEEE80211_NUM_ACS]; - struct ieee80211_tx_rate last_tx_rate; - int last_rx_rate_idx; - u32 last_rx_rate_flag; - u32 last_rx_rate_vht_flag; - u8 last_rx_rate_vht_nss; + struct { + u64 packets[IEEE80211_NUM_ACS]; + u64 bytes[IEEE80211_NUM_ACS]; + struct ieee80211_tx_rate last_rate; + u64 msdu[IEEE80211_NUM_TIDS + 1]; + } tx_stats; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; - u64 tx_msdu[IEEE80211_NUM_TIDS + 1]; - u64 
tx_msdu_retries[IEEE80211_NUM_TIDS + 1]; - u64 tx_msdu_failed[IEEE80211_NUM_TIDS + 1]; - u64 rx_msdu[IEEE80211_NUM_TIDS + 1]; /* * Aggregation information, locked with lock. @@ -509,15 +486,9 @@ struct sta_info { enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; - unsigned int lost_packets; - unsigned int beacon_loss_count; - enum ieee80211_smps_mode known_smps_mode; const struct ieee80211_cipher_scheme *cipher_scheme; - /* TDLS timeout data */ - unsigned long last_tdls_pkt_time; - u8 reserved_tid; struct cfg80211_chan_def tdls_chandef; @@ -688,8 +659,6 @@ static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo); -void sta_set_rate_info_rx(struct sta_info *sta, - struct rate_info *rinfo); void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo); void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 9169ccc36534..5bad05e9af90 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -67,7 +67,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, IEEE80211_TX_INTFL_RETRANSMISSION; info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; - sta->tx_filtered_count++; + sta->status_stats.filtered++; /* * Clear more-data bit on filtered frames, it might be set @@ -183,7 +183,7 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) struct ieee80211_sub_if_data *sdata = sta->sdata; if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *) skb->data; @@ -557,8 +557,9 @@ static void ieee80211_lost_packet(struct sta_info *sta, !(info->flags & IEEE80211_TX_STAT_AMPDU)) return; - sta->lost_packets++; - if (!sta->sta.tdls && sta->lost_packets < STA_LOST_PKT_THRESHOLD) + sta->status_stats.lost_packets++; + if (!sta->sta.tdls && + sta->status_stats.lost_packets < STA_LOST_PKT_THRESHOLD) return; /* @@ -568,14 +569,15 @@ static void ieee80211_lost_packet(struct sta_info *sta, * mechanism. 
*/ if (sta->sta.tdls && - (sta->lost_packets < STA_LOST_TDLS_PKT_THRESHOLD || + (sta->status_stats.lost_packets < STA_LOST_TDLS_PKT_THRESHOLD || time_before(jiffies, - sta->last_tdls_pkt_time + STA_LOST_TDLS_PKT_TIME))) + sta->status_stats.last_tdls_pkt_time + + STA_LOST_TDLS_PKT_TIME))) return; cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr, - sta->lost_packets, GFP_ATOMIC); - sta->lost_packets = 0; + sta->status_stats.lost_packets, GFP_ATOMIC); + sta->status_stats.lost_packets = 0; } static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, @@ -636,18 +638,18 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, sta = container_of(pubsta, struct sta_info, sta); if (!acked) - sta->tx_retry_failed++; - sta->tx_retry_count += retry_count; + sta->status_stats.retry_failed++; + sta->status_stats.retry_count += retry_count; if (acked) { - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; - if (sta->lost_packets) - sta->lost_packets = 0; + if (sta->status_stats.lost_packets) + sta->status_stats.lost_packets = 0; /* Track when last TDLS packet was ACKed */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) - sta->last_tdls_pkt_time = jiffies; + sta->status_stats.last_tdls_pkt_time = jiffies; } else { ieee80211_lost_packet(sta, info); } @@ -784,7 +786,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) && (ieee80211_is_data(hdr->frame_control)) && (rates_idx != -1)) - sta->last_tx_rate = info->status.rates[rates_idx]; + sta->tx_stats.last_rate = + info->status.rates[rates_idx]; if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && (ieee80211_is_data_qos(fc))) { @@ -830,13 +833,15 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } else { if (!acked) - sta->tx_retry_failed++; - sta->tx_retry_count += retry_count; + sta->status_stats.retry_failed++; + sta->status_stats.retry_count += retry_count; if (ieee80211_is_data_present(fc)) { if (!acked) - sta->tx_msdu_failed[tid]++; - sta->tx_msdu_retries[tid] += retry_count; + sta->status_stats.msdu_failed[tid]++; + + sta->status_stats.msdu_retries[tid] += + retry_count; } } @@ -854,19 +859,17 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { if (info->flags & IEEE80211_TX_STAT_ACK) { - if (sta->lost_packets) - sta->lost_packets = 0; + if (sta->status_stats.lost_packets) + sta->status_stats.lost_packets = 0; /* Track when last TDLS packet was ACKed */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) - sta->last_tdls_pkt_time = jiffies; + sta->status_stats.last_tdls_pkt_time = + jiffies; } else { ieee80211_lost_packet(sta, info); } } - - if (acked) - sta->last_ack_signal = info->status.ack_signal; } rcu_read_unlock(); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 314e3bd7fbdb..5cf8f4ea077f 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -325,7 +325,6 @@ TRACE_EVENT(drv_config, __field(u32, flags) __field(int, power_level) __field(int, dynamic_ps_timeout) - __field(int, max_sleep_period) __field(u16, listen_interval) __field(u8, long_frame_max_tx_count) __field(u8, short_frame_max_tx_count) @@ -339,7 +338,6 @@ TRACE_EVENT(drv_config, __entry->flags = local->hw.conf.flags; __entry->power_level = local->hw.conf.power_level; __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout; - __entry->max_sleep_period = local->hw.conf.max_sleep_period; __entry->listen_interval = local->hw.conf.listen_interval; 
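
The status.c hunks above only reroute the lost-packet bookkeeping through status_stats; the notification policy is unchanged: report to userspace after a run of consecutive losses, with a lower count threshold for TDLS peers that additionally requires the last ACKed TDLS frame to be old enough. A condensed form of that decision (threshold values are stand-ins; the real ones live in sta_info.h, and jiffies are modelled as plain timestamps):

```c
#include <stdbool.h>

#define STA_LOST_PKT_THRESHOLD		50	/* illustrative */
#define STA_LOST_TDLS_PKT_THRESHOLD	10	/* illustrative */
#define STA_LOST_TDLS_PKT_TIME		10000	/* illustrative, ~10s */

/* Returns true when a cqm packet-loss event should be sent, mirroring
 * the early-return structure of ieee80211_lost_packet(). */
static bool should_report_pktloss(unsigned int lost_packets, bool tdls,
				  unsigned long now,
				  unsigned long last_tdls_ack)
{
	if (!tdls)
		return lost_packets >= STA_LOST_PKT_THRESHOLD;

	/* TDLS: fewer consecutive losses suffice, but only once the
	 * last ACKed TDLS packet is old enough that the direct link,
	 * rather than a momentary glitch, is the likely culprit. */
	return lost_packets >= STA_LOST_TDLS_PKT_THRESHOLD &&
	       now - last_tdls_ack >= STA_LOST_TDLS_PKT_TIME;
}
```
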
__entry->long_frame_max_tx_count = local->hw.conf.long_frame_max_tx_count; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3478a83187e5..bdc224d5053a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -757,9 +757,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) if (txrc.reported_rate.idx < 0) { txrc.reported_rate = tx->rate; if (tx->sta && ieee80211_is_data(hdr->frame_control)) - tx->sta->last_tx_rate = txrc.reported_rate; + tx->sta->tx_stats.last_rate = txrc.reported_rate; } else if (tx->sta) - tx->sta->last_tx_rate = txrc.reported_rate; + tx->sta->tx_stats.last_rate = txrc.reported_rate; if (ratetbl) return TX_CONTINUE; @@ -824,7 +824,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number); tx->sdata->sequence_number += 0x10; if (tx->sta) - tx->sta->tx_msdu[IEEE80211_NUM_TIDS]++; + tx->sta->tx_stats.msdu[IEEE80211_NUM_TIDS]++; return TX_CONTINUE; } @@ -840,7 +840,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; - tx->sta->tx_msdu[tid]++; + tx->sta->tx_stats.msdu[tid]++; if (!tx->sta->sta.txq[0]) hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); @@ -994,10 +994,10 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) skb_queue_walk(&tx->skbs, skb) { ac = skb_get_queue_mapping(skb); - tx->sta->tx_bytes[ac] += skb->len; + tx->sta->tx_stats.bytes[ac] += skb->len; } if (ac >= 0) - tx->sta->tx_packets[ac]++; + tx->sta->tx_stats.packets[ac]++; return TX_CONTINUE; } @@ -2779,10 +2779,10 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, } if (skb_shinfo(skb)->gso_size) - sta->tx_msdu[tid] += + sta->tx_stats.msdu[tid] += DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); else - sta->tx_msdu[tid]++; + sta->tx_stats.msdu[tid]++; info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; @@ -2813,8 +2813,8 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, /* statistics normally done by ieee80211_tx_h_stats (but that * has to consider fragmentation, so is more complex) */ - sta->tx_bytes[skb_get_queue_mapping(skb)] += skb->len; - sta->tx_packets[skb_get_queue_mapping(skb)]++; + sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; if (fast_tx->pn_offs) { u64 pn; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 60c4dbf92625..8274c86296f9 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1951,7 +1951,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); /* * The sta might be in psm against the ap (e.g. because @@ -2042,9 +2042,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (sched_scan_sdata && sched_scan_req) /* * Sched scan stopped, but we don't want to report it. Instead, - * we're trying to reschedule. + * we're trying to reschedule. However, if more than one scan + * plan was set, we cannot reschedule since we don't know which + * scan plan was currently running (and some scan plans may have + * already finished). 
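
One detail worth calling out in the tx.c fast-path hunk above: a GSO super-frame still represents multiple MSDUs, so the per-TID counter advances by the number of segments the frame will be split into, computed as a ceiling division. A tiny sketch of that accounting:

```c
/* Ceiling division, as the kernel's DIV_ROUND_UP() does it. */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* MSDU accounting for a possibly-GSO frame on the TX fast path:
 * a GSO super-frame counts once per segment it will become;
 * a plain frame counts once. */
static unsigned long tx_msdu_increment(unsigned int skb_len,
				       unsigned int gso_size)
{
	return gso_size ? DIV_ROUND_UP(skb_len, gso_size) : 1;
}
```
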
*/ - if (__ieee80211_request_sched_scan_start(sched_scan_sdata, + if (sched_scan_req->n_scan_plans > 1 || + __ieee80211_request_sched_scan_start(sched_scan_sdata, sched_scan_req)) sched_scan_stopped = true; mutex_unlock(&local->mtx); @@ -3301,9 +3305,11 @@ void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata, if (sta) { txqi->txq.sta = &sta->sta; sta->sta.txq[tid] = &txqi->txq; + txqi->txq.tid = tid; txqi->txq.ac = ieee802_1d_to_ac[tid & 7]; } else { sdata->vif.txq = &txqi->txq; + txqi->txq.tid = 0; txqi->txq.ac = IEEE80211_AC_BE; } } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index feb547dc8643..d824c38971ed 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -174,9 +174,12 @@ mic_fail_no_key: * a driver that supports HW encryption. Send up the key idx only if * the key is set. */ - mac80211_ev_michael_mic_failure(rx->sdata, - rx->key ? rx->key->conf.keyidx : -1, - (void *) skb->data, NULL, GFP_ATOMIC); + cfg80211_michael_mic_failure(rx->sdata->dev, hdr->addr2, + is_multicast_ether_addr(hdr->addr1) ? + NL80211_KEYTYPE_GROUP : + NL80211_KEYTYPE_PAIRWISE, + rx->key ? rx->key->conf.keyidx : -1, + NULL, GFP_ATOMIC); return RX_DROP_UNUSABLE; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index bb185a28de98..c70d750148b6 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -19,36 +19,13 @@ #include <net/ipv6.h> #include <net/addrconf.h> #endif +#include <net/nexthop.h> #include "internal.h" -#define LABEL_NOT_SPECIFIED (1<<20) -#define MAX_NEW_LABELS 2 - -/* This maximum ha length copied from the definition of struct neighbour */ -#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))) - -enum mpls_payload_type { - MPT_UNSPEC, /* IPv4 or IPv6 */ - MPT_IPV4 = 4, - MPT_IPV6 = 6, - - /* Other types not implemented: - * - Pseudo-wire with or without control word (RFC4385) - * - GAL (RFC5586) - */ -}; - -struct mpls_route { /* next hop label forwarding entry */ - struct net_device __rcu *rt_dev; - struct rcu_head rt_rcu; - u32 rt_label[MAX_NEW_LABELS]; - u8 rt_protocol; /* routing protocol that set this entry */ - u8 rt_payload_type; - u8 rt_labels; - u8 rt_via_alen; - u8 rt_via_table; - u8 rt_via[0]; -}; +/* Maximum number of labels to look ahead at when selecting a path of + * a multipath route + */ +#define MAX_MP_SELECT_LABELS 4 static int zero = 0; static int label_limit = (1 << 20) - 1; @@ -80,10 +57,24 @@ bool mpls_output_possible(const struct net_device *dev) } EXPORT_SYMBOL_GPL(mpls_output_possible); -static unsigned int mpls_rt_header_size(const struct mpls_route *rt) +static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh) +{ + u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN); + int nh_index = nh - rt->rt_nh; + + return nh0_via + rt->rt_max_alen * nh_index; +} + +static const u8 *mpls_nh_via(const struct mpls_route *rt, + const struct mpls_nh *nh) +{ + return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh); +} + +static unsigned int mpls_nh_header_size(const struct mpls_nh *nh) { /* The size of the layer 2.5 labels to be added for this route */ - return rt->rt_labels * sizeof(struct mpls_shim_hdr); + return nh->nh_labels * sizeof(struct mpls_shim_hdr); } unsigned int mpls_dev_mtu(const struct net_device *dev) @@ -105,6 +96,80 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); +static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, + struct sk_buff *skb, bool bos) +{ + struct mpls_entry_decoded dec; + struct 
mpls_shim_hdr *hdr; + bool eli_seen = false; + int label_index; + int nh_index = 0; + u32 hash = 0; + + /* No need to look further into packet if there's only + * one path + */ + if (rt->rt_nhn == 1) + goto out; + + for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; + label_index++) { + if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) + break; + + /* Read and decode the current label */ + hdr = mpls_hdr(skb) + label_index; + dec = mpls_entry_decode(hdr); + + /* RFC6790 - reserved labels MUST NOT be used as keys + * for the load-balancing function + */ + if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) { + hash = jhash_1word(dec.label, hash); + + /* The entropy label follows the entropy label + * indicator, so this means that the entropy + * label was just added to the hash - no need to + * go any deeper either in the label stack or in the + * payload + */ + if (eli_seen) + break; + } else if (dec.label == MPLS_LABEL_ENTROPY) { + eli_seen = true; + } + + bos = dec.bos; + if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index + + sizeof(struct iphdr))) { + const struct iphdr *v4hdr; + + v4hdr = (const struct iphdr *)(mpls_hdr(skb) + + label_index); + if (v4hdr->version == 4) { + hash = jhash_3words(ntohl(v4hdr->saddr), + ntohl(v4hdr->daddr), + v4hdr->protocol, hash); + } else if (v4hdr->version == 6 && + pskb_may_pull(skb, sizeof(*hdr) * label_index + + sizeof(struct ipv6hdr))) { + const struct ipv6hdr *v6hdr; + + v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) + + label_index); + + hash = __ipv6_addr_jhash(&v6hdr->saddr, hash); + hash = __ipv6_addr_jhash(&v6hdr->daddr, hash); + hash = jhash_1word(v6hdr->nexthdr, hash); + } + } + } + + nh_index = hash % rt->rt_nhn; +out: + return &rt->rt_nh[nh_index]; +} + static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, struct mpls_entry_decoded dec) { @@ -159,6 +224,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, struct net *net = dev_net(dev); struct mpls_shim_hdr *hdr; struct mpls_route *rt; + struct mpls_nh *nh; struct mpls_entry_decoded dec; struct net_device *out_dev; struct mpls_dev *mdev; @@ -196,8 +262,12 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, if (!rt) goto drop; + nh = mpls_select_multipath(rt, skb, dec.bos); + if (!nh) + goto drop; + /* Find the output device */ - out_dev = rcu_dereference(rt->rt_dev); + out_dev = rcu_dereference(nh->nh_dev); if (!mpls_output_possible(out_dev)) goto drop; @@ -212,7 +282,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, dec.ttl -= 1; /* Verify the destination can hold the packet */ - new_header_size = mpls_rt_header_size(rt); + new_header_size = mpls_nh_header_size(nh); mtu = mpls_dev_mtu(out_dev); if (mpls_pkt_too_big(skb, mtu - new_header_size)) goto drop; @@ -240,13 +310,14 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Push the new labels */ hdr = mpls_hdr(skb); bos = dec.bos; - for (i = rt->rt_labels - 1; i >= 0; i--) { - hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos); + for (i = nh->nh_labels - 1; i >= 0; i--) { + hdr[i] = mpls_entry_encode(nh->nh_label[i], + dec.ttl, 0, bos); bos = false; } } - err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb); + err = neigh_xmit(nh->nh_via_table, out_dev, mpls_nh_via(rt, nh), skb); if (err) net_dbg_ratelimited("%s: packet transmission failed: %d\n", __func__, err); @@ -270,24 +341,33 @@ static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { struct mpls_route_config { u32 rc_protocol; u32 
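
A minimal sketch of the load-balancing rule implemented by mpls_select_multipath() above, on an already-decoded label stack: reserved labels (below 16) are never hashed per RFC 6790, hashing stops right after the label that follows an entropy label indicator, and the result selects a nexthop by modulo. The IPv4/IPv6 payload fallback is omitted, and mix() is a stand-in for the kernel's jhash_1word():

#include <stdint.h>
#include <stddef.h>

#define MPLS_LABEL_ENTROPY            7
#define MPLS_LABEL_FIRST_UNRESERVED  16
#define MAX_MP_SELECT_LABELS          4

/* Stand-in mixing step; any reasonable 32-bit hash combiner works. */
static uint32_t mix(uint32_t v, uint32_t h)
{
	h ^= v + 0x9e3779b9 + (h << 6) + (h >> 2);
	return h;
}

static unsigned int select_path(const uint32_t *labels, size_t n_labels,
				unsigned int n_paths)
{
	uint32_t hash = 0;
	int eli_seen = 0;
	size_t i;

	if (n_paths == 1)
		return 0;

	for (i = 0; i < n_labels && i < MAX_MP_SELECT_LABELS; i++) {
		if (labels[i] >= MPLS_LABEL_FIRST_UNRESERVED) {
			hash = mix(labels[i], hash);
			if (eli_seen)
				break;	/* entropy label just hashed; stop */
		} else if (labels[i] == MPLS_LABEL_ENTROPY) {
			eli_seen = 1;	/* next real label is the entropy label */
		}
	}
	return hash % n_paths;
}
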
rc_ifindex; - u16 rc_via_table; - u16 rc_via_alen; + u8 rc_via_table; + u8 rc_via_alen; u8 rc_via[MAX_VIA_ALEN]; u32 rc_label; - u32 rc_output_labels; + u8 rc_output_labels; u32 rc_output_label[MAX_NEW_LABELS]; u32 rc_nlflags; enum mpls_payload_type rc_payload_type; struct nl_info rc_nlinfo; + struct rtnexthop *rc_mp; + int rc_mp_len; }; -static struct mpls_route *mpls_rt_alloc(size_t alen) +static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) { + u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN); struct mpls_route *rt; - rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL); - if (rt) - rt->rt_via_alen = alen; + rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh), + VIA_ALEN_ALIGN) + + num_nh * max_alen_aligned, + GFP_KERNEL); + if (rt) { + rt->rt_nhn = num_nh; + rt->rt_max_alen = max_alen_aligned; + } + return rt; } @@ -312,25 +392,22 @@ static void mpls_notify_route(struct net *net, unsigned index, } static void mpls_route_update(struct net *net, unsigned index, - struct net_device *dev, struct mpls_route *new, + struct mpls_route *new, const struct nl_info *info) { struct mpls_route __rcu **platform_label; - struct mpls_route *rt, *old = NULL; + struct mpls_route *rt; ASSERT_RTNL(); platform_label = rtnl_dereference(net->mpls.platform_label); rt = rtnl_dereference(platform_label[index]); - if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) { - rcu_assign_pointer(platform_label[index], new); - old = rt; - } + rcu_assign_pointer(platform_label[index], new); - mpls_notify_route(net, index, old, new, info); + mpls_notify_route(net, index, rt, new, info); /* If we removed a route free it now */ - mpls_rt_free(old); + mpls_rt_free(rt); } static unsigned find_free_label(struct net *net) @@ -350,7 +427,8 @@ static unsigned find_free_label(struct net *net) } #if IS_ENABLED(CONFIG_INET) -static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet_fib_lookup_dev(struct net *net, + const void *addr) { struct net_device *dev; struct rtable *rt; @@ -369,14 +447,16 @@ static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) return dev; } #else -static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet_fib_lookup_dev(struct net *net, + const void *addr) { return ERR_PTR(-EAFNOSUPPORT); } #endif #if IS_ENABLED(CONFIG_IPV6) -static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet6_fib_lookup_dev(struct net *net, + const void *addr) { struct net_device *dev; struct dst_entry *dst; @@ -399,47 +479,234 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) return dev; } #else -static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet6_fib_lookup_dev(struct net *net, + const void *addr) { return ERR_PTR(-EAFNOSUPPORT); } #endif static struct net_device *find_outdev(struct net *net, - struct mpls_route_config *cfg) + struct mpls_route *rt, + struct mpls_nh *nh, int oif) { struct net_device *dev = NULL; - if (!cfg->rc_ifindex) { - switch (cfg->rc_via_table) { + if (!oif) { + switch (nh->nh_via_table) { case NEIGH_ARP_TABLE: - dev = inet_fib_lookup_dev(net, cfg->rc_via); + dev = inet_fib_lookup_dev(net, mpls_nh_via(rt, nh)); break; case NEIGH_ND_TABLE: - dev = inet6_fib_lookup_dev(net, cfg->rc_via); + dev = inet6_fib_lookup_dev(net, mpls_nh_via(rt, nh)); break; case NEIGH_LINK_TABLE: break; } } else { - dev = dev_get_by_index(net, cfg->rc_ifindex); + 
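
The single-allocation trick used by mpls_rt_alloc() and __mpls_nh_via() above can be shown in a self-contained sketch; the structs are trimmed to the fields the offset math needs, and ALIGN_UP mirrors the kernel's ALIGN():

#include <stdint.h>
#include <stdlib.h>

#define VIA_ALEN_ALIGN sizeof(unsigned long)
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

struct nh { int ifindex; uint8_t via_alen; };

struct route {
	uint8_t		max_alen;	/* per-nexthop via size, pre-aligned */
	unsigned int	nhn;		/* number of nexthops */
	struct nh	rt_nh[];	/* nexthops, padding, then via blobs */
};

/* Via address of nexthop i: the blobs start at the first aligned
 * offset past rt_nh[nhn] and are max_alen bytes apart. */
static uint8_t *nh_via(struct route *rt, unsigned int i)
{
	uintptr_t base = (uintptr_t)&rt->rt_nh[rt->nhn];

	return (uint8_t *)ALIGN_UP(base, VIA_ALEN_ALIGN) + rt->max_alen * i;
}

static struct route *rt_alloc(unsigned int num_nh, uint8_t max_alen)
{
	size_t aligned = ALIGN_UP((size_t)max_alen, VIA_ALEN_ALIGN);
	struct route *rt;

	/* One block: route header + nexthop array, padded out to the
	 * via alignment, followed by one via blob per nexthop. */
	rt = calloc(1, ALIGN_UP(sizeof(*rt) + num_nh * sizeof(struct nh),
				VIA_ALEN_ALIGN) + num_nh * aligned);
	if (rt) {
		rt->nhn = num_nh;
		rt->max_alen = aligned;
	}
	return rt;
}
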
dev = dev_get_by_index(net, oif); } if (!dev) return ERR_PTR(-ENODEV); + /* The caller is holding rtnl anyways, so release the dev reference */ + dev_put(dev); + return dev; } +static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, + struct mpls_nh *nh, int oif) +{ + struct net_device *dev = NULL; + int err = -ENODEV; + + dev = find_outdev(net, rt, nh, oif); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + dev = NULL; + goto errout; + } + + /* Ensure this is a supported device */ + err = -EINVAL; + if (!mpls_dev_get(dev)) + goto errout; + + RCU_INIT_POINTER(nh->nh_dev, dev); + + return 0; + +errout: + return err; +} + +static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, + struct mpls_route *rt) +{ + struct net *net = cfg->rc_nlinfo.nl_net; + struct mpls_nh *nh = rt->rt_nh; + int err; + int i; + + if (!nh) + return -ENOMEM; + + err = -EINVAL; + /* Ensure only a supported number of labels are present */ + if (cfg->rc_output_labels > MAX_NEW_LABELS) + goto errout; + + nh->nh_labels = cfg->rc_output_labels; + for (i = 0; i < nh->nh_labels; i++) + nh->nh_label[i] = cfg->rc_output_label[i]; + + nh->nh_via_table = cfg->rc_via_table; + memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen); + nh->nh_via_alen = cfg->rc_via_alen; + + err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex); + if (err) + goto errout; + + return 0; + +errout: + return err; +} + +static int mpls_nh_build(struct net *net, struct mpls_route *rt, + struct mpls_nh *nh, int oif, + struct nlattr *via, struct nlattr *newdst) +{ + int err = -ENOMEM; + + if (!nh) + goto errout; + + if (newdst) { + err = nla_get_labels(newdst, MAX_NEW_LABELS, + &nh->nh_labels, nh->nh_label); + if (err) + goto errout; + } + + err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table, + __mpls_nh_via(rt, nh)); + if (err) + goto errout; + + err = mpls_nh_assign_dev(net, rt, nh, oif); + if (err) + goto errout; + + return 0; + +errout: + return err; +} + +static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, + u8 cfg_via_alen, u8 *max_via_alen) +{ + int nhs = 0; + int remaining = len; + + if (!rtnh) { + *max_via_alen = cfg_via_alen; + return 1; + } + + *max_via_alen = 0; + + while (rtnh_ok(rtnh, remaining)) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + int attrlen; + + attrlen = rtnh_attrlen(rtnh); + nla = nla_find(attrs, attrlen, RTA_VIA); + if (nla && nla_len(nla) >= + offsetof(struct rtvia, rtvia_addr)) { + int via_alen = nla_len(nla) - + offsetof(struct rtvia, rtvia_addr); + + if (via_alen <= MAX_VIA_ALEN) + *max_via_alen = max_t(u16, *max_via_alen, + via_alen); + } + + nhs++; + rtnh = rtnh_next(rtnh, &remaining); + } + + /* leftover implies invalid nexthop configuration, discard it */ + return remaining > 0 ? 
0 : nhs; +} + +static int mpls_nh_build_multi(struct mpls_route_config *cfg, + struct mpls_route *rt) +{ + struct rtnexthop *rtnh = cfg->rc_mp; + struct nlattr *nla_via, *nla_newdst; + int remaining = cfg->rc_mp_len; + int nhs = 0; + int err = 0; + + change_nexthops(rt) { + int attrlen; + + nla_via = NULL; + nla_newdst = NULL; + + err = -EINVAL; + if (!rtnh_ok(rtnh, remaining)) + goto errout; + + /* neither weighted multipath nor any flags + * are supported + */ + if (rtnh->rtnh_hops || rtnh->rtnh_flags) + goto errout; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *attrs = rtnh_attrs(rtnh); + + nla_via = nla_find(attrs, attrlen, RTA_VIA); + nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST); + } + + if (!nla_via) + goto errout; + + err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, + rtnh->rtnh_ifindex, nla_via, + nla_newdst); + if (err) + goto errout; + + rtnh = rtnh_next(rtnh, &remaining); + nhs++; + } endfor_nexthops(rt); + + rt->rt_nhn = nhs; + + return 0; + +errout: + return err; +} + static int mpls_route_add(struct mpls_route_config *cfg) { struct mpls_route __rcu **platform_label; struct net *net = cfg->rc_nlinfo.nl_net; - struct net_device *dev = NULL; struct mpls_route *rt, *old; - unsigned index; - int i; int err = -EINVAL; + u8 max_via_alen; + unsigned index; + int nhs; index = cfg->rc_label; @@ -457,27 +724,6 @@ static int mpls_route_add(struct mpls_route_config *cfg) if (index >= net->mpls.platform_labels) goto errout; - /* Ensure only a supported number of labels are present */ - if (cfg->rc_output_labels > MAX_NEW_LABELS) - goto errout; - - dev = find_outdev(net, cfg); - if (IS_ERR(dev)) { - err = PTR_ERR(dev); - dev = NULL; - goto errout; - } - - /* Ensure this is a supported device */ - err = -EINVAL; - if (!mpls_dev_get(dev)) - goto errout; - - err = -EINVAL; - if ((cfg->rc_via_table == NEIGH_LINK_TABLE) && - (dev->addr_len != cfg->rc_via_alen)) - goto errout; - /* Append makes no sense with mpls */ err = -EOPNOTSUPP; if (cfg->rc_nlflags & NLM_F_APPEND) @@ -497,28 +743,34 @@ static int mpls_route_add(struct mpls_route_config *cfg) if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old) goto errout; + err = -EINVAL; + nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, + cfg->rc_via_alen, &max_via_alen); + if (nhs == 0) + goto errout; + err = -ENOMEM; - rt = mpls_rt_alloc(cfg->rc_via_alen); + rt = mpls_rt_alloc(nhs, max_via_alen); if (!rt) goto errout; - rt->rt_labels = cfg->rc_output_labels; - for (i = 0; i < rt->rt_labels; i++) - rt->rt_label[i] = cfg->rc_output_label[i]; rt->rt_protocol = cfg->rc_protocol; - RCU_INIT_POINTER(rt->rt_dev, dev); rt->rt_payload_type = cfg->rc_payload_type; - rt->rt_via_table = cfg->rc_via_table; - memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen); - mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo); + if (cfg->rc_mp) + err = mpls_nh_build_multi(cfg, rt); + else + err = mpls_nh_build_from_cfg(cfg, rt); + if (err) + goto freert; + + mpls_route_update(net, index, rt, &cfg->rc_nlinfo); - dev_put(dev); return 0; +freert: + mpls_rt_free(rt); errout: - if (dev) - dev_put(dev); return err; } @@ -538,7 +790,7 @@ static int mpls_route_del(struct mpls_route_config *cfg) if (index >= net->mpls.platform_labels) goto errout; - mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo); + mpls_route_update(net, index, NULL, &cfg->rc_nlinfo); err = 0; errout: @@ -635,9 +887,11 @@ static void mpls_ifdown(struct net_device *dev) struct mpls_route *rt = rtnl_dereference(platform_label[index]); if (!rt) continue; - if 
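
A userspace analogue of the mpls_count_nexthops() walk above, using the RTNH_* helpers from the uapi <linux/rtnetlink.h>; the via-length bookkeeping is dropped, only the counting and the trailing-bytes check remain:

#include <linux/rtnetlink.h>

/* Count nexthops in an RTA_MULTIPATH payload the way the kernel does:
 * walk the rtnexthop records, then treat any leftover bytes as a
 * malformed attribute and reject the whole configuration. */
static int count_nexthops(struct rtnexthop *rtnh, int len)
{
	int remaining = len;
	int nhs = 0;

	while (RTNH_OK(rtnh, remaining)) {
		remaining -= RTNH_ALIGN(rtnh->rtnh_len);
		rtnh = RTNH_NEXT(rtnh);
		nhs++;
	}
	return remaining > 0 ? 0 : nhs;
}
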
(rtnl_dereference(rt->rt_dev) != dev) - continue; - rt->rt_dev = NULL; + for_nexthops(rt) { + if (rtnl_dereference(nh->nh_dev) != dev) + continue; + nh->nh_dev = NULL; + } endfor_nexthops(rt); } mdev = mpls_dev_get(dev); @@ -736,7 +990,7 @@ int nla_put_labels(struct sk_buff *skb, int attrtype, EXPORT_SYMBOL_GPL(nla_put_labels); int nla_get_labels(const struct nlattr *nla, - u32 max_labels, u32 *labels, u32 label[]) + u32 max_labels, u8 *labels, u32 label[]) { unsigned len = nla_len(nla); unsigned nla_labels; @@ -781,6 +1035,48 @@ int nla_get_labels(const struct nlattr *nla, } EXPORT_SYMBOL_GPL(nla_get_labels); +int nla_get_via(const struct nlattr *nla, u8 *via_alen, + u8 *via_table, u8 via_addr[]) +{ + struct rtvia *via = nla_data(nla); + int err = -EINVAL; + int alen; + + if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) + goto errout; + alen = nla_len(nla) - + offsetof(struct rtvia, rtvia_addr); + if (alen > MAX_VIA_ALEN) + goto errout; + + /* Validate the address family */ + switch (via->rtvia_family) { + case AF_PACKET: + *via_table = NEIGH_LINK_TABLE; + break; + case AF_INET: + *via_table = NEIGH_ARP_TABLE; + if (alen != 4) + goto errout; + break; + case AF_INET6: + *via_table = NEIGH_ND_TABLE; + if (alen != 16) + goto errout; + break; + default: + /* Unsupported address family */ + goto errout; + } + + memcpy(via_addr, via->rtvia_addr, alen); + *via_alen = alen; + err = 0; + +errout: + return err; +} + static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, struct mpls_route_config *cfg) { @@ -844,7 +1140,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, break; case RTA_DST: { - u32 label_count; + u8 label_count; if (nla_get_labels(nla, 1, &label_count, &cfg->rc_label)) goto errout; @@ -857,35 +1153,15 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, } case RTA_VIA: { - struct rtvia *via = nla_data(nla); - if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) - goto errout; - cfg->rc_via_alen = nla_len(nla) - - offsetof(struct rtvia, rtvia_addr); - if (cfg->rc_via_alen > MAX_VIA_ALEN) + if (nla_get_via(nla, &cfg->rc_via_alen, + &cfg->rc_via_table, cfg->rc_via)) goto errout; - - /* Validate the address family */ - switch(via->rtvia_family) { - case AF_PACKET: - cfg->rc_via_table = NEIGH_LINK_TABLE; - break; - case AF_INET: - cfg->rc_via_table = NEIGH_ARP_TABLE; - if (cfg->rc_via_alen != 4) - goto errout; - break; - case AF_INET6: - cfg->rc_via_table = NEIGH_ND_TABLE; - if (cfg->rc_via_alen != 16) - goto errout; - break; - default: - /* Unsupported address family */ - goto errout; - } - - memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen); + break; + } + case RTA_MULTIPATH: + { + cfg->rc_mp = nla_data(nla); + cfg->rc_mp_len = nla_len(nla); break; } default: @@ -946,16 +1222,52 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, rtm->rtm_type = RTN_UNICAST; rtm->rtm_flags = 0; - if (rt->rt_labels && - nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label)) - goto nla_put_failure; - if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen)) - goto nla_put_failure; - dev = rtnl_dereference(rt->rt_dev); - if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) - goto nla_put_failure; if (nla_put_labels(skb, RTA_DST, 1, &label)) goto nla_put_failure; + if (rt->rt_nhn == 1) { + const struct mpls_nh *nh = rt->rt_nh; + + if (nh->nh_labels && + nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, + nh->nh_label)) + goto nla_put_failure; + if (nla_put_via(skb, 
nh->nh_via_table, mpls_nh_via(rt, nh), + nh->nh_via_alen)) + goto nla_put_failure; + dev = rtnl_dereference(nh->nh_dev); + if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) + goto nla_put_failure; + } else { + struct rtnexthop *rtnh; + struct nlattr *mp; + + mp = nla_nest_start(skb, RTA_MULTIPATH); + if (!mp) + goto nla_put_failure; + + for_nexthops(rt) { + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); + if (!rtnh) + goto nla_put_failure; + + dev = rtnl_dereference(nh->nh_dev); + if (dev) + rtnh->rtnh_ifindex = dev->ifindex; + if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST, + nh->nh_labels, + nh->nh_label)) + goto nla_put_failure; + if (nla_put_via(skb, nh->nh_via_table, + mpls_nh_via(rt, nh), + nh->nh_via_alen)) + goto nla_put_failure; + + /* length of rtnetlink header + attributes */ + rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; + } endfor_nexthops(rt); + + nla_nest_end(skb, mp); + } nlmsg_end(skb, nlh); return 0; @@ -1000,12 +1312,30 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) { size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) - + nla_total_size(2 + rt->rt_via_alen) /* RTA_VIA */ + nla_total_size(4); /* RTA_DST */ - if (rt->rt_labels) /* RTA_NEWDST */ - payload += nla_total_size(rt->rt_labels * 4); - if (rt->rt_dev) /* RTA_OIF */ - payload += nla_total_size(4); + + if (rt->rt_nhn == 1) { + struct mpls_nh *nh = rt->rt_nh; + + if (nh->nh_dev) + payload += nla_total_size(4); /* RTA_OIF */ + payload += nla_total_size(2 + nh->nh_via_alen); /* RTA_VIA */ + if (nh->nh_labels) /* RTA_NEWDST */ + payload += nla_total_size(nh->nh_labels * 4); + } else { + /* each nexthop is packed in an attribute */ + size_t nhsize = 0; + + for_nexthops(rt) { + nhsize += nla_total_size(sizeof(struct rtnexthop)); + nhsize += nla_total_size(2 + nh->nh_via_alen); + if (nh->nh_labels) + nhsize += nla_total_size(nh->nh_labels * 4); + } endfor_nexthops(rt); + /* nested attribute */ + payload += nla_total_size(nhsize); + } + return payload; } @@ -1057,25 +1387,29 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* In case the predefined labels need to be populated */ if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; - rt0 = mpls_rt_alloc(lo->addr_len); + rt0 = mpls_rt_alloc(1, lo->addr_len); if (!rt0) goto nort0; - RCU_INIT_POINTER(rt0->rt_dev, lo); + RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); rt0->rt_protocol = RTPROT_KERNEL; rt0->rt_payload_type = MPT_IPV4; - rt0->rt_via_table = NEIGH_LINK_TABLE; - memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); + rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE; + rt0->rt_nh->nh_via_alen = lo->addr_len; + memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr, + lo->addr_len); } if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; - rt2 = mpls_rt_alloc(lo->addr_len); + rt2 = mpls_rt_alloc(1, lo->addr_len); if (!rt2) goto nort2; - RCU_INIT_POINTER(rt2->rt_dev, lo); + RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); rt2->rt_protocol = RTPROT_KERNEL; rt2->rt_payload_type = MPT_IPV6; - rt2->rt_via_table = NEIGH_LINK_TABLE; - memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len); + rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE; + rt2->rt_nh->nh_via_alen = lo->addr_len; + memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr, + lo->addr_len); } rtnl_lock(); @@ -1085,7 +1419,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* Free any labels beyond the new table */ for (index = limit; index < old_limit; index++) - mpls_route_update(net, index, NULL, NULL, NULL); + 
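
The multipath dump path above writes each rtnexthop header first and patches its rtnh_len afterwards via nlmsg_get_pos(), because the record length is only known once the nested attributes have been appended behind it. A simplified userspace sketch of that fixup, using the uapi RTA_*/RTNH_* macros; it assumes buf is large enough and flattens the RTA_VIA payload (the real attribute carries a struct rtvia, family plus address):

#include <string.h>
#include <linux/rtnetlink.h>

static int put_nexthop(char *buf, int ifindex,
		       const void *via, unsigned short via_len)
{
	struct rtnexthop *rtnh = (struct rtnexthop *)buf;
	struct rtattr *rta;
	char *pos = buf;

	memset(rtnh, 0, sizeof(*rtnh));
	rtnh->rtnh_ifindex = ifindex;
	pos += RTNH_ALIGN(sizeof(*rtnh));

	rta = (struct rtattr *)pos;	/* nested attribute follows the header */
	rta->rta_type = RTA_VIA;
	rta->rta_len = RTA_LENGTH(via_len);
	memcpy(RTA_DATA(rta), via, via_len);
	pos += RTA_ALIGN(rta->rta_len);

	rtnh->rtnh_len = pos - buf;	/* header + nested attributes */
	return pos - buf;
}
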
mpls_route_update(net, index, NULL, NULL); /* Copy over the old labels */ cp_size = size; diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 2681a4ba6c37..bde52ce88c94 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -21,6 +21,76 @@ struct mpls_dev { struct sk_buff; +#define LABEL_NOT_SPECIFIED (1 << 20) +#define MAX_NEW_LABELS 2 + +/* This maximum ha length copied from the definition of struct neighbour */ +#define VIA_ALEN_ALIGN sizeof(unsigned long) +#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, VIA_ALEN_ALIGN)) + +enum mpls_payload_type { + MPT_UNSPEC, /* IPv4 or IPv6 */ + MPT_IPV4 = 4, + MPT_IPV6 = 6, + + /* Other types not implemented: + * - Pseudo-wire with or without control word (RFC4385) + * - GAL (RFC5586) + */ +}; + +struct mpls_nh { /* next hop label forwarding entry */ + struct net_device __rcu *nh_dev; + u32 nh_label[MAX_NEW_LABELS]; + u8 nh_labels; + u8 nh_via_alen; + u8 nh_via_table; +}; + +/* The route, nexthops and vias are stored together in the same memory + * block: + * + * +----------------------+ + * | mpls_route | + * +----------------------+ + * | mpls_nh 0 | + * +----------------------+ + * | ... | + * +----------------------+ + * | mpls_nh n-1 | + * +----------------------+ + * | alignment padding | + * +----------------------+ + * | via[rt_max_alen] 0 | + * +----------------------+ + * | ... | + * +----------------------+ + * | via[rt_max_alen] n-1 | + * +----------------------+ + */ +struct mpls_route { /* next hop label forwarding entry */ + struct rcu_head rt_rcu; + u8 rt_protocol; + u8 rt_payload_type; + u8 rt_max_alen; + unsigned int rt_nhn; + struct mpls_nh rt_nh[0]; +}; + +#define for_nexthops(rt) { \ + int nhsel; struct mpls_nh *nh; \ + for (nhsel = 0, nh = (rt)->rt_nh; \ + nhsel < (rt)->rt_nhn; \ + nh++, nhsel++) + +#define change_nexthops(rt) { \ + int nhsel; struct mpls_nh *nh; \ + for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \ + nhsel < (rt)->rt_nhn; \ + nh++, nhsel++) + +#define endfor_nexthops(rt) } + static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) { return (struct mpls_shim_hdr *)skb_network_header(skb); @@ -52,8 +122,10 @@ static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr * int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); -int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels, +int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels, u32 label[]); +int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table, + u8 via[]); bool mpls_output_possible(const struct net_device *dev); unsigned int mpls_dev_mtu(const struct net_device *dev); bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 09e661c3ae58..f39276d1c2d7 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -152,6 +152,8 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) #endif synchronize_net(); nf_queue_nf_hook_drop(net, &entry->ops); + /* other cpu might still process nfqueue verdict that used reg */ + synchronize_net(); kfree(entry); } EXPORT_SYMBOL(nf_unregister_net_hook); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a1fe5377a2b3..5a30ce6e8c90 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -297,7 +297,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, 
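
The for_nexthops()/endfor_nexthops() macros defined in internal.h above use a deliberately unbalanced brace: the opening `{` scopes the iterator variables and is only closed by endfor_nexthops, so the pair must always appear together. A self-contained sketch of the same pattern with trimmed-down types:

#include <stdio.h>

struct nh { int ifindex; };
struct route { int nhn; struct nh rt_nh[4]; };

#define for_nexthops(rt) { \
	int nhsel; struct nh *nh; \
	for (nhsel = 0, nh = (rt)->rt_nh; \
	     nhsel < (rt)->nhn; \
	     nh++, nhsel++)

#define endfor_nexthops(rt) }	/* closes for_nexthops' opening brace */

int main(void)
{
	struct route r = { .nhn = 2, .rt_nh = { { 3 }, { 7 } } };

	for_nexthops(&r) {
		printf("nexthop %d via ifindex %d\n", nhsel, nh->ifindex);
	} endfor_nexthops(&r);
	return 0;
}
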
ip_set_timeout_expired(ext_timeout(n, set)))) n = NULL; - e = kzalloc(set->dsize, GFP_KERNEL); + e = kzalloc(set->dsize, GFP_ATOMIC); if (!e) return -ENOMEM; e->id = d->id; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0a49a8c7c564..fafe33bdb619 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2371,7 +2371,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, int pos, idx, shift; err = 0; - netlink_table_grab(); + netlink_lock_table(); for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { if (len - pos < sizeof(u32)) break; @@ -2386,7 +2386,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, } if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) err = -EFAULT; - netlink_table_ungrab(); + netlink_unlock_table(); break; } case NETLINK_CAP_ACK: diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index c6087233d7fc..221fa8b37a47 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -769,7 +769,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, const struct nlattr *actions, int actions_len) { - struct ip_tunnel_info info; struct dp_upcall_info upcall; const struct nlattr *a; int rem; @@ -797,11 +796,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, if (vport) { int err; - upcall.egress_tun_info = &info; - err = ovs_vport_get_egress_tun_info(vport, skb, - &upcall); - if (err) - upcall.egress_tun_info = NULL; + err = dev_fill_metadata_dst(vport->dev, skb); + if (!err) + upcall.egress_tun_info = skb_tunnel_info(skb); } break; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 9ed833e9bb7d..bd165ee2bb16 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -151,6 +151,8 @@ static void ovs_ct_update_key(const struct sk_buff *skb, ct = nf_ct_get(skb, &ctinfo); if (ct) { state = ovs_ct_get_state(ctinfo); + if (!nf_ct_is_confirmed(ct)) + state |= OVS_CS_F_NEW; if (ct->master) state |= OVS_CS_F_RELATED; zone = nf_ct_zone(ct); @@ -222,9 +224,6 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, struct nf_conn *ct; int err; - if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) - return -ENOTSUPP; - /* The connection could be invalid, in which case set_label is no-op.*/ ct = nf_ct_get(skb, &ctinfo); if (!ct) @@ -377,7 +376,7 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb, return true; } -static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, +static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, const struct ovs_conntrack_info *info, struct sk_buff *skb) { @@ -408,6 +407,8 @@ static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, } } + ovs_ct_update_key(skb, key, true); + return 0; } @@ -430,8 +431,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, err = __ovs_ct_lookup(net, key, info, skb); if (err) return err; - - ovs_ct_update_key(skb, key, true); } return 0; @@ -460,8 +459,6 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (nf_conntrack_confirm(skb) != NF_ACCEPT) return -EINVAL; - ovs_ct_update_key(skb, key, true); - return 0; } @@ -587,6 +584,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, case OVS_CT_ATTR_MARK: { struct md_mark *mark = nla_data(a); + if (!mark->mask) { + OVS_NLERR(log, "ct_mark mask cannot be 0"); + 
return -EINVAL; + } info->mark = *mark; break; } @@ -595,6 +596,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, case OVS_CT_ATTR_LABELS: { struct md_labels *labels = nla_data(a); + if (!labels_nonzero(&labels->mask)) { + OVS_NLERR(log, "ct_labels mask cannot be 0"); + return -EINVAL; + } info->labels = *labels; break; } @@ -705,11 +710,12 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) return -EMSGSIZE; - if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && + if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask && nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), &ct_info->mark)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + labels_nonzero(&ct_info->labels.mask) && nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), &ct_info->labels)) return -EMSGSIZE; diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index da8714942c95..82e0dfc66028 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -35,12 +35,9 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); void ovs_ct_free_action(const struct nlattr *a); -static inline bool ovs_ct_state_supported(u32 state) -{ - return !(state & ~(OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | - OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | - OVS_CS_F_INVALID | OVS_CS_F_TRACKED)); -} +#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ + OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \ + OVS_CS_F_INVALID | OVS_CS_F_TRACKED) #else #include <linux/errno.h> @@ -53,11 +50,6 @@ static inline bool ovs_ct_verify(struct net *net, int attr) return false; } -static inline bool ovs_ct_state_supported(u32 state) -{ - return false; -} - static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla, const struct sw_flow_key *key, struct sw_flow_actions **acts, bool log) @@ -94,5 +86,7 @@ static inline int ovs_ct_put_key(const struct sw_flow_key *key, } static inline void ovs_ct_free_action(const struct nlattr *a) { } + +#define CT_SUPPORTED_MASK 0 #endif /* CONFIG_NF_CONNTRACK */ #endif /* ovs_conntrack.h */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a75828091e21..5633172b791a 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -489,9 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->egress_tun_info) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); - err = ovs_nla_put_egress_tunnel_key(user_skb, - upcall_info->egress_tun_info, - upcall_info->egress_tun_opts); + err = ovs_nla_put_tunnel_info(user_skb, + upcall_info->egress_tun_info); BUG_ON(err); nla_nest_end(user_skb, nla); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index f88038a99f44..67bdecd9fdc1 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -117,7 +117,6 @@ struct ovs_skb_cb { */ struct dp_upcall_info { struct ip_tunnel_info *egress_tun_info; - const void *egress_tun_opts; const struct nlattr *userdata; const struct nlattr *actions; int actions_len; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 6799c8d470c6..907d6fd28ede 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -548,11 +548,11 @@ static int ip_tun_from_nlattr(const struct 
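
The "mask cannot be 0" checks above make sense because conntrack marks and labels are applied as masked updates, so a zero mask is a guaranteed no-op; a minimal sketch of the usual masked-update idiom (a generic illustration, not the exact OVS expression):

#include <stdint.h>
#include <stdio.h>

/* Only bits set in mask are taken from value; the rest keep their old
 * contents. With mask == 0 nothing changes, hence the -EINVAL. */
static uint32_t apply_masked(uint32_t old, uint32_t value, uint32_t mask)
{
	return (old & ~mask) | (value & mask);
}

int main(void)
{
	printf("%#x\n", apply_masked(0xdead0000, 0xbeef, 0xffff));
	/* -> 0xdeadbeef: low 16 bits replaced, high bits preserved */
	return 0;
}
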
nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) { + bool ttl = false, ipv4 = false, ipv6 = false; + __be16 tun_flags = 0; + int opts_type = 0; struct nlattr *a; int rem; - bool ttl = false; - __be16 tun_flags = 0, ipv4 = false, ipv6 = false; - int opts_type = 0; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); @@ -764,7 +764,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts) { + if (swkey_tun_opts_len) { if (output->tun_flags & TUNNEL_GENEVE_OPT && nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, swkey_tun_opts_len, tun_opts)) @@ -798,14 +798,13 @@ static int ip_tun_to_nlattr(struct sk_buff *skb, return 0; } -int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, - const struct ip_tunnel_info *egress_tun_info, - const void *egress_tun_opts) +int ovs_nla_put_tunnel_info(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) { - return __ip_tun_to_nlattr(skb, &egress_tun_info->key, - egress_tun_opts, - egress_tun_info->options_len, - ip_tunnel_info_af(egress_tun_info)); + return __ip_tun_to_nlattr(skb, &tun_info->key, + ip_tunnel_info_opts(tun_info), + tun_info->options_len, + ip_tunnel_info_af(tun_info)); } static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, @@ -866,7 +865,7 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); - if (!is_mask && !ovs_ct_state_supported(ct_state)) { + if (ct_state & ~CT_SUPPORTED_MASK) { OVS_NLERR(log, "ct_state flags %08x unsupported", ct_state); return -EINVAL; @@ -1149,6 +1148,9 @@ static void nlattr_set(struct nlattr *attr, u8 val, } else { memset(nla_data(nla), val, nla_len(nla)); } + + if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE) + *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK; } } @@ -2432,11 +2434,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) if (!start) return -EMSGSIZE; - err = ip_tun_to_nlattr(skb, &tun_info->key, - tun_info->options_len ? 
- ip_tunnel_info_opts(tun_info) : NULL, - tun_info->options_len, - ip_tunnel_info_af(tun_info)); + err = ovs_nla_put_tunnel_info(skb, tun_info); if (err) return err; nla_nest_end(skb, start); diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 6ca3f0baf449..47dd142eca1c 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_get_match(struct net *, struct sw_flow_match *, const struct nlattr *key, const struct nlattr *mask, bool log); -int ovs_nla_put_egress_tunnel_key(struct sk_buff *, - const struct ip_tunnel_info *, - const void *egress_tun_opts); + +int ovs_nla_put_tunnel_info(struct sk_buff *skb, + struct ip_tunnel_info *tun_info); bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 2735e9c4a3b8..efb736bb6855 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport, return 0; } -static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - struct geneve_port *geneve_port = geneve_vport(vport); - struct net *net = ovs_dp_get_net(vport->dp); - __be16 dport = htons(geneve_port->port_no); - __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - - return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), - skb, IPPROTO_UDP, sport, dport); -} - static struct vport *geneve_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -128,9 +116,8 @@ static struct vport_ops ovs_geneve_vport_ops = { .create = geneve_create, .destroy = ovs_netdev_tunnel_destroy, .get_options = geneve_get_options, - .send = ovs_netdev_send, + .send = dev_queue_xmit, .owner = THIS_MODULE, - .get_egress_tun_info = geneve_get_egress_tun_info, }; static int __init ovs_geneve_tnl_init(void) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 4d24481669c9..c3257d78d3d2 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms) return ovs_netdev_link(vport, parms->name); } -static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), - skb, IPPROTO_GRE, 0, 0); -} - static struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .create = gre_create, - .send = ovs_netdev_send, - .get_egress_tun_info = gre_get_egress_tun_info, + .send = dev_queue_xmit, .destroy = ovs_netdev_tunnel_destroy, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 388b8a6bf112..ec76398a792f 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -106,12 +106,45 @@ static void internal_dev_destructor(struct net_device *dev) free_netdev(dev); } +static struct rtnl_link_stats64 * +internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + int i; + + memset(stats, 0, sizeof(*stats)); + stats->rx_errors = dev->stats.rx_errors; + stats->tx_errors = dev->stats.tx_errors; + stats->tx_dropped = dev->stats.tx_dropped; + 
stats->rx_dropped = dev->stats.rx_dropped; + + for_each_possible_cpu(i) { + const struct pcpu_sw_netstats *percpu_stats; + struct pcpu_sw_netstats local_stats; + unsigned int start; + + percpu_stats = per_cpu_ptr(dev->tstats, i); + + do { + start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); + local_stats = *percpu_stats; + } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); + + stats->rx_bytes += local_stats.rx_bytes; + stats->rx_packets += local_stats.rx_packets; + stats->tx_bytes += local_stats.tx_bytes; + stats->tx_packets += local_stats.tx_packets; + } + + return stats; +} + static const struct net_device_ops internal_dev_netdev_ops = { .ndo_open = internal_dev_open, .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = internal_dev_change_mtu, + .ndo_get_stats64 = internal_get_stats, }; static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { @@ -161,6 +194,11 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) err = -ENOMEM; goto error_free_vport; } + vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!vport->dev->tstats) { + err = -ENOMEM; + goto error_free_netdev; + } dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); internal_dev = internal_dev_priv(vport->dev); @@ -173,7 +211,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) rtnl_lock(); err = register_netdevice(vport->dev); if (err) - goto error_free_netdev; + goto error_unlock; dev_set_promiscuity(vport->dev, 1); rtnl_unlock(); @@ -181,8 +219,10 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) return vport; -error_free_netdev: +error_unlock: rtnl_unlock(); + free_percpu(vport->dev->tstats); +error_free_netdev: free_netdev(vport->dev); error_free_vport: ovs_vport_free(vport); @@ -198,26 +238,25 @@ static void internal_dev_destroy(struct vport *vport) /* unregister_netdevice() waits for an RCU grace period. 
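
The per-CPU aggregation loop above reads each CPU's counters under a u64_stats_fetch_begin_irq()/retry pair, a seqcount-style snapshot. A sketch of just that control flow; real code needs atomics and memory barriers, omitted here for brevity:

#include <stdint.h>

struct stats {
	unsigned int seq;	/* odd while a writer is mid-update */
	uint64_t rx_bytes, rx_packets;
};

/* Retry until the same even sequence value is seen before and after
 * the copy, meaning no writer interleaved with the read. */
static struct stats snapshot(const struct stats *s)
{
	struct stats copy;
	unsigned int start;

	do {
		while ((start = s->seq) & 1)
			;		/* writer in progress, spin */
		copy = *s;
	} while (s->seq != start);	/* raced with a writer: retry */
	return copy;
}
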
*/ unregister_netdevice(vport->dev); - + free_percpu(vport->dev->tstats); rtnl_unlock(); } -static void internal_dev_recv(struct vport *vport, struct sk_buff *skb) +static netdev_tx_t internal_dev_recv(struct sk_buff *skb) { - struct net_device *netdev = vport->dev; + struct net_device *netdev = skb->dev; struct pcpu_sw_netstats *stats; if (unlikely(!(netdev->flags & IFF_UP))) { kfree_skb(skb); netdev->stats.rx_dropped++; - return; + return NETDEV_TX_OK; } skb_dst_drop(skb); nf_reset(skb); secpath_reset(skb); - skb->dev = netdev; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -229,6 +268,7 @@ static void internal_dev_recv(struct vport *vport, struct sk_buff *skb) u64_stats_update_end(&stats->syncp); netif_rx(skb); + return NETDEV_TX_OK; } static struct vport_ops ovs_internal_vport_ops = { diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index f7e8dcce7ada..b327368a3848 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -190,37 +190,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) } EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy); -static unsigned int packet_length(const struct sk_buff *skb) -{ - unsigned int length = skb->len - ETH_HLEN; - - if (skb->protocol == htons(ETH_P_8021Q)) - length -= VLAN_HLEN; - - return length; -} - -void ovs_netdev_send(struct vport *vport, struct sk_buff *skb) -{ - int mtu = vport->dev->mtu; - - if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { - net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", - vport->dev->name, - packet_length(skb), mtu); - vport->dev->stats.tx_errors++; - goto drop; - } - - skb->dev = vport->dev; - dev_queue_xmit(skb); - return; - -drop: - kfree_skb(skb); -} -EXPORT_SYMBOL_GPL(ovs_netdev_send); - /* Returns null if this device is not attached to a datapath. 
*/ struct vport *ovs_netdev_get_vport(struct net_device *dev) { @@ -235,7 +204,7 @@ static struct vport_ops ovs_netdev_vport_ops = { .type = OVS_VPORT_TYPE_NETDEV, .create = netdev_create, .destroy = netdev_destroy, - .send = ovs_netdev_send, + .send = dev_queue_xmit, }; int __init ovs_netdev_init(void) diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index bf22fcedbc69..19e29c12adcc 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -27,7 +27,6 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev); struct vport *ovs_netdev_link(struct vport *vport, const char *name); -void ovs_netdev_send(struct vport *vport, struct sk_buff *skb); void ovs_netdev_detach_dev(struct vport *); int __init ovs_netdev_init(void); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index fb3cdb85905d..1605691d9414 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -146,32 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms) return ovs_netdev_link(vport, parms->name); } -static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - struct vxlan_dev *vxlan = netdev_priv(vport->dev); - struct net *net = ovs_dp_get_net(vport->dp); - unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info); - __be16 dst_port = vxlan_dev_dst_port(vxlan, family); - __be16 src_port; - int port_min; - int port_max; - - inet_get_local_port_range(net, &port_min, &port_max); - src_port = udp_flow_src_port(net, skb, 0, 0, true); - - return ovs_tunnel_get_egress_info(upcall, net, - skb, IPPROTO_UDP, - src_port, dst_port); -} - static struct vport_ops ovs_vxlan_netdev_vport_ops = { .type = OVS_VPORT_TYPE_VXLAN, .create = vxlan_create, .destroy = ovs_netdev_tunnel_destroy, .get_options = vxlan_get_options, - .send = ovs_netdev_send, - .get_egress_tun_info = vxlan_get_egress_tun_info, + .send = dev_queue_xmit, }; static int __init ovs_vxlan_tnl_init(void) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 12a36ac21eda..0ac0fd004d7e 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -480,60 +480,32 @@ void ovs_vport_deferred_free(struct vport *vport) } EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, - struct net *net, - struct sk_buff *skb, - u8 ipproto, - __be16 tp_src, - __be16 tp_dst) +static unsigned int packet_length(const struct sk_buff *skb) { - struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info; - const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb); - const struct ip_tunnel_key *tun_key; - u32 skb_mark = skb->mark; - struct rtable *rt; - struct flowi4 fl; - - if (unlikely(!tun_info)) - return -EINVAL; - if (ip_tunnel_info_af(tun_info) != AF_INET) - return -EINVAL; - - tun_key = &tun_info->key; - - /* Route lookup to get srouce IP address. - * The process may need to be changed if the corresponding process - * in vports ops changed. 
- */ - rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); - if (IS_ERR(rt)) - return PTR_ERR(rt); + unsigned int length = skb->len - ETH_HLEN; - ip_rt_put(rt); + if (skb->protocol == htons(ETH_P_8021Q)) + length -= VLAN_HLEN; - /* Generate egress_tun_info based on tun_info, - * saddr, tp_src and tp_dst - */ - ip_tunnel_key_init(&egress_tun_info->key, - fl.saddr, tun_key->u.ipv4.dst, - tun_key->tos, - tun_key->ttl, - tp_src, tp_dst, - tun_key->tun_id, - tun_key->tun_flags); - egress_tun_info->options_len = tun_info->options_len; - egress_tun_info->mode = tun_info->mode; - upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info); - return 0; + return length; } -EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) +void ovs_vport_send(struct vport *vport, struct sk_buff *skb) { - /* get_egress_tun_info() is only implemented on tunnel ports. */ - if (unlikely(!vport->ops->get_egress_tun_info)) - return -EINVAL; + int mtu = vport->dev->mtu; + + if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { + net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", + vport->dev->name, + packet_length(skb), mtu); + vport->dev->stats.tx_errors++; + goto drop; + } + + skb->dev = vport->dev; + vport->ops->send(skb); + return; - return vport->ops->get_egress_tun_info(vport, skb, upcall); +drop: + kfree_skb(skb); } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index a413f3ae6a7b..bdfd82a7c064 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -27,7 +27,6 @@ #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/u64_stats_sync.h> -#include <net/route.h> #include "datapath.h" @@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids); int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, - struct net *net, - struct sk_buff *, - u8 ipproto, - __be16 tp_src, - __be16 tp_dst); - -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall); - /** * struct vport_portids - array of netlink portids of a vport. * must be protected by rcu. @@ -140,8 +129,6 @@ struct vport_parms { * have any configuration. * @send: Send a packet on the device. * zero for dropped packets or negative for error. - * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for - * a packet. 
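
A minimal sketch of the over-MTU test that moves into ovs_vport_send() above: the Ethernet header never counts against the MTU, one 802.1Q tag is excused, and GSO packets are exempt because the stack segments them later. Parameters stand in for skb fields; proto is taken in host byte order here, unlike skb->protocol:

#include <stdbool.h>
#include <linux/if_ether.h>

#define VLAN_HLEN 4	/* one 802.1Q tag */

static bool over_mtu(unsigned int skb_len, unsigned short proto,
		     bool is_gso, unsigned int mtu)
{
	unsigned int length = skb_len - ETH_HLEN;

	if (proto == ETH_P_8021Q)
		length -= VLAN_HLEN;

	return length > mtu && !is_gso;
}
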
*/ struct vport_ops { enum ovs_vport_type type; @@ -153,10 +140,7 @@ struct vport_ops { int (*set_options)(struct vport *, struct nlattr *); int (*get_options)(const struct vport *, struct sk_buff *); - void (*send)(struct vport *, struct sk_buff *); - int (*get_egress_tun_info)(struct vport *, struct sk_buff *, - struct dp_upcall_info *upcall); - + netdev_tx_t (*send) (struct sk_buff *skb); struct module *owner; struct list_head list; }; @@ -234,9 +218,6 @@ static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, return rt; } -static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb) -{ - vport->ops->send(vport, skb); -} +void ovs_vport_send(struct vport *vport, struct sk_buff *skb); #endif /* vport.h */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 1eb76956b439..8950d39af341 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -746,7 +746,7 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u16 mode = BRIDGE_MODE_UNDEF; - u32 mask = BR_LEARNING | BR_LEARNING_SYNC; + u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD; int err; err = switchdev_port_attr_get(dev, &attr); @@ -817,6 +817,9 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev, err = switchdev_port_br_setflag(dev, attr, BR_LEARNING_SYNC); break; + case IFLA_BRPORT_UNICAST_FLOOD: + err = switchdev_port_br_setflag(dev, attr, BR_FLOOD); + break; default: err = -EOPNOTSUPP; break; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index e7000be321b0..ed98c1fc3de1 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -94,10 +94,14 @@ __init int net_sysctl_init(void) goto out; ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) - goto out; + goto out1; register_sysctl_root(&net_sysctl_root); out: return ret; +out1: + unregister_sysctl_table(net_header); + net_header = NULL; + goto out; } struct ctl_table_header *register_net_sysctl(struct net *net, diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 41042de3ae9b..9dc239dfe192 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -35,741 +35,301 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <linux/tipc_config.h> #include "socket.h" #include "msg.h" #include "bcast.h" #include "name_distr.h" -#include "core.h" +#include "link.h" +#include "node.h" -#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ -#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ +#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ +#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; -static void tipc_nmap_diff(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b, - struct tipc_node_map *nm_diff); -static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); -static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); - -static void tipc_bclink_lock(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - spin_lock_bh(&tn->bclink->lock); -} - -static void tipc_bclink_unlock(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - spin_unlock_bh(&tn->bclink->lock); -} - -void tipc_bclink_input(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_sk_mcast_rcv(net, &tn->bclink->arrvq, &tn->bclink->inputq); -} - -uint tipc_bclink_get_mtu(void) -{ - return MAX_PKT_DEFAULT_MCAST; -} - -static u32 bcbuf_acks(struct sk_buff *buf) -{ - return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; -} - -static void bcbuf_set_acks(struct sk_buff *buf, u32 acks) -{ - TIPC_SKB_CB(buf)->handle = (void *)(unsigned long)acks; -} - -static void bcbuf_decr_acks(struct sk_buff *buf) -{ - bcbuf_set_acks(buf, bcbuf_acks(buf) - 1); -} +/** + * struct tipc_bc_base - base structure for keeping broadcast send state + * @link: broadcast send link structure + * @inputq: data input queue; will only carry SOCK_WAKEUP messages + * @dest: array keeping number of reachable destinations per bearer + * @primary_bearer: a bearer having links to all broadcast destinations, if any + */ +struct tipc_bc_base { + struct tipc_link *link; + struct sk_buff_head inputq; + int dests[MAX_BEARERS]; + int primary_bearer; +}; -void tipc_bclink_add_node(struct net *net, u32 addr) +static struct tipc_bc_base *tipc_bc_base(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_bclink_lock(net); - tipc_nmap_add(&tn->bclink->bcast_nodes, addr); - tipc_bclink_unlock(net); + return tipc_net(net)->bcbase; } -void tipc_bclink_remove_node(struct net *net, u32 addr) +int tipc_bcast_get_mtu(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_bclink_lock(net); - tipc_nmap_remove(&tn->bclink->bcast_nodes, addr); - - /* Last node? 
=> reset backlog queue */ - if (!tn->bclink->bcast_nodes.count) - tipc_link_purge_backlog(&tn->bclink->link); - - tipc_bclink_unlock(net); + return tipc_link_mtu(tipc_bc_sndlink(net)); } -static void bclink_set_last_sent(struct net *net) +/* tipc_bcbase_select_primary(): find a bearer with links to all destinations, + * if any, and make it primary bearer + */ +static void tipc_bcbase_select_primary(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; + struct tipc_bc_base *bb = tipc_bc_base(net); + int all_dests = tipc_link_bc_peers(bb->link); + int i, mtu; - bcl->silent_intv_cnt = mod(bcl->snd_nxt - 1); -} + bb->primary_bearer = INVALID_BEARER_ID; -u32 tipc_bclink_get_last_sent(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); + if (!all_dests) + return; - return tn->bcl->silent_intv_cnt; -} + for (i = 0; i < MAX_BEARERS; i++) { + if (!bb->dests[i]) + continue; -static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) -{ - node->bclink.last_sent = less_eq(node->bclink.last_sent, seqno) ? - seqno : node->bclink.last_sent; -} + mtu = tipc_bearer_mtu(net, i); + if (mtu < tipc_link_mtu(bb->link)) + tipc_link_set_mtu(bb->link, mtu); -/** - * tipc_bclink_retransmit_to - get most recent node to request retransmission - * - * Called with bclink_lock locked - */ -struct tipc_node *tipc_bclink_retransmit_to(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - return tn->bclink->retransmit_to; -} + if (bb->dests[i] < all_dests) + continue; -/** - * bclink_retransmit_pkt - retransmit broadcast packets - * @after: sequence number of last packet to *not* retransmit - * @to: sequence number of last packet to retransmit - * - * Called with bclink_lock locked - */ -static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) -{ - struct sk_buff *skb; - struct tipc_link *bcl = tn->bcl; + bb->primary_bearer = i; - skb_queue_walk(&bcl->transmq, skb) { - if (more(buf_seqno(skb), after)) { - tipc_link_retransmit(bcl, skb, mod(to - after)); + /* Reduce risk that all nodes select same primary */ + if ((i ^ tipc_own_addr(net)) & 1) break; - } } } -/** - * bclink_prepare_wakeup - prepare users for wakeup after congestion - * @bcl: broadcast link - * @resultq: queue for users which can be woken up - * Move a number of waiting users, as permitted by available space in - * the send queue, from link wait queue to specified queue for wakeup - */ -static void bclink_prepare_wakeup(struct tipc_link *bcl, struct sk_buff_head *resultq) +void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id) { - int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,}; - int imp, lim; - struct sk_buff *skb, *tmp; - - skb_queue_walk_safe(&bcl->wakeupq, skb, tmp) { - imp = TIPC_SKB_CB(skb)->chain_imp; - lim = bcl->window + bcl->backlog[imp].limit; - pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; - if ((pnd[imp] + bcl->backlog[imp].len) >= lim) - continue; - skb_unlink(skb, &bcl->wakeupq); - skb_queue_tail(resultq, skb); - } -} + struct tipc_bc_base *bb = tipc_bc_base(net); -/** - * tipc_bclink_wakeup_users - wake up pending users - * - * Called with no locks taken - */ -void tipc_bclink_wakeup_users(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - struct sk_buff_head resultq; - - skb_queue_head_init(&resultq); - bclink_prepare_wakeup(bcl, &resultq); - tipc_sk_rcv(net, &resultq); + tipc_bcast_lock(net); + bb->dests[bearer_id]++; + 
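
A sketch of the selection rule in tipc_bcbase_select_primary() above, minus the MTU clamping: any bearer reaching every broadcast destination qualifies, and the own-address parity test makes different nodes tend to settle on different qualifying bearers so broadcast load spreads instead of piling onto bearer 0 cluster-wide:

#define MAX_BEARERS		3
#define INVALID_BEARER_ID	(-1)

static int select_primary(const int *dests, int all_dests,
			  unsigned int own_addr)
{
	int primary = INVALID_BEARER_ID;
	int i;

	if (!all_dests)
		return primary;	/* no destinations, no primary */

	for (i = 0; i < MAX_BEARERS; i++) {
		if (dests[i] < all_dests)
			continue;	/* doesn't reach everyone */

		primary = i;
		if ((i ^ own_addr) & 1)
			break;		/* parity match: stop looking */
	}
	return primary;
}
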
tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); } -/** - * tipc_bclink_acknowledge - handle acknowledgement of broadcast packets - * @n_ptr: node that sent acknowledgement info - * @acked: broadcast sequence # that has been acknowledged - * - * Node is locked, bclink_lock unlocked. - */ -void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) +void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id) { - struct sk_buff *skb, *tmp; - unsigned int released = 0; - struct net *net = n_ptr->net; - struct tipc_net *tn = net_generic(net, tipc_net_id); - - if (unlikely(!n_ptr->bclink.recv_permitted)) - return; - - tipc_bclink_lock(net); - - /* Bail out if tx queue is empty (no clean up is required) */ - skb = skb_peek(&tn->bcl->transmq); - if (!skb) - goto exit; - - /* Determine which messages need to be acknowledged */ - if (acked == INVALID_LINK_SEQ) { - /* - * Contact with specified node has been lost, so need to - * acknowledge sent messages only (if other nodes still exist) - * or both sent and unsent messages (otherwise) - */ - if (tn->bclink->bcast_nodes.count) - acked = tn->bcl->silent_intv_cnt; - else - acked = tn->bcl->snd_nxt; - } else { - /* - * Bail out if specified sequence number does not correspond - * to a message that has been sent and not yet acknowledged - */ - if (less(acked, buf_seqno(skb)) || - less(tn->bcl->silent_intv_cnt, acked) || - less_eq(acked, n_ptr->bclink.acked)) - goto exit; - } - - /* Skip over packets that node has previously acknowledged */ - skb_queue_walk(&tn->bcl->transmq, skb) { - if (more(buf_seqno(skb), n_ptr->bclink.acked)) - break; - } - - /* Update packets that node is now acknowledging */ - skb_queue_walk_from_safe(&tn->bcl->transmq, skb, tmp) { - if (more(buf_seqno(skb), acked)) - break; - bcbuf_decr_acks(skb); - bclink_set_last_sent(net); - if (bcbuf_acks(skb) == 0) { - __skb_unlink(skb, &tn->bcl->transmq); - kfree_skb(skb); - released = 1; - } - } - n_ptr->bclink.acked = acked; + struct tipc_bc_base *bb = tipc_bc_base(net); - /* Try resolving broadcast link congestion, if necessary */ - if (unlikely(skb_peek(&tn->bcl->backlogq))) { - tipc_link_push_packets(tn->bcl); - bclink_set_last_sent(net); - } - if (unlikely(released && !skb_queue_empty(&tn->bcl->wakeupq))) - n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS; -exit: - tipc_bclink_unlock(net); + tipc_bcast_lock(net); + bb->dests[bearer_id]--; + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); } -/** - * tipc_bclink_update_link_state - update broadcast link state +/* tipc_bcbase_xmit - broadcast a packet queue across one or more bearers * - * RCU and node lock set + * Note that number of reachable destinations, as indicated in the dests[] + * array, may transitionally differ from the number of destinations indicated + * in each sent buffer. We can sustain this. 
Excess destination nodes will + * drop and never acknowledge the unexpected packets, and missing destinations + * will either require retransmission (if they are just about to be added to + * the bearer), or be removed from the buffer's 'ackers' counter (if they + * just went down) */ -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, - u32 last_sent) +static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq) { - struct sk_buff *buf; - struct net *net = n_ptr->net; - struct tipc_net *tn = net_generic(net, tipc_net_id); + int bearer_id; + struct tipc_bc_base *bb = tipc_bc_base(net); + struct sk_buff *skb, *_skb; + struct sk_buff_head _xmitq; - /* Ignore "stale" link state info */ - if (less_eq(last_sent, n_ptr->bclink.last_in)) + if (skb_queue_empty(xmitq)) return; - /* Update link synchronization state; quit if in sync */ - bclink_update_last_sent(n_ptr, last_sent); - - if (n_ptr->bclink.last_sent == n_ptr->bclink.last_in) + /* The typical case: at least one bearer has links to all nodes */ + bearer_id = bb->primary_bearer; + if (bearer_id >= 0) { + tipc_bearer_bc_xmit(net, bearer_id, xmitq); return; - - /* Update out-of-sync state; quit if loss is still unconfirmed */ - if ((++n_ptr->bclink.oos_state) == 1) { - if (n_ptr->bclink.deferred_size < (TIPC_MIN_LINK_WIN / 2)) - return; - n_ptr->bclink.oos_state++; } - /* Don't NACK if one has been recently sent (or seen) */ - if (n_ptr->bclink.oos_state & 0x1) - return; - - /* Send NACK */ - buf = tipc_buf_acquire(INT_H_SIZE); - if (buf) { - struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferdq); - u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; - - tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG, - INT_H_SIZE, n_ptr->addr); - msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tn->net_id); - msg_set_bcast_ack(msg, n_ptr->bclink.last_in); - msg_set_bcgap_after(msg, n_ptr->bclink.last_in); - msg_set_bcgap_to(msg, to); - - tipc_bclink_lock(net); - tipc_bearer_send(net, MAX_BEARERS, buf, NULL); - tn->bcl->stats.sent_nacks++; - tipc_bclink_unlock(net); - kfree_skb(buf); - - n_ptr->bclink.oos_state++; - } -} - -void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *hdr) -{ - u16 last = msg_last_bcast(hdr); - int mtyp = msg_type(hdr); + /* We have to transmit across all bearers */ + skb_queue_head_init(&_xmitq); + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + if (!bb->dests[bearer_id]) + continue; - if (unlikely(msg_user(hdr) != LINK_PROTOCOL)) - return; - if (mtyp == STATE_MSG) { - tipc_bclink_update_link_state(n, last); - return; + skb_queue_walk(xmitq, skb) { + _skb = pskb_copy_for_clone(skb, GFP_ATOMIC); + if (!_skb) + break; + __skb_queue_tail(&_xmitq, _skb); + } + tipc_bearer_bc_xmit(net, bearer_id, &_xmitq); } - /* Compatibility: older nodes don't know BCAST_PROTOCOL synchronization, - * and transfer synch info in LINK_PROTOCOL messages. - */ - if (tipc_node_is_up(n)) - return; - if ((mtyp != RESET_MSG) && (mtyp != ACTIVATE_MSG)) - return; - n->bclink.last_sent = last; - n->bclink.last_in = last; - n->bclink.oos_state = 0; + __skb_queue_purge(xmitq); + __skb_queue_purge(&_xmitq); } -/** - * bclink_peek_nack - monitor retransmission requests sent by other nodes - * - * Delay any upcoming NACK by this node if another node has already - * requested the first message this node is going to ask for. 
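When no primary bearer exists, tipc_bcbase_xmit() above falls back to replicating the queue once per bearer that still has destinations, via pskb_copy_for_clone(), so each bearer consumes its own copies and the originals are purged at the end. A rough userspace model of those ownership rules, with pkt and bearer_send as illustrative stand-ins and allocation failure simply ending the copy loop, as in the original:

#include <stdlib.h>
#include <string.h>

#define NUM_BEARERS 3

struct pkt {
    struct pkt *next;
    unsigned char data[64];
};

/* stand-in for tipc_bearer_bc_xmit(): consumes (frees) the list */
static void bearer_send(int bearer_id, struct pkt *list)
{
    (void)bearer_id;
    while (list) {
        struct pkt *next = list->next;

        free(list);
        list = next;
    }
}

void xmit_all_bearers(struct pkt *xmitq, const int dests[])
{
    for (int b = 0; b < NUM_BEARERS; b++) {
        struct pkt *copies = NULL, **tail = &copies;

        if (!dests[b])
            continue;

        /* copy rather than move: one queue feeds every bearer */
        for (struct pkt *p = xmitq; p; p = p->next) {
            struct pkt *c = malloc(sizeof(*c));

            if (!c)
                break;          /* stop copying, send what we have */
            memcpy(c, p, sizeof(*c));
            c->next = NULL;
            *tail = c;
            tail = &c->next;
        }
        bearer_send(b, copies);
    }
    bearer_send(-1, xmitq);     /* as with __skb_queue_purge(xmitq) */
}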
- */ -static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) -{ - struct tipc_node *n_ptr = tipc_node_find(net, msg_destnode(msg)); - - if (unlikely(!n_ptr)) - return; - - tipc_node_lock(n_ptr); - if (n_ptr->bclink.recv_permitted && - (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && - (n_ptr->bclink.last_in == msg_bcgap_after(msg))) - n_ptr->bclink.oos_state = 2; - tipc_node_unlock(n_ptr); - tipc_node_put(n_ptr); -} - -/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster +/* tipc_bcast_xmit - deliver buffer chain to all nodes in cluster * and to identified node local sockets * @net: the applicable net namespace * @list: chain of buffers containing message * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - struct tipc_bclink *bclink = tn->bclink; + struct tipc_link *l = tipc_bc_sndlink(net); + struct sk_buff_head xmitq, inputq, rcvq; int rc = 0; - int bc = 0; - struct sk_buff *skb; - struct sk_buff_head arrvq; - struct sk_buff_head inputq; - /* Prepare clone of message for local node */ - skb = tipc_msg_reassemble(list); - if (unlikely(!skb)) - return -EHOSTUNREACH; + __skb_queue_head_init(&rcvq); + __skb_queue_head_init(&xmitq); + skb_queue_head_init(&inputq); - /* Broadcast to all nodes */ - if (likely(bclink)) { - tipc_bclink_lock(net); - if (likely(bclink->bcast_nodes.count)) { - rc = __tipc_link_xmit(net, bcl, list); - if (likely(!rc)) { - u32 len = skb_queue_len(&bcl->transmq); - - bclink_set_last_sent(net); - bcl->stats.queue_sz_counts++; - bcl->stats.accu_queue_sz += len; - } - bc = 1; - } - tipc_bclink_unlock(net); - } + /* Prepare message clone for local node */ + if (unlikely(!tipc_msg_reassemble(list, &rcvq))) + return -EHOSTUNREACH; - if (unlikely(!bc)) - __skb_queue_purge(list); + tipc_bcast_lock(net); + if (tipc_link_bc_peers(l)) + rc = tipc_link_xmit(l, list, &xmitq); + tipc_bcast_unlock(net); + /* Don't send to local node if adding to link failed */ if (unlikely(rc)) { - kfree_skb(skb); + __skb_queue_purge(&rcvq); return rc; } - /* Deliver message clone */ - __skb_queue_head_init(&arrvq); - skb_queue_head_init(&inputq); - __skb_queue_tail(&arrvq, skb); - tipc_sk_mcast_rcv(net, &arrvq, &inputq); - return rc; -} -/** - * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet - * - * Called with both sending node's lock and bclink_lock taken. 
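The rewritten tipc_bcast_xmit() above also pins down an ordering detail: the local-delivery clone is prepared before anything is handed to the link, so a reassembly failure returns -EHOSTUNREACH with no side effects, and a later link error purges only the clone while the caller's chain is left intact for the congestion case. A schematic restatement of that control flow, with queues reduced to counters and all names hypothetical:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct q { int len; };          /* opaque stand-in for sk_buff_head */

static bool make_local_clone(struct q *src, struct q *rcvq)
{
    if (!src->len)
        return false;           /* reassembly failed */
    rcvq->len = 1;
    return true;
}

static int link_enqueue(struct q *list, struct q *xmitq)
{
    /* assume the window is open; a congested link would return
     * -ELINKCONG here and leave *list untouched for a retry
     */
    xmitq->len = list->len;
    return 0;
}

int bcast_xmit_model(struct q *list)
{
    struct q rcvq = {0}, xmitq = {0};
    int rc;

    if (!make_local_clone(list, &rcvq))  /* local copy first */
        return -EHOSTUNREACH;            /* no side effects yet */

    rc = link_enqueue(list, &xmitq);
    if (rc) {
        rcvq.len = 0;                    /* purge only the clone */
        return rc;
    }

    printf("bearers get %d pkts, local sockets get %d clone(s)\n",
           xmitq.len, rcvq.len);
    list->len = 0;                       /* chain consumed on success */
    return 0;
}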
- */ -static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) -{ - struct tipc_net *tn = net_generic(node->net, tipc_net_id); - - bclink_update_last_sent(node, seqno); - node->bclink.last_in = seqno; - node->bclink.oos_state = 0; - tn->bcl->stats.recv_info++; - - /* - * Unicast an ACK periodically, ensuring that - * all nodes in the cluster don't ACK at the same time - */ - if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { - tipc_link_proto_xmit(node_active_link(node, node->addr), - STATE_MSG, 0, 0, 0, 0); - tn->bcl->stats.sent_acks++; - } + /* Broadcast to all nodes, including local node */ + tipc_bcbase_xmit(net, &xmitq); + tipc_sk_mcast_rcv(net, &rcvq, &inputq); + __skb_queue_purge(list); + return 0; } -/** - * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards +/* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link * * RCU is locked, no other locks set */ -void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) +int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - struct tipc_msg *msg = buf_msg(buf); - struct tipc_node *node; - u32 next_in; - u32 seqno; - int deferred = 0; - int pos = 0; - struct sk_buff *iskb; - struct sk_buff_head *arrvq, *inputq; - - /* Screen out unwanted broadcast messages */ - if (msg_mc_netid(msg) != tn->net_id) - goto exit; - - node = tipc_node_find(net, msg_prevnode(msg)); - if (unlikely(!node)) - goto exit; - - tipc_node_lock(node); - if (unlikely(!node->bclink.recv_permitted)) - goto unlock; - - /* Handle broadcast protocol message */ - if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { - if (msg_type(msg) != STATE_MSG) - goto unlock; - if (msg_destnode(msg) == tn->own_addr) { - tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); - tipc_bclink_lock(net); - bcl->stats.recv_nacks++; - tn->bclink->retransmit_to = node; - bclink_retransmit_pkt(tn, msg_bcgap_after(msg), - msg_bcgap_to(msg)); - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else { - tipc_node_unlock(node); - bclink_peek_nack(net, msg); - } - tipc_node_put(node); - goto exit; - } - - /* Handle in-sequence broadcast message */ - seqno = msg_seqno(msg); - next_in = mod(node->bclink.last_in + 1); - arrvq = &tn->bclink->arrvq; - inputq = &tn->bclink->inputq; - - if (likely(seqno == next_in)) { -receive: - /* Deliver message to destination */ - if (likely(msg_isdata(msg))) { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - spin_lock_bh(&inputq->lock); - __skb_queue_tail(arrvq, buf); - spin_unlock_bh(&inputq->lock); - node->action_flags |= TIPC_BCAST_MSG_EVT; - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else if (msg_user(msg) == MSG_BUNDLER) { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - bcl->stats.recv_bundles++; - bcl->stats.recv_bundled += msg_msgcnt(msg); - pos = 0; - while (tipc_msg_extract(buf, &iskb, &pos)) { - spin_lock_bh(&inputq->lock); - __skb_queue_tail(arrvq, iskb); - spin_unlock_bh(&inputq->lock); - } - node->action_flags |= TIPC_BCAST_MSG_EVT; - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else if (msg_user(msg) == MSG_FRAGMENTER) { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - tipc_buf_append(&node->bclink.reasm_buf, &buf); - if (unlikely(!buf && !node->bclink.reasm_buf)) { - tipc_bclink_unlock(net); - goto unlock; - } - bcl->stats.recv_fragments++; - if (buf) { - bcl->stats.recv_fragmented++; - msg = buf_msg(buf); - tipc_bclink_unlock(net); - goto 
receive; - } - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(net); - tipc_node_unlock(node); - kfree_skb(buf); - } - buf = NULL; - - /* Determine new synchronization state */ - tipc_node_lock(node); - if (unlikely(!tipc_node_is_up(node))) - goto unlock; + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; + int rc; - if (node->bclink.last_in == node->bclink.last_sent) - goto unlock; + __skb_queue_head_init(&xmitq); - if (skb_queue_empty(&node->bclink.deferdq)) { - node->bclink.oos_state = 1; - goto unlock; - } - - msg = buf_msg(skb_peek(&node->bclink.deferdq)); - seqno = msg_seqno(msg); - next_in = mod(next_in + 1); - if (seqno != next_in) - goto unlock; - - /* Take in-sequence message from deferred queue & deliver it */ - buf = __skb_dequeue(&node->bclink.deferdq); - goto receive; - } - - /* Handle out-of-sequence broadcast message */ - if (less(next_in, seqno)) { - deferred = tipc_link_defer_pkt(&node->bclink.deferdq, - buf); - bclink_update_last_sent(node, seqno); - buf = NULL; + if (msg_mc_netid(hdr) != tipc_netid(net) || !tipc_link_is_up(l)) { + kfree_skb(skb); + return 0; } - tipc_bclink_lock(net); - - if (deferred) - bcl->stats.deferred_recv++; + tipc_bcast_lock(net); + if (msg_user(hdr) == BCAST_PROTOCOL) + rc = tipc_link_bc_nack_rcv(l, skb, &xmitq); else - bcl->stats.duplicates++; + rc = tipc_link_rcv(l, skb, NULL); + tipc_bcast_unlock(net); - tipc_bclink_unlock(net); + tipc_bcbase_xmit(net, &xmitq); -unlock: - tipc_node_unlock(node); - tipc_node_put(node); -exit: - kfree_skb(buf); -} + /* Any socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); -u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) -{ - return (n_ptr->bclink.recv_permitted && - (tipc_bclink_get_last_sent(n_ptr->net) != n_ptr->bclink.acked)); + return rc; } - -/** - * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer - * - * Send packet over as many bearers as necessary to reach all nodes - * that have joined the broadcast link. 
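tipc_bcast_rcv() above replaces the old per-case reception logic with a two-way dispatch: traffic carrying a foreign network id, or arriving while the link is down, is dropped; BCAST_PROTOCOL packets go to the NACK handler; everything else takes the regular link receive path. The dispatch in isolation, with illustrative enum and handler names:

#include <stdbool.h>

enum bc_user { BC_DATA, BC_PROTOCOL };
enum bc_action { BC_DROP, BC_NACK_RCV, BC_LINK_RCV };

struct bc_pkt {
    int netid;
    enum bc_user user;
};

enum bc_action bcast_rcv_dispatch(const struct bc_pkt *p,
                                  int own_netid, bool link_up)
{
    /* screen out traffic from other TIPC networks */
    if (p->netid != own_netid || !link_up)
        return BC_DROP;
    if (p->user == BC_PROTOCOL)
        return BC_NACK_RCV;     /* tipc_link_bc_nack_rcv() path */
    return BC_LINK_RCV;         /* tipc_link_rcv() path */
}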
+/* tipc_bcast_ack_rcv - receive and handle a broadcast acknowledge * - * Returns 0 (packet sent successfully) under all circumstances, - * since the broadcast link's pseudo-bearer never blocks + * RCU is locked, no other locks set */ -static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, - struct tipc_bearer *unused1, - struct tipc_media_addr *unused2) +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked) { - int bp_index; - struct tipc_msg *msg = buf_msg(buf); - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bcbearer *bcbearer = tn->bcbearer; - struct tipc_bclink *bclink = tn->bclink; - - /* Prepare broadcast link message for reliable transmission, - * if first time trying to send it; - * preparation is skipped for broadcast link protocol messages - * since they are sent in an unreliable manner and don't need it - */ - if (likely(!msg_non_seq(buf_msg(buf)))) { - bcbuf_set_acks(buf, bclink->bcast_nodes.count); - msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tn->net_id); - tn->bcl->stats.sent_info++; - if (WARN_ON(!bclink->bcast_nodes.count)) { - dump_stack(); - return 0; - } - } + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; - /* Send buffer over bearers until all targets reached */ - bcbearer->remains = bclink->bcast_nodes; - - for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { - struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; - struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; - struct tipc_bearer *bp[2] = {p, s}; - struct tipc_bearer *b = bp[msg_link_selector(msg)]; - struct sk_buff *tbuf; - - if (!p) - break; /* No more bearers to try */ - if (!b) - b = p; - tipc_nmap_diff(&bcbearer->remains, &b->nodes, - &bcbearer->remains_new); - if (bcbearer->remains_new.count == bcbearer->remains.count) - continue; /* Nothing added by bearer pair */ - - if (bp_index == 0) { - /* Use original buffer for first bearer */ - tipc_bearer_send(net, b->identity, buf, &b->bcast_addr); - } else { - /* Avoid concurrent buffer access */ - tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); - if (!tbuf) - break; - tipc_bearer_send(net, b->identity, tbuf, - &b->bcast_addr); - kfree_skb(tbuf); /* Bearer keeps a clone */ - } - if (bcbearer->remains_new.count == 0) - break; /* All targets reached */ + __skb_queue_head_init(&xmitq); - bcbearer->remains = bcbearer->remains_new; - } + tipc_bcast_lock(net); + tipc_link_bc_ack_rcv(l, acked, &xmitq); + tipc_bcast_unlock(net); - return 0; + tipc_bcbase_xmit(net, &xmitq); + + /* Any socket wakeup messages ? 
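The acknowledge value handled by tipc_bcast_ack_rcv() above carries a 16-bit sequence number, so deciding what has been acked needs wrap-around aware comparison rather than a plain <=. A freestanding sketch of releasing a transmit queue up to an acked seqno; txpkt and release_upto() are invented here, and in the kernel this happens inside tipc_link_bc_ack_rcv() together with the per-buffer ackers bookkeeping:

#include <stdint.h>
#include <stdlib.h>

struct txpkt {
    struct txpkt *next;
    uint16_t seqno;
};

/* true if a comes no later than b in mod-2^16 sequence space */
static int less_eq16(uint16_t a, uint16_t b)
{
    return (int16_t)(b - a) >= 0;
}

/* free everything with seqno <= acked; return the number released */
int release_upto(struct txpkt **q, uint16_t acked)
{
    int released = 0;

    while (*q && less_eq16((*q)->seqno, acked)) {
        struct txpkt *p = *q;

        *q = p->next;
        free(p);
        released++;
    }
    return released;
}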
*/ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); } -/** - * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer +/* tipc_bcast_sync_rcv - check and update rcv link with peer's send state + * + * RCU is locked, no other locks set */ -void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, - u32 node, bool action) +void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bcbearer *bcbearer = tn->bcbearer; - struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; - struct tipc_bcbearer_pair *bp_curr; - struct tipc_bearer *b; - int b_index; - int pri; + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; - tipc_bclink_lock(net); + __skb_queue_head_init(&xmitq); - if (action) - tipc_nmap_add(nm_ptr, node); - else - tipc_nmap_remove(nm_ptr, node); + tipc_bcast_lock(net); + if (msg_type(hdr) == STATE_MSG) { + tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); + tipc_link_bc_sync_rcv(l, hdr, &xmitq); + } else { + tipc_link_bc_init_rcv(l, hdr); + } + tipc_bcast_unlock(net); - /* Group bearers by priority (can assume max of two per priority) */ - memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); + tipc_bcbase_xmit(net, &xmitq); - rcu_read_lock(); - for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - b = rcu_dereference_rtnl(tn->bearer_list[b_index]); - if (!b || !b->nodes.count) - continue; - - if (!bp_temp[b->priority].primary) - bp_temp[b->priority].primary = b; - else - bp_temp[b->priority].secondary = b; - } - rcu_read_unlock(); + /* Any socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); +} - /* Create array of bearer pairs for broadcasting */ - bp_curr = bcbearer->bpairs; - memset(bcbearer->bpairs, 0, sizeof(bcbearer->bpairs)); +/* tipc_bcast_add_peer - add a peer node to broadcast link and bearer + * + * RCU is locked, node lock is set + */ +void tipc_bcast_add_peer(struct net *net, struct tipc_link *uc_l, + struct sk_buff_head *xmitq) +{ + struct tipc_link *snd_l = tipc_bc_sndlink(net); - for (pri = TIPC_MAX_LINK_PRI; pri >= 0; pri--) { + tipc_bcast_lock(net); + tipc_link_add_bc_peer(snd_l, uc_l, xmitq); + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); +} - if (!bp_temp[pri].primary) - continue; +/* tipc_bcast_remove_peer - remove a peer node from broadcast link and bearer + * + * RCU is locked, node lock is set + */ +void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) +{ + struct tipc_link *snd_l = tipc_bc_sndlink(net); + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; - bp_curr->primary = bp_temp[pri].primary; + __skb_queue_head_init(&xmitq); - if (bp_temp[pri].secondary) { - if (tipc_nmap_equal(&bp_temp[pri].primary->nodes, - &bp_temp[pri].secondary->nodes)) { - bp_curr->secondary = bp_temp[pri].secondary; - } else { - bp_curr++; - bp_curr->primary = bp_temp[pri].secondary; - } - } + tipc_bcast_lock(net); + tipc_link_remove_bc_peer(snd_l, rcv_l, &xmitq); + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); - bp_curr++; - } + tipc_bcbase_xmit(net, &xmitq); - tipc_bclink_unlock(net); + /* Any socket wakeup messages ? 
*/ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); } static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, @@ -835,7 +395,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) if (!bcl) return 0; - tipc_bclink_lock(net); + tipc_bcast_lock(net); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); @@ -870,7 +430,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) if (err) goto attr_msg_full; - tipc_bclink_unlock(net); + tipc_bcast_unlock(net); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); @@ -881,7 +441,7 @@ prop_msg_full: attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: - tipc_bclink_unlock(net); + tipc_bcast_unlock(net); genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; @@ -895,25 +455,25 @@ int tipc_bclink_reset_stats(struct net *net) if (!bcl) return -ENOPROTOOPT; - tipc_bclink_lock(net); + tipc_bcast_lock(net); memset(&bcl->stats, 0, sizeof(bcl->stats)); - tipc_bclink_unlock(net); + tipc_bcast_unlock(net); return 0; } -int tipc_bclink_set_queue_limits(struct net *net, u32 limit) +static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; + struct tipc_link *l = tipc_bc_sndlink(net); - if (!bcl) + if (!l) return -ENOPROTOOPT; - if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) + if (limit < BCLINK_WIN_MIN) + limit = BCLINK_WIN_MIN; + if (limit > TIPC_MAX_LINK_WIN) return -EINVAL; - - tipc_bclink_lock(net); - tipc_link_set_queue_limits(bcl, limit); - tipc_bclink_unlock(net); + tipc_bcast_lock(net); + tipc_link_set_queue_limits(l, limit); + tipc_bcast_unlock(net); return 0; } @@ -935,123 +495,51 @@ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - return tipc_bclink_set_queue_limits(net, win); + return tipc_bc_link_set_queue_limits(net, win); } -int tipc_bclink_init(struct net *net) +int tipc_bcast_init(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bcbearer *bcbearer; - struct tipc_bclink *bclink; - struct tipc_link *bcl; - - bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); - if (!bcbearer) - return -ENOMEM; - - bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); - if (!bclink) { - kfree(bcbearer); - return -ENOMEM; - } + struct tipc_net *tn = tipc_net(net); + struct tipc_bc_base *bb = NULL; + struct tipc_link *l = NULL; - bcl = &bclink->link; - bcbearer->bearer.media = &bcbearer->media; - bcbearer->media.send_msg = tipc_bcbearer_send; - sprintf(bcbearer->media.name, "tipc-broadcast"); - - spin_lock_init(&bclink->lock); - __skb_queue_head_init(&bcl->transmq); - __skb_queue_head_init(&bcl->backlogq); - __skb_queue_head_init(&bcl->deferdq); - skb_queue_head_init(&bcl->wakeupq); - bcl->snd_nxt = 1; - spin_lock_init(&bclink->node.lock); - __skb_queue_head_init(&bclink->arrvq); - skb_queue_head_init(&bclink->inputq); - bcl->owner = &bclink->node; - bcl->owner->net = net; - bcl->mtu = MAX_PKT_DEFAULT_MCAST; - tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); - bcl->bearer_id = MAX_BEARERS; - rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); - bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg; - msg_set_prevnode(bcl->pmsg, tn->own_addr); - strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); - tn->bcbearer = bcbearer; - tn->bclink = bclink; - tn->bcl = bcl; - return 0; -} + bb = kzalloc(sizeof(*bb), GFP_ATOMIC); + if (!bb) + goto enomem; 
+ tn->bcbase = bb; + spin_lock_init(&tipc_net(net)->bclock); -void tipc_bclink_stop(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_bclink_lock(net); - tipc_link_purge_queues(tn->bcl); - tipc_bclink_unlock(net); - - RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); - synchronize_net(); - kfree(tn->bcbearer); - kfree(tn->bclink); + if (!tipc_link_bc_create(net, 0, 0, + U16_MAX, + BCLINK_WIN_DEFAULT, + 0, + &bb->inputq, + NULL, + NULL, + &l)) + goto enomem; + bb->link = l; + tn->bcl = l; + return 0; +enomem: + kfree(bb); + kfree(l); + return -ENOMEM; } -/** - * tipc_nmap_add - add a node to a node map - */ -static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) +void tipc_bcast_reinit(struct net *net) { - int n = tipc_node(node); - int w = n / WSIZE; - u32 mask = (1 << (n % WSIZE)); + struct tipc_bc_base *b = tipc_bc_base(net); - if ((nm_ptr->map[w] & mask) == 0) { - nm_ptr->count++; - nm_ptr->map[w] |= mask; - } + msg_set_prevnode(b->link->pmsg, tipc_own_addr(net)); } -/** - * tipc_nmap_remove - remove a node from a node map - */ -static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) +void tipc_bcast_stop(struct net *net) { - int n = tipc_node(node); - int w = n / WSIZE; - u32 mask = (1 << (n % WSIZE)); - - if ((nm_ptr->map[w] & mask) != 0) { - nm_ptr->map[w] &= ~mask; - nm_ptr->count--; - } -} + struct tipc_net *tn = net_generic(net, tipc_net_id); -/** - * tipc_nmap_diff - find differences between node maps - * @nm_a: input node map A - * @nm_b: input node map B - * @nm_diff: output node map A-B (i.e. nodes of A that are not in B) - */ -static void tipc_nmap_diff(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b, - struct tipc_node_map *nm_diff) -{ - int stop = ARRAY_SIZE(nm_a->map); - int w; - int b; - u32 map; - - memset(nm_diff, 0, sizeof(*nm_diff)); - for (w = 0; w < stop; w++) { - map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]); - nm_diff->map[w] = map; - if (map != 0) { - for (b = 0 ; b < WSIZE; b++) { - if (map & (1 << b)) - nm_diff->count++; - } - } - } + synchronize_net(); + kfree(tn->bcbase); + kfree(tn->bcl); } diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index d74c69bcf60b..2855b9356a15 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -37,102 +37,44 @@ #ifndef _TIPC_BCAST_H #define _TIPC_BCAST_H -#include <linux/tipc_config.h> -#include "link.h" -#include "node.h" +#include "core.h" -/** - * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link - * @primary: pointer to primary bearer - * @secondary: pointer to secondary bearer - * - * Bearers must have same priority and same set of reachable destinations - * to be paired. - */ - -struct tipc_bcbearer_pair { - struct tipc_bearer *primary; - struct tipc_bearer *secondary; -}; - -#define BCBEARER MAX_BEARERS - -/** - * struct tipc_bcbearer - bearer used by broadcast link - * @bearer: (non-standard) broadcast bearer structure - * @media: (non-standard) broadcast media structure - * @bpairs: array of bearer pairs - * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() - * @remains: temporary node map used by tipc_bcbearer_send() - * @remains_new: temporary node map used tipc_bcbearer_send() - * - * Note: The fields labelled "temporary" are incorporated into the bearer - * to avoid consuming potentially limited stack space through the use of - * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bclink_lock". 
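The enomem exit in tipc_bcast_init() above leans on kfree(NULL) being a no-op: when the first allocation fails, l is still NULL, and when link creation fails, only bb needs freeing; one label covers both. The same single-exit idiom in plain C, where free(NULL) is likewise defined to do nothing:

#include <errno.h>
#include <stdlib.h>

struct bc_base { int dummy; };
struct bc_link { int dummy; };

int bcast_init_model(struct bc_base **basep, struct bc_link **linkp)
{
    struct bc_base *bb = NULL;
    struct bc_link *l = NULL;

    bb = calloc(1, sizeof(*bb));
    if (!bb)
        goto enomem;

    l = calloc(1, sizeof(*l));
    if (!l)
        goto enomem;

    *basep = bb;
    *linkp = l;
    return 0;

enomem:
    /* one exit covers both failures: free(NULL) is a no-op */
    free(bb);
    free(l);
    return -ENOMEM;
}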
- */ -struct tipc_bcbearer { - struct tipc_bearer bearer; - struct tipc_media media; - struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; - struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; - struct tipc_node_map remains; - struct tipc_node_map remains_new; -}; +struct tipc_node; +struct tipc_msg; +struct tipc_nl_msg; +struct tipc_node_map; -/** - * struct tipc_bclink - link used for broadcast messages - * @lock: spinlock governing access to structure - * @link: (non-standard) broadcast link structure - * @node: (non-standard) node structure representing b'cast link's peer node - * @bcast_nodes: map of broadcast-capable nodes - * @retransmit_to: node that most recently requested a retransmit - * - * Handles sequence numbering, fragmentation, bundling, etc. - */ -struct tipc_bclink { - spinlock_t lock; - struct tipc_link link; - struct tipc_node node; - struct sk_buff_head arrvq; - struct sk_buff_head inputq; - struct tipc_node_map bcast_nodes; - struct tipc_node *retransmit_to; -}; +int tipc_bcast_init(struct net *net); +void tipc_bcast_reinit(struct net *net); +void tipc_bcast_stop(struct net *net); +void tipc_bcast_add_peer(struct net *net, struct tipc_link *l, + struct sk_buff_head *xmitq); +void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl); +void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id); +void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); +int tipc_bcast_get_mtu(struct net *net); +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list); +int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked); +void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); +int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); +int tipc_bclink_reset_stats(struct net *net); -struct tipc_node; -extern const char tipc_bclink_name[]; +static inline void tipc_bcast_lock(struct net *net) +{ + spin_lock_bh(&tipc_net(net)->bclock); +} -/** - * tipc_nmap_equal - test for equality of node maps - */ -static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b) +static inline void tipc_bcast_unlock(struct net *net) { - return !memcmp(nm_a, nm_b, sizeof(*nm_a)); + spin_unlock_bh(&tipc_net(net)->bclock); } -int tipc_bclink_init(struct net *net); -void tipc_bclink_stop(struct net *net); -void tipc_bclink_add_node(struct net *net, u32 addr); -void tipc_bclink_remove_node(struct net *net, u32 addr); -struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); -void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); -void tipc_bclink_rcv(struct net *net, struct sk_buff *buf); -u32 tipc_bclink_get_last_sent(struct net *net); -u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); -void tipc_bclink_update_link_state(struct tipc_node *node, - u32 last_sent); -int tipc_bclink_reset_stats(struct net *net); -int tipc_bclink_set_queue_limits(struct net *net, u32 limit); -void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, - u32 node, bool action); -uint tipc_bclink_get_mtu(void); -int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); -void tipc_bclink_wakeup_users(struct net *net); -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); -int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); -void tipc_bclink_input(struct net *net); -void 
tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *msg); +static inline struct tipc_link *tipc_bc_sndlink(struct net *net) +{ + return tipc_net(net)->bcl; +} #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 82b278668ab7..648f2a67f314 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -193,10 +193,8 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) rcu_read_lock(); b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) { - tipc_bcbearer_sort(net, &b_ptr->nodes, dest, true); + if (b_ptr) tipc_disc_add_dest(b_ptr->link_req); - } rcu_read_unlock(); } @@ -207,10 +205,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) rcu_read_lock(); b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) { - tipc_bcbearer_sort(net, &b_ptr->nodes, dest, false); + if (b_ptr) tipc_disc_remove_dest(b_ptr->link_req); - } rcu_read_unlock(); } @@ -418,10 +414,9 @@ void tipc_disable_l2_media(struct tipc_bearer *b) * @b_ptr: the bearer through which the packet is to be sent * @dest: peer destination address */ -int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, +int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, struct tipc_bearer *b, struct tipc_media_addr *dest) { - struct sk_buff *clone; struct net_device *dev; int delta; @@ -429,42 +424,48 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, if (!dev) return 0; - clone = skb_clone(buf, GFP_ATOMIC); - if (!clone) - return 0; - - delta = dev->hard_header_len - skb_headroom(buf); + delta = dev->hard_header_len - skb_headroom(skb); if ((delta > 0) && - pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(clone); + pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(skb); return 0; } - skb_reset_network_header(clone); - clone->dev = dev; - clone->protocol = htons(ETH_P_TIPC); - dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, - dev->dev_addr, clone->len); - dev_queue_xmit(clone); + skb_reset_network_header(skb); + skb->dev = dev; + skb->protocol = htons(ETH_P_TIPC); + dev_hard_header(skb, dev, ETH_P_TIPC, dest->value, + dev->dev_addr, skb->len); + dev_queue_xmit(skb); return 0; } -/* tipc_bearer_send- sends buffer to destination over bearer - * - * IMPORTANT: - * The media send routine must not alter the buffer being passed in - * as it may be needed for later retransmission! 
+int tipc_bearer_mtu(struct net *net, u32 bearer_id) +{ + int mtu = 0; + struct tipc_bearer *b; + + rcu_read_lock(); + b = rcu_dereference_rtnl(tipc_net(net)->bearer_list[bearer_id]); + if (b) + mtu = b->mtu; + rcu_read_unlock(); + return mtu; +} + +/* tipc_bearer_xmit_skb - sends buffer to destination over bearer */ -void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, - struct tipc_media_addr *dest) +void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, + struct sk_buff *skb, + struct tipc_media_addr *dest) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b_ptr)) - b_ptr->media->send_msg(net, buf, b_ptr, dest); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (likely(b)) + b->media->send_msg(net, skb, b, dest); rcu_read_unlock(); } @@ -487,8 +488,31 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, skb_queue_walk_safe(xmitq, skb, tmp) { __skb_dequeue(xmitq); b->media->send_msg(net, skb, b, dst); - /* Until we remove cloning in tipc_l2_send_msg(): */ - kfree_skb(skb); + } + } + rcu_read_unlock(); +} + +/* tipc_bearer_bc_xmit() - broadcast buffers to all destinations + */ +void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq) +{ + struct tipc_net *tn = tipc_net(net); + int net_id = tn->net_id; + struct tipc_bearer *b; + struct sk_buff *skb, *tmp; + struct tipc_msg *hdr; + + rcu_read_lock(); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (likely(b)) { + skb_queue_walk_safe(xmitq, skb, tmp) { + hdr = buf_msg(skb); + msg_set_non_seq(hdr, 1); + msg_set_mc_netid(hdr, net_id); + __skb_dequeue(xmitq); + b->media->send_msg(net, skb, b, &b->bcast_addr); } } rcu_read_unlock(); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 6426f242f626..552185bc4773 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -163,6 +163,7 @@ struct tipc_bearer { u32 identity; struct tipc_link_req *link_req; char net_plane; + int node_cnt; struct tipc_node_map nodes; }; @@ -215,10 +216,14 @@ struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(struct net *net); -void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, - struct tipc_media_addr *dest); +int tipc_bearer_mtu(struct net *net, u32 bearer_id); +void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, + struct sk_buff *skb, + struct tipc_media_addr *dest); void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, struct tipc_media_addr *dst); +void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/core.c b/net/tipc/core.c index 005ba5eb0ea4..03a842870c52 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -42,6 +42,7 @@ #include "bearer.h" #include "net.h" #include "socket.h" +#include "bcast.h" #include <linux/module.h> @@ -71,8 +72,15 @@ static int __net_init tipc_init_net(struct net *net) err = tipc_topsrv_start(net); if (err) goto out_subscr; + + err = tipc_bcast_init(net); + if (err) + goto out_bclink; + return 0; +out_bclink: + tipc_bcast_stop(net); out_subscr: tipc_nametbl_stop(net); out_nametbl: @@ -85,6 +93,7 @@ static void __net_exit tipc_exit_net(struct net *net) { tipc_topsrv_stop(net); tipc_net_stop(net); + tipc_bcast_stop(net); tipc_nametbl_stop(net); 
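tipc_bearer_bc_xmit() above stamps every outgoing buffer as non-sequenced and tags it with the network id before the media send; tipc_bcast_rcv() filters on the same identity at the far end. A small model of that per-packet stamping, with an invented header layout:

#include <stdint.h>
#include <stddef.h>

struct bc_hdr {
    uint16_t non_seq;           /* outside unicast sequencing */
    uint16_t netid;             /* TIPC network identity */
};

struct bc_pkt {
    struct bc_pkt *next;
    struct bc_hdr hdr;
};

typedef void (*media_send_fn)(struct bc_pkt *pkt);

void bearer_bc_xmit_model(struct bc_pkt *xmitq, uint16_t netid,
                          media_send_fn send)
{
    struct bc_pkt *p, *next;

    for (p = xmitq; p; p = next) {
        next = p->next;
        /* stamp before sending: receivers filter on both fields */
        p->hdr.non_seq = 1;
        p->hdr.netid = netid;
        p->next = NULL;
        send(p);                /* the media layer consumes the buffer */
    }
}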
tipc_sk_rht_destroy(net); } diff --git a/net/tipc/core.h b/net/tipc/core.h index b96b41eabf12..18e95a8020cd 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -62,8 +62,7 @@ struct tipc_node; struct tipc_bearer; -struct tipc_bcbearer; -struct tipc_bclink; +struct tipc_bc_base; struct tipc_link; struct tipc_name_table; struct tipc_server; @@ -93,8 +92,8 @@ struct tipc_net { struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; /* Broadcast link */ - struct tipc_bcbearer *bcbearer; - struct tipc_bclink *bclink; + spinlock_t bclock; + struct tipc_bc_base *bcbase; struct tipc_link *bcl; /* Socket hash table */ @@ -114,6 +113,11 @@ static inline struct tipc_net *tipc_net(struct net *net) return net_generic(net, tipc_net_id); } +static inline int tipc_netid(struct net *net) +{ + return tipc_net(net)->net_id; +} + static inline u16 mod(u16 x) { return x & 0xffffu; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index d14e0a4aa9af..afe8c47c4085 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -89,7 +89,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, MAX_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_node_sig(msg, tn->random); - msg_set_node_capabilities(msg, 0); + msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tn->net_id); b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); @@ -167,11 +167,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { rskb = tipc_buf_acquire(MAX_H_SIZE); - if (rskb) { - tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); - tipc_bearer_send(net, bearer->identity, rskb, &maddr); - kfree_skb(rskb); - } + if (!rskb) + return; + tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); + tipc_bearer_xmit_skb(net, bearer->identity, rskb, &maddr); } } @@ -225,6 +224,7 @@ void tipc_disc_remove_dest(struct tipc_link_req *req) static void disc_timeout(unsigned long data) { struct tipc_link_req *req = (struct tipc_link_req *)data; + struct sk_buff *skb; int max_delay; spin_lock_bh(&req->lock); @@ -242,9 +242,9 @@ static void disc_timeout(unsigned long data) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - tipc_bearer_send(req->net, req->bearer_id, req->buf, &req->dest); - - + skb = skb_clone(req->buf, GFP_ATOMIC); + if (skb) + tipc_bearer_xmit_skb(req->net, req->bearer_id, skb, &req->dest); req->timer_intv *= 2; if (req->num_nodes) max_delay = TIPC_LINK_REQ_SLOW; @@ -271,6 +271,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) { struct tipc_link_req *req; + struct sk_buff *skb; req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) @@ -292,7 +293,9 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, setup_timer(&req->timer, disc_timeout, (unsigned long)req); mod_timer(&req->timer, jiffies + req->timer_intv); b_ptr->link_req = req; - tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); + skb = skb_clone(req->buf, GFP_ATOMIC); + if (skb) + tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); return 0; } @@ -316,6 +319,7 @@ void tipc_disc_delete(struct tipc_link_req *req) void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) { struct tipc_link_req *req = b_ptr->link_req; + struct sk_buff *skb; spin_lock_bh(&req->lock); tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); @@ -325,6 +329,8 @@ void tipc_disc_reset(struct 
net *net, struct tipc_bearer *b_ptr) req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; mod_timer(&req->timer, jiffies + req->timer_intv); - tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); + skb = skb_clone(req->buf, GFP_ATOMIC); + if (skb) + tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); spin_unlock_bh(&req->lock); } diff --git a/net/tipc/link.c b/net/tipc/link.c index ff9b0b92e62e..9efbdbde2b08 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -50,6 +50,7 @@ */ static const char *link_co_err = "Link tunneling error, "; static const char *link_rst_msg = "Resetting link "; +static const char tipc_bclink_name[] = "broadcast-link"; static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, @@ -75,6 +76,14 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 } }; +/* Send states for broadcast NACKs + */ +enum { + BC_NACK_SND_CONDITIONAL, + BC_NACK_SND_UNCONDITIONAL, + BC_NACK_SND_SUPPRESS, +}; + /* * Interval between NACKs when packets arrive out of order */ @@ -110,7 +119,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, struct sk_buff_head *xmitq); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); -static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); +static void tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +static void tipc_link_build_bc_init_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +static bool tipc_link_release_pkts(struct tipc_link *l, u16 to); /* * Simple non-static link routines (i.e. referenced outside this file) @@ -150,11 +163,66 @@ bool tipc_link_is_blocked(struct tipc_link *l) return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER); } +static bool link_is_bc_sndlink(struct tipc_link *l) +{ + return !l->bc_sndlink; +} + +static bool link_is_bc_rcvlink(struct tipc_link *l) +{ + return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l)); +} + int tipc_link_is_active(struct tipc_link *l) { - struct tipc_node *n = l->owner; + return l->active; +} + +void tipc_link_set_active(struct tipc_link *l, bool active) +{ + l->active = active; +} + +void tipc_link_add_bc_peer(struct tipc_link *snd_l, + struct tipc_link *uc_l, + struct sk_buff_head *xmitq) +{ + struct tipc_link *rcv_l = uc_l->bc_rcvlink; - return (node_active_link(n, 0) == l) || (node_active_link(n, 1) == l); + snd_l->ackers++; + rcv_l->acked = snd_l->snd_nxt - 1; + tipc_link_build_bc_init_msg(uc_l, xmitq); +} + +void tipc_link_remove_bc_peer(struct tipc_link *snd_l, + struct tipc_link *rcv_l, + struct sk_buff_head *xmitq) +{ + u16 ack = snd_l->snd_nxt - 1; + + snd_l->ackers--; + tipc_link_bc_ack_rcv(rcv_l, ack, xmitq); + tipc_link_reset(rcv_l); + rcv_l->state = LINK_RESET; + if (!snd_l->ackers) { + tipc_link_reset(snd_l); + __skb_queue_purge(xmitq); + } +} + +int tipc_link_bc_peers(struct tipc_link *l) +{ + return l->ackers; +} + +void tipc_link_set_mtu(struct tipc_link *l, int mtu) +{ + l->mtu = mtu; +} + +int tipc_link_mtu(struct tipc_link *l) +{ + return l->mtu; } static u32 link_own_addr(struct tipc_link *l) @@ -165,57 +233,72 @@ static u32 link_own_addr(struct tipc_link *l) /** * tipc_link_create - create a new link * @n: pointer to associated node - * @b: pointer to associated bearer + * @if_name: associated interface name + * @bearer_id: id (index) of associated 
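Several callers in discover.c above switch from tipc_bearer_send(), which preserved the buffer for later retransmission, to tipc_bearer_xmit_skb(), which consumes it; the periodic discovery request therefore keeps its long-lived template and sends a fresh skb_clone() each time, skipping a round if the clone fails. The ownership pattern in miniature, with illustrative names:

#include <stdlib.h>
#include <string.h>

struct dbuf {
    size_t len;
    unsigned char data[64];
};

/* stand-in for tipc_bearer_xmit_skb(): always consumes its argument */
static void xmit_consume(struct dbuf *b)
{
    free(b);
}

static struct dbuf *dbuf_clone(const struct dbuf *b)
{
    struct dbuf *c = malloc(sizeof(*c));

    if (c)
        memcpy(c, b, sizeof(*c));
    return c;
}

/* periodic resend: the template survives, only clones are consumed */
void disc_timeout_model(const struct dbuf *req)
{
    struct dbuf *skb = dbuf_clone(req);

    if (skb)                    /* clone failure: wait for next tick */
        xmit_consume(skb);
}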
bearer + * @tolerance: link tolerance to be used by link + * @net_plane: network plane (A,B,c..) this link belongs to + * @mtu: mtu to be advertised by link + * @priority: priority to be used by link + * @window: send window to be used by link + * @session: session to be used by link * @ownnode: identity of own node - * @peer: identity of peer node - * @maddr: media address to be used + * @peer: node id of peer node + * @peer_caps: bitmap describing peer node capabilities + * @bc_sndlink: the namespace global link used for broadcast sending + * @bc_rcvlink: the peer specific link used for broadcast reception * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link * * Returns true if link was created, otherwise false */ -bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, - u32 ownnode, u32 peer, struct tipc_media_addr *maddr, - struct sk_buff_head *inputq, struct sk_buff_head *namedq, +bool tipc_link_create(struct net *net, char *if_name, int bearer_id, + int tolerance, char net_plane, u32 mtu, int priority, + int window, u32 session, u32 ownnode, u32 peer, + u16 peer_caps, + struct tipc_link *bc_sndlink, + struct tipc_link *bc_rcvlink, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, struct tipc_link **link) { struct tipc_link *l; struct tipc_msg *hdr; - char *if_name; l = kzalloc(sizeof(*l), GFP_ATOMIC); if (!l) return false; *link = l; + l->pmsg = (struct tipc_msg *)&l->proto_msg; + hdr = l->pmsg; + tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer); + msg_set_size(hdr, sizeof(l->proto_msg)); + msg_set_session(hdr, session); + msg_set_bearer_id(hdr, l->bearer_id); /* Note: peer i/f name is completed by reset/activate message */ - if_name = strchr(b->name, ':') + 1; sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown", tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode), if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); + strcpy((char *)msg_data(hdr), if_name); l->addr = peer; - l->media_addr = maddr; - l->owner = n; + l->peer_caps = peer_caps; + l->net = net; l->peer_session = WILDCARD_SESSION; - l->bearer_id = b->identity; - l->tolerance = b->tolerance; - l->net_plane = b->net_plane; - l->advertised_mtu = b->mtu; - l->mtu = b->mtu; - l->priority = b->priority; - tipc_link_set_queue_limits(l, b->window); + l->bearer_id = bearer_id; + l->tolerance = tolerance; + l->net_plane = net_plane; + l->advertised_mtu = mtu; + l->mtu = mtu; + l->priority = priority; + tipc_link_set_queue_limits(l, window); + l->ackers = 1; + l->bc_sndlink = bc_sndlink; + l->bc_rcvlink = bc_rcvlink; l->inputq = inputq; l->namedq = namedq; l->state = LINK_RESETTING; - l->pmsg = (struct tipc_msg *)&l->proto_msg; - hdr = l->pmsg; - tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer); - msg_set_size(hdr, sizeof(l->proto_msg)); - msg_set_session(hdr, session); - msg_set_bearer_id(hdr, l->bearer_id); - strcpy((char *)msg_data(hdr), if_name); __skb_queue_head_init(&l->transmq); __skb_queue_head_init(&l->backlogq); __skb_queue_head_init(&l->deferdq); @@ -224,27 +307,43 @@ bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, return true; } -/* tipc_link_build_bcast_sync_msg() - synchronize broadcast link endpoints. 
+/** + * tipc_link_bc_create - create new link to be used for broadcast + * @n: pointer to associated node + * @mtu: mtu to be used + * @window: send window to be used + * @inputq: queue to put messages ready for delivery + * @namedq: queue to put binding table update messages ready for delivery + * @link: return value, pointer to put the created link * - * Give a newly added peer node the sequence number where it should - * start receiving and acking broadcast packets. + * Returns true if link was created, otherwise false */ -void tipc_link_build_bcast_sync_msg(struct tipc_link *l, - struct sk_buff_head *xmitq) +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, + int mtu, int window, u16 peer_caps, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link *bc_sndlink, + struct tipc_link **link) { - struct sk_buff *skb; - struct sk_buff_head list; - u16 last_sent; + struct tipc_link *l; - skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, - 0, l->addr, link_own_addr(l), 0, 0, 0); - if (!skb) - return; - last_sent = tipc_bclink_get_last_sent(l->owner->net); - msg_set_last_bcast(buf_msg(skb), last_sent); - __skb_queue_head_init(&list); - __skb_queue_tail(&list, skb); - tipc_link_xmit(l, &list, xmitq); + if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window, + 0, ownnode, peer, peer_caps, bc_sndlink, + NULL, inputq, namedq, link)) + return false; + + l = *link; + strcpy(l->name, tipc_bclink_name); + tipc_link_reset(l); + l->state = LINK_RESET; + l->ackers = 0; + l->bc_rcvlink = l; + + /* Broadcast send link is always up */ + if (link_is_bc_sndlink(l)) + l->state = LINK_ESTABLISHED; + + return true; } /** @@ -451,12 +550,17 @@ static void link_profile_stats(struct tipc_link *l) /* tipc_link_timeout - perform periodic task as instructed from node timeout */ +/* tipc_link_timeout - perform periodic task as instructed from node timeout + */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { int rc = 0; int mtyp = STATE_MSG; bool xmit = false; bool prb = false; + u16 bc_snt = l->bc_sndlink->snd_nxt - 1; + u16 bc_acked = l->bc_rcvlink->acked; + bool bc_up = link_is_up(l->bc_rcvlink); link_profile_stats(l); @@ -464,7 +568,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) case LINK_ESTABLISHED: case LINK_SYNCHING: if (!l->silent_intv_cnt) { - if (tipc_bclink_acks_missing(l->owner)) + if (bc_up && (bc_acked != bc_snt)) xmit = true; } else if (l->silent_intv_cnt <= l->abort_limit) { xmit = true; @@ -555,38 +659,6 @@ void link_prepare_wakeup(struct tipc_link *l) } } -/** - * tipc_link_reset_fragments - purge link's inbound message fragments queue - * @l_ptr: pointer to link - */ -void tipc_link_reset_fragments(struct tipc_link *l_ptr) -{ - kfree_skb(l_ptr->reasm_buf); - l_ptr->reasm_buf = NULL; -} - -void tipc_link_purge_backlog(struct tipc_link *l) -{ - __skb_queue_purge(&l->backlogq); - l->backlog[TIPC_LOW_IMPORTANCE].len = 0; - l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; - l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; - l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; - l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; -} - -/** - * tipc_link_purge_queues - purge all pkt queues associated with link - * @l_ptr: pointer to link - */ -void tipc_link_purge_queues(struct tipc_link *l_ptr) -{ - __skb_queue_purge(&l_ptr->deferdq); - __skb_queue_purge(&l_ptr->transmq); - tipc_link_purge_backlog(l_ptr); - tipc_link_reset_fragments(l_ptr); -} - void tipc_link_reset(struct tipc_link *l) { /* Link is down, accept any 
session */ @@ -598,12 +670,16 @@ void tipc_link_reset(struct tipc_link *l) /* Prepare for renewed mtu size negotiation */ l->mtu = l->advertised_mtu; - /* Clean up all queues: */ + /* Clean up all queues and counters: */ __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); skb_queue_splice_init(&l->wakeupq, l->inputq); - - tipc_link_purge_backlog(l); + __skb_queue_purge(&l->backlogq); + l->backlog[TIPC_LOW_IMPORTANCE].len = 0; + l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; + l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; + l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; + l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; kfree_skb(l->reasm_buf); kfree_skb(l->failover_reasm_skb); l->reasm_buf = NULL; @@ -611,81 +687,15 @@ void tipc_link_reset(struct tipc_link *l) l->rcv_unacked = 0; l->snd_nxt = 1; l->rcv_nxt = 1; + l->acked = 0; l->silent_intv_cnt = 0; l->stats.recv_info = 0; l->stale_count = 0; + l->bc_peer_is_up = false; link_reset_statistics(l); } /** - * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked - * @link: link to use - * @list: chain of buffers containing message - * - * Consumes the buffer chain, except when returning an error code, - * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS - * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted - */ -int __tipc_link_xmit(struct net *net, struct tipc_link *link, - struct sk_buff_head *list) -{ - struct tipc_msg *msg = buf_msg(skb_peek(list)); - unsigned int maxwin = link->window; - unsigned int i, imp = msg_importance(msg); - uint mtu = link->mtu; - u16 ack = mod(link->rcv_nxt - 1); - u16 seqno = link->snd_nxt; - u16 bc_last_in = link->owner->bclink.last_in; - struct tipc_media_addr *addr = link->media_addr; - struct sk_buff_head *transmq = &link->transmq; - struct sk_buff_head *backlogq = &link->backlogq; - struct sk_buff *skb, *bskb; - - /* Match msg importance against this and all higher backlog limits: */ - for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) { - if (unlikely(link->backlog[i].len >= link->backlog[i].limit)) - return link_schedule_user(link, list); - } - if (unlikely(msg_size(msg) > mtu)) - return -EMSGSIZE; - - /* Prepare each packet for sending, and add to relevant queue: */ - while (skb_queue_len(list)) { - skb = skb_peek(list); - msg = buf_msg(skb); - msg_set_seqno(msg, seqno); - msg_set_ack(msg, ack); - msg_set_bcast_ack(msg, bc_last_in); - - if (likely(skb_queue_len(transmq) < maxwin)) { - __skb_dequeue(list); - __skb_queue_tail(transmq, skb); - tipc_bearer_send(net, link->bearer_id, skb, addr); - link->rcv_unacked = 0; - seqno++; - continue; - } - if (tipc_msg_bundle(skb_peek_tail(backlogq), msg, mtu)) { - kfree_skb(__skb_dequeue(list)); - link->stats.sent_bundled++; - continue; - } - if (tipc_msg_make_bundle(&bskb, msg, mtu, link->addr)) { - kfree_skb(__skb_dequeue(list)); - __skb_queue_tail(backlogq, bskb); - link->backlog[msg_importance(buf_msg(bskb))].len++; - link->stats.sent_bundled++; - link->stats.sent_bundles++; - continue; - } - link->backlog[imp].len += skb_queue_len(list); - skb_queue_splice_tail_init(list, backlogq); - } - link->snd_nxt = seqno; - return 0; -} - -/** * tipc_link_xmit(): enqueue buffer list according to queue situation * @link: link to use * @list: chain of buffers containing message @@ -705,7 +715,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, unsigned int mtu = l->mtu; u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; - u16 bc_last_in = l->owner->bclink.last_in; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; struct 
sk_buff_head *transmq = &l->transmq; struct sk_buff_head *backlogq = &l->backlogq; struct sk_buff *skb, *_skb, *bskb; @@ -724,7 +734,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, hdr = buf_msg(skb); msg_set_seqno(hdr, seqno); msg_set_ack(hdr, ack); - msg_set_bcast_ack(hdr, bc_last_in); + msg_set_bcast_ack(hdr, bc_ack); if (likely(skb_queue_len(transmq) < maxwin)) { _skb = skb_clone(skb, GFP_ATOMIC); @@ -733,6 +743,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, __skb_dequeue(list); __skb_queue_tail(transmq, skb); __skb_queue_tail(xmitq, _skb); + TIPC_SKB_CB(skb)->ackers = l->ackers; l->rcv_unacked = 0; seqno++; continue; @@ -757,62 +768,13 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return 0; } -/* - * tipc_link_sync_rcv - synchronize broadcast link endpoints. - * Receive the sequence number where we should start receiving and - * acking broadcast packets from a newly added peer node, and open - * up for reception of such packets. - * - * Called with node locked - */ -static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - - n->bclink.last_sent = n->bclink.last_in = msg_last_bcast(msg); - n->bclink.recv_permitted = true; - kfree_skb(buf); -} - -/* - * tipc_link_push_packets - push unsent packets to bearer - * - * Push out the unsent messages of a link where congestion - * has abated. Node is locked. - * - * Called with node locked - */ -void tipc_link_push_packets(struct tipc_link *link) -{ - struct sk_buff *skb; - struct tipc_msg *msg; - u16 seqno = link->snd_nxt; - u16 ack = mod(link->rcv_nxt - 1); - - while (skb_queue_len(&link->transmq) < link->window) { - skb = __skb_dequeue(&link->backlogq); - if (!skb) - break; - msg = buf_msg(skb); - link->backlog[msg_importance(msg)].len--; - msg_set_ack(msg, ack); - msg_set_seqno(msg, seqno); - seqno = mod(seqno + 1); - msg_set_bcast_ack(msg, link->owner->bclink.last_in); - link->rcv_unacked = 0; - __skb_queue_tail(&link->transmq, skb); - tipc_bearer_send(link->owner->net, link->bearer_id, - skb, link->media_addr); - } - link->snd_nxt = seqno; -} - void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) { struct sk_buff *skb, *_skb; struct tipc_msg *hdr; u16 seqno = l->snd_nxt; u16 ack = l->rcv_nxt - 1; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; while (skb_queue_len(&l->transmq) < l->window) { skb = skb_peek(&l->backlogq); @@ -826,96 +788,35 @@ void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) l->backlog[msg_importance(hdr)].len--; __skb_queue_tail(&l->transmq, skb); __skb_queue_tail(xmitq, _skb); - msg_set_ack(hdr, ack); + TIPC_SKB_CB(skb)->ackers = l->ackers; msg_set_seqno(hdr, seqno); - msg_set_bcast_ack(hdr, l->owner->bclink.last_in); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); l->rcv_unacked = 0; seqno++; } l->snd_nxt = seqno; } -static void link_retransmit_failure(struct tipc_link *l_ptr, - struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - struct net *net = l_ptr->owner->net; - - pr_warn("Retransmission failure on link <%s>\n", l_ptr->name); - - if (l_ptr->addr) { - /* Handle failure on standard link */ - link_print(l_ptr, "Resetting link "); - pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", - msg_user(msg), msg_type(msg), msg_size(msg), - msg_errcode(msg)); - pr_info("sqno %u, prev: %x, src: %x\n", - msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg)); - } else { - /* Handle failure on broadcast link */ - struct 
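Each buffer entering the transmit queue above is stamped with the link's current ackers count, the number of peers that must still acknowledge it; acknowledgments (or departing peers) then count it down, and the buffer becomes releasable at zero. A reference-count sketch of that lifecycle, noting that the kernel keeps the counter in TIPC_SKB_CB(skb)->ackers:

#include <stdbool.h>

struct bc_pkt {
    int ackers;                 /* peers still expected to ack */
};

/* stamp when the packet enters the broadcast transmit queue */
void stamp_on_xmit(struct bc_pkt *p, int current_peers)
{
    p->ackers = current_peers;
}

/* called once per acknowledging (or departing) peer;
 * returns true when the buffer can be released
 */
bool ack_one_peer(struct bc_pkt *p)
{
    if (p->ackers > 0)
        p->ackers--;
    return p->ackers == 0;
}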
tipc_node *n_ptr; - char addr_string[16]; - - pr_info("Msg seq number: %u, ", msg_seqno(msg)); - pr_cont("Outstanding acks: %lu\n", - (unsigned long) TIPC_SKB_CB(buf)->handle); - - n_ptr = tipc_bclink_retransmit_to(net); - - tipc_addr_string_fill(addr_string, n_ptr->addr); - pr_info("Broadcast link info for %s\n", addr_string); - pr_info("Reception permitted: %d, Acked: %u\n", - n_ptr->bclink.recv_permitted, - n_ptr->bclink.acked); - pr_info("Last in: %u, Oos state: %u, Last sent: %u\n", - n_ptr->bclink.last_in, - n_ptr->bclink.oos_state, - n_ptr->bclink.last_sent); - - n_ptr->action_flags |= TIPC_BCAST_RESET; - l_ptr->stale_count = 0; - } -} - -void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, - u32 retransmits) +static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb) { - struct tipc_msg *msg; - - if (!skb) - return; - - msg = buf_msg(skb); - - /* Detect repeated retransmit failures */ - if (l_ptr->last_retransm == msg_seqno(msg)) { - if (++l_ptr->stale_count > 100) { - link_retransmit_failure(l_ptr, skb); - return; - } - } else { - l_ptr->last_retransm = msg_seqno(msg); - l_ptr->stale_count = 1; - } + struct tipc_msg *hdr = buf_msg(skb); - skb_queue_walk_from(&l_ptr->transmq, skb) { - if (!retransmits) - break; - msg = buf_msg(skb); - msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1)); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb, - l_ptr->media_addr); - retransmits--; - l_ptr->stats.retransmitted++; - } + pr_warn("Retransmission failure on link <%s>\n", l->name); + link_print(l, "Resetting link "); + pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", + msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); + pr_info("sqno %u, prev: %x, src: %x\n", + msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr)); } -static int tipc_link_retransm(struct tipc_link *l, int retransm, - struct sk_buff_head *xmitq) +int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to, + struct sk_buff_head *xmitq) { struct sk_buff *_skb, *skb = skb_peek(&l->transmq); struct tipc_msg *hdr; + u16 ack = l->rcv_nxt - 1; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; if (!skb) return 0; @@ -928,19 +829,25 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm, link_retransmit_failure(l, skb); return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } + + /* Move forward to where retransmission should start */ skb_queue_walk(&l->transmq, skb) { - if (!retransm) - return 0; + if (!less(buf_seqno(skb), from)) + break; + } + + skb_queue_walk_from(&l->transmq, skb) { + if (more(buf_seqno(skb), to)) + break; hdr = buf_msg(skb); _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); if (!_skb) return 0; hdr = buf_msg(_skb); - msg_set_ack(hdr, l->rcv_nxt - 1); - msg_set_bcast_ack(hdr, l->owner->bclink.last_in); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); - retransm--; l->stats.retransmitted++; } return 0; @@ -951,11 +858,9 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm, * Consumes buffer if message is of right type * Node lock must be held */ -static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb, +static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { - struct tipc_node *node = link->owner; - switch (msg_user(buf_msg(skb))) { case TIPC_LOW_IMPORTANCE: case TIPC_MEDIUM_IMPORTANCE: @@ -965,8 +870,8 @@ static bool tipc_data_input(struct tipc_link *link, 
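tipc_link_retrans() above takes an inclusive from..to window of 16-bit sequence numbers, skips to the first packet not before 'from', and copies out everything up to 'to', so both bounds must be compared wrap-around safely. A freestanding version of the comparisons and the range test; less() and more() mirror the mod-2^16 semantics the TIPC code relies on, and the queue walk is reduced to a loop over candidate sequence numbers:

#include <stdint.h>
#include <stdio.h>

/* wrap-around aware comparisons in mod-2^16 sequence space */
static int less(uint16_t left, uint16_t right)
{
    return (int16_t)(right - left) > 0;
}

static int more(uint16_t left, uint16_t right)
{
    return less(right, left);
}

static int in_range(uint16_t seq, uint16_t from, uint16_t to)
{
    return !less(seq, from) && !more(seq, to);
}

int main(void)
{
    /* a retransmit window that straddles the 65535 -> 0 wrap */
    uint16_t from = 65533, to = 2;

    for (uint32_t s = 65530; s != 6; s = (s + 1) & 0xffff)
        printf("seq %5u: %s\n", (unsigned)s,
               in_range((uint16_t)s, from, to) ? "retransmit" : "skip");
    return 0;
}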
struct sk_buff *skb, skb_queue_tail(inputq, skb); return true; case NAME_DISTRIBUTOR: - node->bclink.recv_permitted = true; - skb_queue_tail(link->namedq, skb); + l->bc_rcvlink->state = LINK_ESTABLISHED; + skb_queue_tail(l->namedq, skb); return true; case MSG_BUNDLER: case TUNNEL_PROTOCOL: @@ -987,7 +892,6 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb, static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { - struct tipc_node *node = l->owner; struct tipc_msg *hdr = buf_msg(skb); struct sk_buff **reasm_skb = &l->reasm_buf; struct sk_buff *iskb; @@ -1028,13 +932,15 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, if (tipc_buf_append(reasm_skb, &skb)) { l->stats.recv_fragmented++; tipc_data_input(l, skb, inputq); - } else if (!*reasm_skb) { + } else if (!*reasm_skb && !link_is_bc_rcvlink(l)) { + pr_warn_ratelimited("Unable to build fragment list\n"); return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } return 0; } else if (usr == BCAST_PROTOCOL) { - tipc_link_sync_rcv(node, skb); - return 0; + tipc_bcast_lock(l->net); + tipc_link_bc_init_rcv(l->bc_rcvlink, hdr); + tipc_bcast_unlock(l->net); } drop: kfree_skb(skb); @@ -1057,12 +963,28 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) } /* tipc_link_build_ack_msg: prepare link acknowledge message for transmission + * + * Note that sending of broadcast ack is coordinated among nodes, to reduce + * risk of ack storms towards the sender */ -void tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { + if (!l) + return 0; + + /* Broadcast ACK must be sent via a unicast link => defer to caller */ + if (link_is_bc_rcvlink(l)) { + if (((l->rcv_nxt ^ link_own_addr(l)) & 0xf) != 0xf) + return 0; + l->rcv_unacked = 0; + return TIPC_LINK_SND_BC_ACK; + } + + /* Unicast ACK */ l->rcv_unacked = 0; l->stats.sent_acks++; tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + return 0; } /* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message @@ -1084,6 +1006,9 @@ static void tipc_link_build_nack_msg(struct tipc_link *l, { u32 def_cnt = ++l->stats.deferred_recv; + if (link_is_bc_rcvlink(l)) + return; + if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); } @@ -1144,12 +1069,11 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, l->rcv_nxt++; l->stats.recv_info++; if (!tipc_data_input(l, skb, l->inputq)) - rc = tipc_link_input(l, skb, l->inputq); - if (unlikely(rc)) - break; + rc |= tipc_link_input(l, skb, l->inputq); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) - tipc_link_build_ack_msg(l, xmitq); - + rc |= tipc_link_build_ack_msg(l, xmitq); + if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) + break; } while ((skb = __skb_dequeue(defq))); return rc; @@ -1158,45 +1082,6 @@ drop: return rc; } -/** - * tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue - * - * Returns increase in queue length (i.e. 0 or 1) - */ -u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb) -{ - struct sk_buff *skb1; - u16 seq_no = buf_seqno(skb); - - /* Empty queue ? */ - if (skb_queue_empty(list)) { - __skb_queue_tail(list, skb); - return 1; - } - - /* Last ? 
*/ - if (less(buf_seqno(skb_peek_tail(list)), seq_no)) { - __skb_queue_tail(list, skb); - return 1; - } - - /* Locate insertion point in queue, then insert; discard if duplicate */ - skb_queue_walk(list, skb1) { - u16 curr_seqno = buf_seqno(skb1); - - if (seq_no == curr_seqno) { - kfree_skb(skb); - return 0; - } - - if (less(seq_no, curr_seqno)) - break; - } - - __skb_queue_before(list, skb1, skb); - return 1; -} - /* * Send protocol message to the other endpoint. */ @@ -1212,23 +1097,17 @@ void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, skb = __skb_dequeue(&xmitq); if (!skb) return; - tipc_bearer_send(l->owner->net, l->bearer_id, skb, l->media_addr); + tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr); l->rcv_unacked = 0; - kfree_skb(skb); } -/* tipc_link_build_proto_msg: prepare link protocol message for transmission - */ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) { struct sk_buff *skb = NULL; struct tipc_msg *hdr = l->pmsg; - u16 snd_nxt = l->snd_nxt; - u16 rcv_nxt = l->rcv_nxt; - u16 rcv_last = rcv_nxt - 1; - int node_up = l->owner->bclink.recv_permitted; + bool node_up = link_is_up(l->bc_rcvlink); /* Don't send protocol message during reset or link failover */ if (tipc_link_is_blocked(l)) @@ -1236,33 +1115,34 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_type(hdr, mtyp); msg_set_net_plane(hdr, l->net_plane); - msg_set_bcast_ack(hdr, l->owner->bclink.last_in); - msg_set_last_bcast(hdr, tipc_bclink_get_last_sent(l->owner->net)); + msg_set_next_sent(hdr, l->snd_nxt); + msg_set_ack(hdr, l->rcv_nxt - 1); + msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1); + msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_link_tolerance(hdr, tolerance); msg_set_linkprio(hdr, priority); msg_set_redundant_link(hdr, node_up); msg_set_seq_gap(hdr, 0); /* Compatibility: created msg must not be in sequence with pkt flow */ - msg_set_seqno(hdr, snd_nxt + U16_MAX / 2); + msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2); if (mtyp == STATE_MSG) { if (!tipc_link_is_up(l)) return; - msg_set_next_sent(hdr, snd_nxt); /* Override rcvgap if there are packets in deferred queue */ if (!skb_queue_empty(&l->deferdq)) - rcvgap = buf_seqno(skb_peek(&l->deferdq)) - rcv_nxt; + rcvgap = buf_seqno(skb_peek(&l->deferdq)) - l->rcv_nxt; if (rcvgap) { msg_set_seq_gap(hdr, rcvgap); l->stats.sent_nacks++; } - msg_set_ack(hdr, rcv_last); msg_set_probe(hdr, probe); if (probe) l->stats.sent_probes++; l->stats.sent_states++; + l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ msg_set_max_pkt(hdr, l->advertised_mtu); @@ -1354,7 +1234,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, { struct tipc_msg *hdr = buf_msg(skb); u16 rcvgap = 0; - u16 nacked_gap = msg_seq_gap(hdr); + u16 ack = msg_ack(hdr); + u16 gap = msg_seq_gap(hdr); u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); @@ -1363,7 +1244,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, char *if_name; int rc = 0; - if (tipc_link_is_blocked(l)) + if (tipc_link_is_blocked(l) || !xmitq) goto exit; if (link_own_addr(l) > msg_prevnode(hdr)) @@ -1433,11 +1314,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (rcvgap || (msg_probe(hdr))) tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, 0, 0, xmitq); - tipc_link_release_pkts(l, 
msg_ack(hdr)); + tipc_link_release_pkts(l, ack); /* If NACK, retransmit will now start at right position */ - if (nacked_gap) { - rc = tipc_link_retransm(l, nacked_gap, xmitq); + if (gap) { + rc = tipc_link_retrans(l, ack + 1, ack + gap, xmitq); l->stats.recv_nacks++; } @@ -1450,6 +1331,188 @@ exit: return rc; } +/* tipc_link_build_bc_proto_msg() - create broadcast protocol message + */ +static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast, + u16 peers_snd_nxt, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb; + struct tipc_msg *hdr; + struct sk_buff *dfrd_skb = skb_peek(&l->deferdq); + u16 ack = l->rcv_nxt - 1; + u16 gap_to = peers_snd_nxt - 1; + + skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, + 0, l->addr, link_own_addr(l), 0, 0, 0); + if (!skb) + return false; + hdr = buf_msg(skb); + msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); + msg_set_bcast_ack(hdr, ack); + msg_set_bcgap_after(hdr, ack); + if (dfrd_skb) + gap_to = buf_seqno(dfrd_skb) - 1; + msg_set_bcgap_to(hdr, gap_to); + msg_set_non_seq(hdr, bcast); + __skb_queue_tail(xmitq, skb); + return true; +} + +/* tipc_link_build_bc_init_msg() - synchronize broadcast link endpoints. + * + * Give a newly added peer node the sequence number where it should + * start receiving and acking broadcast packets. + */ +static void tipc_link_build_bc_init_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head list; + + __skb_queue_head_init(&list); + if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list)) + return; + tipc_link_xmit(l, &list, xmitq); +} + +/* tipc_link_bc_init_rcv - receive initial broadcast synch data from peer + */ +void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) +{ + int mtyp = msg_type(hdr); + u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + + if (link_is_up(l)) + return; + + if (msg_user(hdr) == BCAST_PROTOCOL) { + l->rcv_nxt = peers_snd_nxt; + l->state = LINK_ESTABLISHED; + return; + } + + if (l->peer_caps & TIPC_BCAST_SYNCH) + return; + + if (msg_peer_node_is_up(hdr)) + return; + + /* Compatibility: accept older, less safe initial synch data */ + if ((mtyp == RESET_MSG) || (mtyp == ACTIVATE_MSG)) + l->rcv_nxt = peers_snd_nxt; +} + +/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state + */ +void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq) +{ + u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + + if (!link_is_up(l)) + return; + + if (!msg_peer_node_is_up(hdr)) + return; + + l->bc_peer_is_up = true; + + /* Ignore if peers_snd_nxt goes beyond receive window */ + if (more(peers_snd_nxt, l->rcv_nxt + l->window)) + return; + + if (!more(peers_snd_nxt, l->rcv_nxt)) { + l->nack_state = BC_NACK_SND_CONDITIONAL; + return; + } + + /* Don't NACK if one was recently sent or peeked */ + if (l->nack_state == BC_NACK_SND_SUPPRESS) { + l->nack_state = BC_NACK_SND_UNCONDITIONAL; + return; + } + + /* Conditionally delay NACK sending until next synch rcv */ + if (l->nack_state == BC_NACK_SND_CONDITIONAL) { + l->nack_state = BC_NACK_SND_UNCONDITIONAL; + if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN) + return; + } + + /* Send NACK now but suppress next one */ + tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq); + l->nack_state = BC_NACK_SND_SUPPRESS; +} + +void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb, *tmp; + struct tipc_link *snd_l = l->bc_sndlink; + + if (!link_is_up(l) || !l->bc_peer_is_up) + return; + + 
if (!more(acked, l->acked)) + return; + + /* Skip over packets peer has already acked */ + skb_queue_walk(&snd_l->transmq, skb) { + if (more(buf_seqno(skb), l->acked)) + break; + } + + /* Update/release the packets peer is acking now */ + skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) { + if (more(buf_seqno(skb), acked)) + break; + if (!--TIPC_SKB_CB(skb)->ackers) { + __skb_unlink(skb, &snd_l->transmq); + kfree_skb(skb); + } + } + l->acked = acked; + tipc_link_advance_backlog(snd_l, xmitq); + if (unlikely(!skb_queue_empty(&snd_l->wakeupq))) + link_prepare_wakeup(snd_l); +} + +/* tipc_link_bc_nack_rcv(): receive broadcast nack message + */ +int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + u32 dnode = msg_destnode(hdr); + int mtyp = msg_type(hdr); + u16 acked = msg_bcast_ack(hdr); + u16 from = acked + 1; + u16 to = msg_bcgap_to(hdr); + u16 peers_snd_nxt = to + 1; + int rc = 0; + + kfree_skb(skb); + + if (!tipc_link_is_up(l) || !l->bc_peer_is_up) + return 0; + + if (mtyp != STATE_MSG) + return 0; + + if (dnode == link_own_addr(l)) { + tipc_link_bc_ack_rcv(l, acked, xmitq); + rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq); + l->stats.recv_nacks++; + return rc; + } + + /* Msg for other node => suppress own NACK at next sync if applicable */ + if (more(peers_snd_nxt, l->rcv_nxt) && !less(l->rcv_nxt, from)) + l->nack_state = BC_NACK_SND_SUPPRESS; + + return 0; +} + void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) { int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); @@ -1514,7 +1577,7 @@ static void link_reset_statistics(struct tipc_link *l_ptr) static void link_print(struct tipc_link *l, const char *str) { struct sk_buff *hskb = skb_peek(&l->transmq); - u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt; + u16 head = hskb ? 
msg_seqno(buf_msg(hskb)) : l->snd_nxt - 1; u16 tail = l->snd_nxt - 1; pr_info("%s Link <%s> state %x\n", str, l->name, l->state); @@ -1738,7 +1801,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (tipc_link_is_up(link)) if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) goto attr_msg_full; - if (tipc_link_is_active(link)) + if (link->active) if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) goto attr_msg_full; diff --git a/net/tipc/link.h b/net/tipc/link.h index 0201212cb49a..66d859b66c84 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -66,7 +66,8 @@ enum { */ enum { TIPC_LINK_UP_EVT = 1, - TIPC_LINK_DOWN_EVT = (1 << 1) + TIPC_LINK_DOWN_EVT = (1 << 1), + TIPC_LINK_SND_BC_ACK = (1 << 2) }; /* Starting value for maximum packet size negotiation on unicast links @@ -110,7 +111,7 @@ struct tipc_stats { * @name: link name character string * @media_addr: media address to use when sending messages over link * @timer: link timer - * @owner: pointer to peer node + * @net: pointer to namespace struct * @refcnt: reference counter for permanent references (owner node & timer) * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint @@ -119,6 +120,7 @@ struct tipc_stats { * @keepalive_intv: link keepalive timer interval * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM + * @peer_caps: bitmap describing capabilities of peer node * @silent_intv_cnt: # of timer intervals without any reception from peer * @proto_msg: template for control messages generated by link * @pmsg: convenience pointer to "proto_msg" field @@ -134,6 +136,8 @@ struct tipc_stats { * @snt_nxt: next sequence number to use for outbound messages * @last_retransmitted: sequence number of most recently retransmitted message * @stale_count: # of identical retransmit requests made by peer + * @ackers: # of peers that needs to ack each packet before it can be released + * @acked: # last packet acked by a certain peer. Used for broadcast. 
* @rcv_nxt: next sequence number to expect for inbound messages * @deferred_queue: deferred queue saved OOS b'cast message received from node * @unacked_window: # of inbound messages rx'd without ack'ing back to peer @@ -143,13 +147,14 @@ struct tipc_stats { * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate * @long_msg_seq_no: next identifier to use for outbound fragmented messages * @reasm_buf: head of partially reassembled inbound message fragments + * @bc_rcvr: marks that this is a broadcast receiver link * @stats: collects statistics regarding link activity */ struct tipc_link { u32 addr; char name[TIPC_MAX_LINK_NAME]; struct tipc_media_addr *media_addr; - struct tipc_node *owner; + struct net *net; /* Management and link supervision data */ u32 peer_session; @@ -159,6 +164,8 @@ struct tipc_link { unsigned long keepalive_intv; u32 abort_limit; u32 state; + u16 peer_caps; + bool active; u32 silent_intv_cnt; struct { unchar hdr[INT_H_SIZE]; @@ -201,18 +208,35 @@ struct tipc_link { /* Fragmentation/reassembly */ struct sk_buff *reasm_buf; + /* Broadcast */ + u16 ackers; + u16 acked; + struct tipc_link *bc_rcvlink; + struct tipc_link *bc_sndlink; + int nack_state; + bool bc_peer_is_up; + /* Statistics */ struct tipc_stats stats; }; -bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, - u32 ownnode, u32 peer, struct tipc_media_addr *maddr, - struct sk_buff_head *inputq, struct sk_buff_head *namedq, +bool tipc_link_create(struct net *net, char *if_name, int bearer_id, + int tolerance, char net_plane, u32 mtu, int priority, + int window, u32 session, u32 ownnode, u32 peer, + u16 peer_caps, + struct tipc_link *bc_sndlink, + struct tipc_link *bc_rcvlink, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, struct tipc_link **link); +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, + int mtu, int window, u16 peer_caps, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link *bc_sndlink, + struct tipc_link **link); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); -void tipc_link_build_bcast_sync_msg(struct tipc_link *l, - struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); void tipc_link_reset_fragments(struct tipc_link *l_ptr); @@ -223,23 +247,11 @@ bool tipc_link_is_establishing(struct tipc_link *l); bool tipc_link_is_synching(struct tipc_link *l); bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); -int tipc_link_is_active(struct tipc_link *l_ptr); -void tipc_link_purge_queues(struct tipc_link *l_ptr); -void tipc_link_purge_backlog(struct tipc_link *l); +void tipc_link_set_active(struct tipc_link *l, bool active); void tipc_link_reset(struct tipc_link *l_ptr); -int __tipc_link_xmit(struct net *net, struct tipc_link *link, - struct sk_buff_head *list); int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, struct sk_buff_head *xmitq); -void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, - u32 gap, u32 tolerance, u32 priority); -void tipc_link_push_packets(struct tipc_link *l_ptr); -u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *buf); -void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); -void tipc_link_retransmit(struct tipc_link *l_ptr, - struct sk_buff *start, u32 retransmits); -struct sk_buff 
*tipc_skb_queue_next(const struct sk_buff_head *list, - const struct sk_buff *skb); +void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); @@ -249,5 +261,23 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); - +int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +void tipc_link_add_bc_peer(struct tipc_link *snd_l, + struct tipc_link *uc_l, + struct sk_buff_head *xmitq); +void tipc_link_remove_bc_peer(struct tipc_link *snd_l, + struct tipc_link *rcv_l, + struct sk_buff_head *xmitq); +int tipc_link_bc_peers(struct tipc_link *l); +void tipc_link_set_mtu(struct tipc_link *l, int mtu); +int tipc_link_mtu(struct tipc_link *l); +void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, + struct sk_buff_head *xmitq); +void tipc_link_build_bc_sync_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr); +void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq); +int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq); #endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 454f5ec275c8..8740930f0787 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -121,7 +121,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) { struct sk_buff *head = *headbuf; struct sk_buff *frag = *buf; - struct sk_buff *tail; + struct sk_buff *tail = NULL; struct tipc_msg *msg; u32 fragid; int delta; @@ -141,9 +141,15 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (unlikely(skb_unclone(frag, GFP_ATOMIC))) goto err; head = *headbuf = frag; - skb_frag_list_init(head); - TIPC_SKB_CB(head)->tail = NULL; *buf = NULL; + TIPC_SKB_CB(head)->tail = NULL; + if (skb_is_nonlinear(head)) { + skb_walk_frags(head, tail) { + TIPC_SKB_CB(head)->tail = tail; + } + } else { + skb_frag_list_init(head); + } return 0; } @@ -176,7 +182,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) *buf = NULL; return 0; err: - pr_warn_ratelimited("Unable to build fragment list\n"); kfree_skb(*buf); kfree_skb(*headbuf); *buf = *headbuf = NULL; @@ -559,18 +564,22 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) /* tipc_msg_reassemble() - clone a buffer chain of fragments and * reassemble the clones into one message */ -struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list) +bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq) { - struct sk_buff *skb; + struct sk_buff *skb, *_skb; struct sk_buff *frag = NULL; struct sk_buff *head = NULL; - int hdr_sz; + int hdr_len; /* Copy header if single buffer */ if (skb_queue_len(list) == 1) { skb = skb_peek(list); - hdr_sz = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb)); - return __pskb_copy(skb, hdr_sz, GFP_ATOMIC); + hdr_len = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb)); + _skb = __pskb_copy(skb, hdr_len, GFP_ATOMIC); + if (!_skb) + return false; + __skb_queue_tail(rcvq, _skb); + return true; } /* Clone all fragments and reassemble */ @@ -584,11 +593,12 @@ struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list) if (!head) goto error; } - return frag; + 
__skb_queue_tail(rcvq, frag); + return true; error: pr_warn("Failed do clone local mcast rcv buffer\n"); kfree_skb(head); - return NULL; + return false; } /* tipc_skb_queue_sorted(); sort pkt into list according to sequence number diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 9f0ef54be612..55778a0aebf3 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -112,6 +112,7 @@ struct tipc_skb_cb { bool wakeup_pending; u16 chain_sz; u16 chain_imp; + u16 ackers; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) @@ -600,6 +601,11 @@ static inline u32 msg_last_bcast(struct tipc_msg *m) return msg_bits(m, 4, 16, 0xffff); } +static inline u32 msg_bc_snd_nxt(struct tipc_msg *m) +{ + return msg_last_bcast(m) + 1; +} + static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 16, 0xffff, n); @@ -789,7 +795,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); -struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); +bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, struct sk_buff *skb); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index e6018b7eb197..c07612bab95c 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -102,7 +102,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb) if (!oskb) break; msg_set_destnode(buf_msg(oskb), dnode); - tipc_node_xmit_skb(net, oskb, dnode, dnode); + tipc_node_xmit_skb(net, oskb, dnode, 0); } rcu_read_unlock(); @@ -223,7 +223,7 @@ void tipc_named_node_up(struct net *net, u32 dnode) &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); rcu_read_unlock(); - tipc_node_xmit(net, &head, dnode, dnode); + tipc_node_xmit(net, &head, dnode, 0); } static void tipc_publ_subscribe(struct net *net, struct publication *publ, diff --git a/net/tipc/net.c b/net/tipc/net.c index d6d1399ae229..77bf9113c7a7 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -112,14 +112,11 @@ int tipc_net_start(struct net *net, u32 addr) { struct tipc_net *tn = net_generic(net, tipc_net_id); char addr_string[16]; - int res; tn->own_addr = addr; tipc_named_reinit(net); tipc_sk_reinit(net); - res = tipc_bclink_init(net); - if (res) - return res; + tipc_bcast_reinit(net); tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, TIPC_ZONE_SCOPE, 0, tn->own_addr); @@ -142,7 +139,6 @@ void tipc_net_stop(struct net *net) tn->own_addr); rtnl_lock(); tipc_bearer_stop(net); - tipc_bclink_stop(net); tipc_node_stop(net); rtnl_unlock(); diff --git a/net/tipc/node.c b/net/tipc/node.c index 2670751d0e2e..20cddec0a43c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -72,7 +72,6 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete); static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); -static void node_established_contact(struct tipc_node *n_ptr); static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(unsigned long data); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); @@ -165,8 +164,10 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); 
INIT_LIST_HEAD(&n_ptr->conn_sks); - skb_queue_head_init(&n_ptr->bclink.namedq); - __skb_queue_head_init(&n_ptr->bclink.deferdq); + skb_queue_head_init(&n_ptr->bc_entry.namedq); + skb_queue_head_init(&n_ptr->bc_entry.inputq1); + __skb_queue_head_init(&n_ptr->bc_entry.arrvq); + skb_queue_head_init(&n_ptr->bc_entry.inputq2); hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) @@ -177,6 +178,18 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) n_ptr->signature = INVALID_NODE_SIG; n_ptr->active_links[0] = INVALID_BEARER_ID; n_ptr->active_links[1] = INVALID_BEARER_ID; + if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr, + U16_MAX, tipc_bc_sndlink(net)->window, + n_ptr->capabilities, + &n_ptr->bc_entry.inputq1, + &n_ptr->bc_entry.namedq, + tipc_bc_sndlink(net), + &n_ptr->bc_entry.link)) { + pr_warn("Broadcast rcv link creation failed, no memory\n"); + kfree(n_ptr); + n_ptr = NULL; + goto exit; + } tipc_node_get(n_ptr); setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); n_ptr->keepalive_intv = U32_MAX; @@ -203,6 +216,7 @@ static void tipc_node_delete(struct tipc_node *node) { list_del_rcu(&node->list); hlist_del_rcu(&node->hash); + kfree(node->bc_entry.link); kfree_rcu(node, rcu); } @@ -332,6 +346,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE; tipc_bearer_add_dest(n->net, bearer_id, n->addr); + tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); pr_debug("Established link <%s> on network plane %c\n", nl->name, nl->net_plane); @@ -340,8 +355,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, if (!ol) { *slot0 = bearer_id; *slot1 = bearer_id; - tipc_link_build_bcast_sync_msg(nl, xmitq); - node_established_contact(n); + tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); + n->action_flags |= TIPC_NOTIFY_NODE_UP; + tipc_bcast_add_peer(n->net, nl, xmitq); return; } @@ -350,8 +366,11 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, pr_debug("Old link <%s> becomes standby\n", ol->name); *slot0 = bearer_id; *slot1 = bearer_id; + tipc_link_set_active(nl, true); + tipc_link_set_active(ol, false); } else if (nl->priority == ol->priority) { - *slot0 = bearer_id; + tipc_link_set_active(nl, true); + *slot1 = bearer_id; } else { pr_debug("New link <%s> is standby\n", nl->name); } @@ -428,8 +447,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, tipc_link_build_reset_msg(l, xmitq); *maddr = &n->links[*bearer_id].maddr; node_lost_contact(n, &le->inputq); + tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); return; } + tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); /* There is still a working link => initiate failover */ tnl = node_active_link(n, 0); @@ -493,6 +514,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, bool link_up = false; bool accept_addr = false; bool reset = true; + char *if_name; *dupl_addr = false; *respond = false; @@ -579,9 +601,15 @@ void tipc_node_check_dest(struct net *net, u32 onode, pr_warn("Cannot establish 3rd link to %x\n", n->addr); goto exit; } - if (!tipc_link_create(n, b, mod(tipc_net(net)->random), - tipc_own_addr(net), onode, &le->maddr, - &le->inputq, &n->bclink.namedq, &l)) { + if_name = strchr(b->name, ':') + 1; + if (!tipc_link_create(net, if_name, b->identity, b->tolerance, + b->net_plane, b->mtu, b->priority, + b->window, mod(tipc_net(net)->random), + 
tipc_own_addr(net), onode, + n->capabilities, + tipc_bc_sndlink(n->net), n->bc_entry.link, + &le->inputq, + &n->bc_entry.namedq, &l)) { *respond = false; goto exit; } @@ -824,58 +852,36 @@ bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr) return true; } -static void node_established_contact(struct tipc_node *n_ptr) -{ - tipc_node_fsm_evt(n_ptr, SELF_ESTABL_CONTACT_EVT); - n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; - n_ptr->bclink.oos_state = 0; - n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net); - tipc_bclink_add_node(n_ptr->net, n_ptr->addr); -} - -static void node_lost_contact(struct tipc_node *n_ptr, +static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq) { char addr_string[16]; struct tipc_sock_conn *conn, *safe; struct tipc_link *l; - struct list_head *conns = &n_ptr->conn_sks; + struct list_head *conns = &n->conn_sks; struct sk_buff *skb; - struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); uint i; pr_debug("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + tipc_addr_string_fill(addr_string, n->addr)); - /* Flush broadcast link info associated with lost node */ - if (n_ptr->bclink.recv_permitted) { - __skb_queue_purge(&n_ptr->bclink.deferdq); - - if (n_ptr->bclink.reasm_buf) { - kfree_skb(n_ptr->bclink.reasm_buf); - n_ptr->bclink.reasm_buf = NULL; - } - - tipc_bclink_remove_node(n_ptr->net, n_ptr->addr); - tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ); - - n_ptr->bclink.recv_permitted = false; - } + /* Clean up broadcast state */ + tipc_bcast_remove_peer(n->net, n->bc_entry.link); /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { - l = n_ptr->links[i].link; + l = n->links[i].link; if (l) tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT); } /* Notify publications from this node */ - n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; + n->action_flags |= TIPC_NOTIFY_NODE_DOWN; /* Notify sockets connected to node */ list_for_each_entry_safe(conn, safe, conns, list) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tn->own_addr, + SHORT_H_SIZE, 0, tipc_own_addr(n->net), conn->peer_node, conn->port, conn->peer_port, TIPC_ERR_NO_NODE); if (likely(skb)) @@ -937,18 +943,13 @@ void tipc_node_unlock(struct tipc_node *node) publ_list = &node->publ_list; node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | - TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | - TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | - TIPC_BCAST_RESET); + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); spin_unlock_bh(&node->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) tipc_publ_notify(net, publ_list, addr); - if (flags & TIPC_WAKEUP_BCAST_USERS) - tipc_bclink_wakeup_users(net); - if (flags & TIPC_NOTIFY_NODE_UP) tipc_named_node_up(net, addr); @@ -960,11 +961,6 @@ void tipc_node_unlock(struct tipc_node *node) tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); - if (flags & TIPC_BCAST_MSG_EVT) - tipc_bclink_input(net); - - if (flags & TIPC_BCAST_RESET) - tipc_node_reset_links(node); } /* Caller should hold node lock for the passed node */ @@ -1080,6 +1076,67 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, } /** + * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node + * @net: the applicable net namespace + * @skb: TIPC packet + * @bearer_id: id of bearer message arrived on + * + * Invoked with no locks held. 
+ */ +static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id) +{ + int rc; + struct sk_buff_head xmitq; + struct tipc_bclink_entry *be; + struct tipc_link_entry *le; + struct tipc_msg *hdr = buf_msg(skb); + int usr = msg_user(hdr); + u32 dnode = msg_destnode(hdr); + struct tipc_node *n; + + __skb_queue_head_init(&xmitq); + + /* If NACK for other node, let rcv link for that node peek into it */ + if ((usr == BCAST_PROTOCOL) && (dnode != tipc_own_addr(net))) + n = tipc_node_find(net, dnode); + else + n = tipc_node_find(net, msg_prevnode(hdr)); + if (!n) { + kfree_skb(skb); + return; + } + be = &n->bc_entry; + le = &n->links[bearer_id]; + + rc = tipc_bcast_rcv(net, be->link, skb); + + /* Broadcast link reset may happen at reassembly failure */ + if (rc & TIPC_LINK_DOWN_EVT) + tipc_node_reset_links(n); + + /* Broadcast ACKs are sent on a unicast link */ + if (rc & TIPC_LINK_SND_BC_ACK) { + tipc_node_lock(n); + tipc_link_build_ack_msg(le->link, &xmitq); + tipc_node_unlock(n); + } + + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + + /* Deliver. 'arrvq' is under inputq2's lock protection */ + if (!skb_queue_empty(&be->inputq1)) { + spin_lock_bh(&be->inputq2.lock); + spin_lock_bh(&be->inputq1.lock); + skb_queue_splice_tail_init(&be->inputq1, &be->arrvq); + spin_unlock_bh(&be->inputq1.lock); + spin_unlock_bh(&be->inputq2.lock); + tipc_sk_mcast_rcv(net, &be->arrvq, &be->inputq2); + } + tipc_node_put(n); +} + +/** * tipc_node_check_state - check and if necessary update node state * @skb: TIPC packet * @bearer_id: identity of bearer delivering the packet @@ -1221,6 +1278,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) int usr = msg_user(hdr); int bearer_id = b->identity; struct tipc_link_entry *le; + u16 bc_ack = msg_bcast_ack(hdr); int rc = 0; __skb_queue_head_init(&xmitq); @@ -1229,13 +1287,12 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) if (unlikely(!tipc_msg_validate(skb))) goto discard; - /* Handle arrival of a non-unicast link packet */ + /* Handle arrival of discovery or broadcast packet */ if (unlikely(msg_non_seq(hdr))) { - if (usr == LINK_CONFIG) - tipc_disc_rcv(net, skb, b); + if (unlikely(usr == LINK_CONFIG)) + return tipc_disc_rcv(net, skb, b); else - tipc_bclink_rcv(net, skb); - return; + return tipc_node_bc_rcv(net, skb, bearer_id); } /* Locate neighboring node that sent packet */ @@ -1244,19 +1301,18 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) goto discard; le = &n->links[bearer_id]; + /* Ensure broadcast reception is in synch with peer's send state */ + if (unlikely(usr == LINK_PROTOCOL)) + tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr); + else if (unlikely(n->bc_entry.link->acked != bc_ack)) + tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); + tipc_node_lock(n); /* Is reception permitted at the moment ? 
*/ if (!tipc_node_filter_pkt(n, hdr)) goto unlock; - if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) - tipc_bclink_sync_state(n, hdr); - - /* Release acked broadcast packets */ - if (unlikely(n->bclink.acked != msg_bcast_ack(hdr))) - tipc_bclink_acknowledge(n, msg_bcast_ack(hdr)); - /* Check and if necessary update node state */ if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { rc = tipc_link_rcv(le->link, skb, &xmitq); @@ -1271,8 +1327,8 @@ unlock: if (unlikely(rc & TIPC_LINK_DOWN_EVT)) tipc_node_link_down(n, bearer_id, false); - if (unlikely(!skb_queue_empty(&n->bclink.namedq))) - tipc_named_rcv(net, &n->bclink.namedq); + if (unlikely(!skb_queue_empty(&n->bc_entry.namedq))) + tipc_named_rcv(net, &n->bc_entry.namedq); if (!skb_queue_empty(&le->inputq)) tipc_sk_rcv(net, &le->inputq); diff --git a/net/tipc/node.h b/net/tipc/node.h index 344b3e7594fd..6734562d3c6e 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -55,36 +55,18 @@ enum { TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), - TIPC_WAKEUP_BCAST_USERS = (1 << 5), TIPC_NOTIFY_LINK_UP = (1 << 6), - TIPC_NOTIFY_LINK_DOWN = (1 << 7), - TIPC_BCAST_MSG_EVT = (1 << 9), - TIPC_BCAST_RESET = (1 << 10) + TIPC_NOTIFY_LINK_DOWN = (1 << 7) }; -/** - * struct tipc_node_bclink - TIPC node bclink structure - * @acked: sequence # of last outbound b'cast message acknowledged by node - * @last_in: sequence # of last in-sequence b'cast message received from node - * @last_sent: sequence # of last b'cast message sent by node - * @oos_state: state tracker for handling OOS b'cast messages - * @deferred_queue: deferred queue saved OOS b'cast message received from node - * @reasm_buf: broadcast reassembly queue head from node - * @inputq_map: bitmap indicating which inqueues should be kicked - * @recv_permitted: true if node is allowed to receive b'cast messages +/* Optional capabilities supported by this code version */ -struct tipc_node_bclink { - u32 acked; - u32 last_in; - u32 last_sent; - u32 oos_state; - u32 deferred_size; - struct sk_buff_head deferdq; - struct sk_buff *reasm_buf; - struct sk_buff_head namedq; - bool recv_permitted; +enum { + TIPC_BCAST_SYNCH = (1 << 1) }; +#define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH + struct tipc_link_entry { struct tipc_link *link; u32 mtu; @@ -92,6 +74,14 @@ struct tipc_link_entry { struct tipc_media_addr maddr; }; +struct tipc_bclink_entry { + struct tipc_link *link; + struct sk_buff_head inputq1; + struct sk_buff_head arrvq; + struct sk_buff_head inputq2; + struct sk_buff_head namedq; +}; + /** * struct tipc_node - TIPC node structure * @addr: network address of node @@ -104,7 +94,6 @@ struct tipc_link_entry { * @active_links: bearer ids of active links, used as index into links[] array * @links: array containing references to all links to node * @action_flags: bit mask of different types of node actions - * @bclink: broadcast-related info * @state: connectivity state vs peer node * @sync_point: sequence number where synch/failover is finished * @list: links to adjacent nodes in sorted list of cluster's nodes @@ -124,8 +113,8 @@ struct tipc_node { struct hlist_node hash; int active_links[2]; struct tipc_link_entry links[MAX_BEARERS]; + struct tipc_bclink_entry bc_entry; int action_flags; - struct tipc_node_bclink bclink; struct list_head list; int state; u16 sync_point; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1060d52ff23e..552dbaba9cf3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -689,13 +689,13 @@ static int tipc_sendmcast(struct socket *sock, 
struct tipc_name_seq *seq, msg_set_hdr_sz(mhdr, MCAST_H_SIZE); new_mtu: - mtu = tipc_bclink_get_mtu(); + mtu = tipc_bcast_get_mtu(net); rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bclink_xmit(net, pktchain); + rc = tipc_bcast_xmit(net, pktchain); if (likely(!rc)) return dsz; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 9bc0b1e515fa..816914ef228d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -52,6 +52,8 @@ /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 +#define UDP_MIN_HEADROOM 28 + static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, @@ -153,11 +155,12 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct udp_bearer *ub; struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; - struct sk_buff *clone; struct rtable *rt; - clone = skb_clone(skb, GFP_ATOMIC); - skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); + if (skb_headroom(skb) < UDP_MIN_HEADROOM) + pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + + skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); ub = rcu_dereference_rtnl(b->media_ptr); if (!ub) { err = -ENODEV; @@ -167,7 +170,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct flowi4 fl = { .daddr = dst->ipv4.s_addr, .saddr = src->ipv4.s_addr, - .flowi4_mark = clone->mark, + .flowi4_mark = skb->mark, .flowi4_proto = IPPROTO_UDP }; rt = ip_route_output_key(net, &fl); @@ -176,7 +179,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, goto tx_error; } ttl = ip4_dst_hoplimit(&rt->dst); - err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, clone, + err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, dst->udp_port, @@ -199,7 +202,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, if (err) goto tx_error; ttl = ip6_dst_hoplimit(ndst); - err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, clone, + err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, ndst->dev, &src->ipv6, &dst->ipv6, 0, ttl, src->udp_port, dst->udp_port, false); @@ -208,7 +211,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, return err; tx_error: - kfree_skb(clone); + kfree_skb(skb); return err; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 94f658235fb4..aaa0b58d6aba 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -326,9 +326,10 @@ found: return s; } -static inline int unix_writable(struct sock *sk) +static int unix_writable(const struct sock *sk) { - return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; + return sk->sk_state != TCP_LISTEN && + (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; } static void unix_write_space(struct sock *sk) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index df5fc6b340f1..00e8a349aabc 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1948,13 +1948,13 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner) err = misc_register(&vsock_device); if (err) { pr_err("Failed to register misc device\n"); - return -ENOENT; + goto err_reset_transport; } err = proto_register(&vsock_proto, 1); /* we want our slab */ if (err) { pr_err("Cannot register vsock protocol\n"); - goto err_misc_deregister; + goto err_deregister_misc; } 
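The relabelled unwind path above follows the usual inverse-order cleanup convention: each goto label undoes exactly the registrations that succeeded before the failing step, and the new outermost label (err_reset_transport) resets the transport pointer that was assigned first. Below is a minimal standalone sketch of that pattern; register_a()/register_b() and transport_state are hypothetical stand-ins, not the real vsock calls.

    /* Sketch: inverse-order error unwinding, as in __vsock_core_init().
     * All names here are illustrative placeholders.
     */
    #include <stdio.h>

    static void *transport_state;

    static int register_a(void) { return 0; }    /* pretend success */
    static void unregister_a(void) { }
    static int register_b(void) { return -1; }   /* pretend failure */

    static int example_core_init(void *t)
    {
        int err;

        transport_state = t;              /* assigned first, undone last */

        err = register_a();
        if (err)
            goto err_reset_state;         /* nothing registered yet */

        err = register_b();
        if (err)
            goto err_unregister_a;        /* undo only what succeeded */

        return 0;

    err_unregister_a:
        unregister_a();
    err_reset_state:
        transport_state = NULL;           /* mirrors "transport = NULL" above */
        return err;
    }

    int main(void)
    {
        printf("init: %d\n", example_core_init((void *)1));  /* prints -1 */
        return 0;
    }

Keeping the labels in strict reverse order of the setup steps means a failure at step N falls through the cleanups for steps N-1 down to 1 and nothing else, which is why the rename to err_deregister_misc plus the extra err_reset_transport label reads naturally here.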
err = sock_register(&vsock_family_ops); @@ -1969,8 +1969,9 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner) err_unregister_proto: proto_unregister(&vsock_proto); -err_misc_deregister: +err_deregister_misc: misc_deregister(&vsock_device); +err_reset_transport: transport = NULL; err_busy: mutex_unlock(&vsock_register_mutex); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 1f63daff3965..7555cad83a75 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -40,13 +40,11 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); -static void vmci_transport_peer_attach_cb(u32 sub_id, - const struct vmci_event_data *ed, - void *client_data); static void vmci_transport_peer_detach_cb(u32 sub_id, const struct vmci_event_data *ed, void *client_data); static void vmci_transport_recv_pkt_work(struct work_struct *work); +static void vmci_transport_cleanup(struct work_struct *work); static int vmci_transport_recv_listen(struct sock *sk, struct vmci_transport_packet *pkt); static int vmci_transport_recv_connecting_server( @@ -75,6 +73,10 @@ struct vmci_transport_recv_pkt_info { struct vmci_transport_packet pkt; }; +static LIST_HEAD(vmci_transport_cleanup_list); +static DEFINE_SPINLOCK(vmci_transport_cleanup_lock); +static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup); + static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, VMCI_INVALID_ID }; static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; @@ -791,44 +793,6 @@ out: return err; } -static void vmci_transport_peer_attach_cb(u32 sub_id, - const struct vmci_event_data *e_data, - void *client_data) -{ - struct sock *sk = client_data; - const struct vmci_event_payload_qp *e_payload; - struct vsock_sock *vsk; - - e_payload = vmci_event_data_const_payload(e_data); - - vsk = vsock_sk(sk); - - /* We don't ask for delayed CBs when we subscribe to this event (we - * pass 0 as flags to vmci_event_subscribe()). VMCI makes no - * guarantees in that case about what context we might be running in, - * so it could be BH or process, blockable or non-blockable. So we - * need to account for all possible contexts here. - */ - local_bh_disable(); - bh_lock_sock(sk); - - /* XXX This is lame, we should provide a way to lookup sockets by - * qp_handle. - */ - if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, - e_payload->handle)) { - /* XXX This doesn't do anything, but in the future we may want - * to set a flag here to verify the attach really did occur and - * we weren't just sent a datagram claiming it was. - */ - goto out; - } - -out: - bh_unlock_sock(sk); - local_bh_enable(); -} - static void vmci_transport_handle_detach(struct sock *sk) { struct vsock_sock *vsk; @@ -871,28 +835,38 @@ static void vmci_transport_peer_detach_cb(u32 sub_id, const struct vmci_event_data *e_data, void *client_data) { - struct sock *sk = client_data; + struct vmci_transport *trans = client_data; const struct vmci_event_payload_qp *e_payload; - struct vsock_sock *vsk; e_payload = vmci_event_data_const_payload(e_data); - vsk = vsock_sk(sk); - if (vmci_handle_is_invalid(e_payload->handle)) - return; - - /* Same rules for locking as for peer_attach_cb(). */ - local_bh_disable(); - bh_lock_sock(sk); /* XXX This is lame, we should provide a way to lookup sockets by * qp_handle. 
*/ - if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, - e_payload->handle)) - vmci_transport_handle_detach(sk); + if (vmci_handle_is_invalid(e_payload->handle) || + vmci_handle_is_equal(trans->qp_handle, e_payload->handle)) + return; - bh_unlock_sock(sk); - local_bh_enable(); + /* We don't ask for delayed CBs when we subscribe to this event (we + * pass 0 as flags to vmci_event_subscribe()). VMCI makes no + * guarantees in that case about what context we might be running in, + * so it could be BH or process, blockable or non-blockable. So we + * need to account for all possible contexts here. + */ + spin_lock_bh(&trans->lock); + if (!trans->sk) + goto out; + + /* Apart from here, trans->lock is only grabbed as part of sk destruct, + * where trans->sk isn't locked. + */ + bh_lock_sock(trans->sk); + + vmci_transport_handle_detach(trans->sk); + + bh_unlock_sock(trans->sk); + out: + spin_unlock_bh(&trans->lock); } static void vmci_transport_qp_resumed_cb(u32 sub_id, @@ -1181,7 +1155,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, */ err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, vmci_transport_peer_detach_cb, - pending, &detach_sub_id); + vmci_trans(vpending), &detach_sub_id); if (err < VMCI_SUCCESS) { vmci_transport_send_reset(pending, pkt); err = vmci_transport_error_to_vsock_error(err); @@ -1321,7 +1295,6 @@ vmci_transport_recv_connecting_client(struct sock *sk, || vmci_trans(vsk)->qpair || vmci_trans(vsk)->produce_size != 0 || vmci_trans(vsk)->consume_size != 0 - || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { skerr = EPROTO; err = -EINVAL; @@ -1389,7 +1362,6 @@ static int vmci_transport_recv_connecting_client_negotiate( struct vsock_sock *vsk; struct vmci_handle handle; struct vmci_qp *qpair; - u32 attach_sub_id; u32 detach_sub_id; bool is_local; u32 flags; @@ -1399,7 +1371,6 @@ static int vmci_transport_recv_connecting_client_negotiate( vsk = vsock_sk(sk); handle = VMCI_INVALID_HANDLE; - attach_sub_id = VMCI_INVALID_ID; detach_sub_id = VMCI_INVALID_ID; /* If we have gotten here then we should be past the point where old @@ -1444,23 +1415,15 @@ static int vmci_transport_recv_connecting_client_negotiate( goto destroy; } - /* Subscribe to attach and detach events first. + /* Subscribe to detach events first. * * XXX We attach once for each queue pair created for now so it is easy * to find the socket (it's provided), but later we should only * subscribe once and add a way to lookup sockets by queue pair handle. 
*/ - err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, - vmci_transport_peer_attach_cb, - sk, &attach_sub_id); - if (err < VMCI_SUCCESS) { - err = vmci_transport_error_to_vsock_error(err); - goto destroy; - } - err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, vmci_transport_peer_detach_cb, - sk, &detach_sub_id); + vmci_trans(vsk), &detach_sub_id); if (err < VMCI_SUCCESS) { err = vmci_transport_error_to_vsock_error(err); goto destroy; @@ -1496,7 +1459,6 @@ static int vmci_transport_recv_connecting_client_negotiate( vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = pkt->u.size; - vmci_trans(vsk)->attach_sub_id = attach_sub_id; vmci_trans(vsk)->detach_sub_id = detach_sub_id; vmci_trans(vsk)->notify_ops->process_negotiate(sk); @@ -1504,9 +1466,6 @@ static int vmci_transport_recv_connecting_client_negotiate( return 0; destroy: - if (attach_sub_id != VMCI_INVALID_ID) - vmci_event_unsubscribe(attach_sub_id); - if (detach_sub_id != VMCI_INVALID_ID) vmci_event_unsubscribe(detach_sub_id); @@ -1607,9 +1566,11 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk, vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; vmci_trans(vsk)->qpair = NULL; vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; - vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = - VMCI_INVALID_ID; + vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; vmci_trans(vsk)->notify_ops = NULL; + INIT_LIST_HEAD(&vmci_trans(vsk)->elem); + vmci_trans(vsk)->sk = &vsk->sk; + spin_lock_init(&vmci_trans(vsk)->lock); if (psk) { vmci_trans(vsk)->queue_pair_size = vmci_trans(psk)->queue_pair_size; @@ -1629,29 +1590,57 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk, return 0; } -static void vmci_transport_destruct(struct vsock_sock *vsk) +static void vmci_transport_free_resources(struct list_head *transport_list) { - if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { - vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); - vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; - } + while (!list_empty(transport_list)) { + struct vmci_transport *transport = + list_first_entry(transport_list, struct vmci_transport, + elem); + list_del(&transport->elem); - if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { - vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); - vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; - } + if (transport->detach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(transport->detach_sub_id); + transport->detach_sub_id = VMCI_INVALID_ID; + } - if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { - vmci_qpair_detach(&vmci_trans(vsk)->qpair); - vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; - vmci_trans(vsk)->produce_size = 0; - vmci_trans(vsk)->consume_size = 0; + if (!vmci_handle_is_invalid(transport->qp_handle)) { + vmci_qpair_detach(&transport->qpair); + transport->qp_handle = VMCI_INVALID_HANDLE; + transport->produce_size = 0; + transport->consume_size = 0; + } + + kfree(transport); } +} + +static void vmci_transport_cleanup(struct work_struct *work) +{ + LIST_HEAD(pending); + + spin_lock_bh(&vmci_transport_cleanup_lock); + list_replace_init(&vmci_transport_cleanup_list, &pending); + spin_unlock_bh(&vmci_transport_cleanup_lock); + vmci_transport_free_resources(&pending); +} + +static void vmci_transport_destruct(struct vsock_sock *vsk) +{ + /* Ensure that the detach callback doesn't use the sk/vsk + * we are about to destruct. 
+ */ + spin_lock_bh(&vmci_trans(vsk)->lock); + vmci_trans(vsk)->sk = NULL; + spin_unlock_bh(&vmci_trans(vsk)->lock); if (vmci_trans(vsk)->notify_ops) vmci_trans(vsk)->notify_ops->socket_destruct(vsk); - kfree(vsk->trans); + spin_lock_bh(&vmci_transport_cleanup_lock); + list_add(&vmci_trans(vsk)->elem, &vmci_transport_cleanup_list); + spin_unlock_bh(&vmci_transport_cleanup_lock); + schedule_work(&vmci_transport_cleanup_work); + vsk->trans = NULL; } @@ -2146,6 +2135,9 @@ module_init(vmci_transport_init); static void __exit vmci_transport_exit(void) { + cancel_work_sync(&vmci_transport_cleanup_work); + vmci_transport_free_resources(&vmci_transport_cleanup_list); + if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { if (vmci_datagram_destroy_handle( vmci_transport_stream_handle) != VMCI_SUCCESS) @@ -2164,6 +2156,7 @@ module_exit(vmci_transport_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); +MODULE_VERSION("1.0.2.0-k"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("vmware_vsock"); MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h index ce6c9623d5f0..2ad46f39649f 100644 --- a/net/vmw_vsock/vmci_transport.h +++ b/net/vmw_vsock/vmci_transport.h @@ -119,10 +119,12 @@ struct vmci_transport { u64 queue_pair_size; u64 queue_pair_min_size; u64 queue_pair_max_size; - u32 attach_sub_id; u32 detach_sub_id; union vmci_transport_notify notify; struct vmci_transport_notify_ops *notify_ops; + struct list_head elem; + struct sock *sk; + spinlock_t lock; /* protects sk. */ }; int vmci_transport_register(void); diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 4f5543dd2524..da72ed32f143 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -174,6 +174,16 @@ config CFG80211_INTERNAL_REGDB Most distributions have a CRDA package. So if unsure, say N. +config CFG80211_CRDA_SUPPORT + bool "support CRDA" if CFG80211_INTERNAL_REGDB + default y + depends on CFG80211 + help + You should enable this option unless you know for sure you have no + need for it, for example when using internal regdb (above.) + + If unsure, say Y. 
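As the nl80211.c hunk further down shows, the new symbol is then used to compile the CRDA-only pieces (reg_rule_policy, parse_reg_rule and nl80211_set_reg) in or out with a single #ifdef/#endif pair. A minimal standalone sketch of that gating pattern follows; the handler body is a placeholder, not the real cfg80211 code.

    /* Sketch: compiling an optional handler out behind a Kconfig symbol.
     * CONFIG_CFG80211_CRDA_SUPPORT is the real symbol; the handler below
     * is a placeholder, and Kconfig normally provides the define.
     */
    #include <stdio.h>

    #define CONFIG_CFG80211_CRDA_SUPPORT 1

    #ifdef CONFIG_CFG80211_CRDA_SUPPORT
    static int set_reg_handler(void)
    {
        /* ...parse and apply a user-supplied regulatory domain... */
        return 0;
    }
    #endif /* CONFIG_CFG80211_CRDA_SUPPORT */

    int main(void)
    {
    #ifdef CONFIG_CFG80211_CRDA_SUPPORT
        printf("CRDA handler compiled in: %d\n", set_reg_handler());
    #else
        printf("CRDA support compiled out\n");
    #endif
        return 0;
    }

With the symbol unset, the handler and its policy table never reach the object file, so kernels that rely on the internal regdb can drop the CRDA path entirely, which is the stated point of the option.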
+ config CFG80211_WEXT bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT depends on CFG80211 diff --git a/net/wireless/core.c b/net/wireless/core.c index f223026ddb03..b0915515640e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -461,6 +461,9 @@ use_default_name: rdev->wiphy.max_num_csa_counters = 1; + rdev->wiphy.max_sched_scan_plans = 1; + rdev->wiphy.max_sched_scan_plan_interval = U32_MAX; + return &rdev->wiphy; } EXPORT_SYMBOL(wiphy_new_nm); @@ -636,7 +639,7 @@ int wiphy_register(struct wiphy *wiphy) if (WARN_ON(!sband->n_channels)) return -EINVAL; /* - * on 60gHz band, there are no legacy rates, so + * on 60GHz band, there are no legacy rates, so * n_bitrates is 0 */ if (WARN_ON(band != IEEE80211_BAND_60GHZ && diff --git a/net/wireless/core.h b/net/wireless/core.h index b9d5bc8c148d..a618b4b86fa4 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -137,6 +137,7 @@ struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; struct rb_node rbn; + u64 ts_boottime; unsigned long ts; unsigned long refcount; atomic_t hold; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f05ba8b7af61..d693c9d031fc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -479,6 +479,12 @@ nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = { [NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 }, }; +static const struct nla_policy +nl80211_plan_policy[NL80211_SCHED_SCAN_PLAN_MAX + 1] = { + [NL80211_SCHED_SCAN_PLAN_INTERVAL] = { .type = NLA_U32 }, + [NL80211_SCHED_SCAN_PLAN_ITERATIONS] = { .type = NLA_U32 }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -1304,7 +1310,13 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, rdev->wiphy.max_sched_scan_ie_len) || nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, - rdev->wiphy.max_match_sets)) + rdev->wiphy.max_match_sets) || + nla_put_u32(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS, + rdev->wiphy.max_sched_scan_plans) || + nla_put_u32(msg, NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL, + rdev->wiphy.max_sched_scan_plan_interval) || + nla_put_u32(msg, NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS, + rdev->wiphy.max_sched_scan_plan_iterations)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && @@ -4932,56 +4944,6 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) return err; } -static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { - [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 }, - [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 }, - [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 }, - [NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 }, - [NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 }, - [NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 }, - [NL80211_ATTR_DFS_CAC_TIME] = { .type = NLA_U32 }, -}; - -static int parse_reg_rule(struct nlattr *tb[], - struct ieee80211_reg_rule *reg_rule) -{ - struct ieee80211_freq_range *freq_range = ®_rule->freq_range; - struct ieee80211_power_rule *power_rule = ®_rule->power_rule; - - if (!tb[NL80211_ATTR_REG_RULE_FLAGS]) - return -EINVAL; - if (!tb[NL80211_ATTR_FREQ_RANGE_START]) - return -EINVAL; - if (!tb[NL80211_ATTR_FREQ_RANGE_END]) - return -EINVAL; - if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]) - return -EINVAL; - if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]) - return -EINVAL; 
-
-	reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]);
-
-	freq_range->start_freq_khz =
-		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]);
-	freq_range->end_freq_khz =
-		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]);
-	freq_range->max_bandwidth_khz =
-		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]);
-
-	power_rule->max_eirp =
-		nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]);
-
-	if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN])
-		power_rule->max_antenna_gain =
-			nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]);
-
-	if (tb[NL80211_ATTR_DFS_CAC_TIME])
-		reg_rule->dfs_cac_ms =
-			nla_get_u32(tb[NL80211_ATTR_DFS_CAC_TIME]);
-
-	return 0;
-}
-
 static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 {
	char *data = NULL;
@@ -5613,6 +5575,57 @@ out_err:
	return err;
 }
 
+#ifdef CONFIG_CFG80211_CRDA_SUPPORT
+static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = {
+	[NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 },
+	[NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 },
+	[NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 },
+	[NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 },
+	[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 },
+	[NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 },
+	[NL80211_ATTR_DFS_CAC_TIME] = { .type = NLA_U32 },
+};
+
+static int parse_reg_rule(struct nlattr *tb[],
+			  struct ieee80211_reg_rule *reg_rule)
+{
+	struct ieee80211_freq_range *freq_range = &reg_rule->freq_range;
+	struct ieee80211_power_rule *power_rule = &reg_rule->power_rule;
+
+	if (!tb[NL80211_ATTR_REG_RULE_FLAGS])
+		return -EINVAL;
+	if (!tb[NL80211_ATTR_FREQ_RANGE_START])
+		return -EINVAL;
+	if (!tb[NL80211_ATTR_FREQ_RANGE_END])
+		return -EINVAL;
+	if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW])
+		return -EINVAL;
+	if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP])
+		return -EINVAL;
+
+	reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]);
+
+	freq_range->start_freq_khz =
+		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]);
+	freq_range->end_freq_khz =
+		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]);
+	freq_range->max_bandwidth_khz =
+		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]);
+
+	power_rule->max_eirp =
+		nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]);
+
+	if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN])
+		power_rule->max_antenna_gain =
+			nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]);
+
+	if (tb[NL80211_ATTR_DFS_CAC_TIME])
+		reg_rule->dfs_cac_ms =
+			nla_get_u32(tb[NL80211_ATTR_DFS_CAC_TIME]);
+
+	return 0;
+}
+
 static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 {
	struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1];
@@ -5689,6 +5702,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
	kfree(rd);
	return r;
 }
+#endif /* CONFIG_CFG80211_CRDA_SUPPORT */
 
 static int validate_scan_freqs(struct nlattr *freqs)
 {
@@ -5974,14 +5988,100 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
	return err;
 }
 
+static int
+nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
+			       struct cfg80211_sched_scan_request *request,
+			       struct nlattr **attrs)
+{
+	int tmp, err, i = 0;
+	struct nlattr *attr;
+
+	if (!attrs[NL80211_ATTR_SCHED_SCAN_PLANS]) {
+		u32 interval;
+
+		/*
+		 * If scan plans are not specified,
+		 * %NL80211_ATTR_SCHED_SCAN_INTERVAL must be specified. In this
+		 * case one scan plan will be set with the specified scan
+		 * interval and infinite number of iterations.
+		 */
+		if (!attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
+			return -EINVAL;
+
+		interval = nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]);
+		if (!interval)
+			return -EINVAL;
+
+		request->scan_plans[0].interval =
+			DIV_ROUND_UP(interval, MSEC_PER_SEC);
+		if (!request->scan_plans[0].interval)
+			return -EINVAL;
+
+		if (request->scan_plans[0].interval >
+		    wiphy->max_sched_scan_plan_interval)
+			request->scan_plans[0].interval =
+				wiphy->max_sched_scan_plan_interval;
+
+		return 0;
+	}
+
+	nla_for_each_nested(attr, attrs[NL80211_ATTR_SCHED_SCAN_PLANS], tmp) {
+		struct nlattr *plan[NL80211_SCHED_SCAN_PLAN_MAX + 1];
+
+		if (WARN_ON(i >= n_plans))
+			return -EINVAL;
+
+		err = nla_parse(plan, NL80211_SCHED_SCAN_PLAN_MAX,
+				nla_data(attr), nla_len(attr),
+				nl80211_plan_policy);
+		if (err)
+			return err;
+
+		if (!plan[NL80211_SCHED_SCAN_PLAN_INTERVAL])
+			return -EINVAL;
+
+		request->scan_plans[i].interval =
+			nla_get_u32(plan[NL80211_SCHED_SCAN_PLAN_INTERVAL]);
+		if (!request->scan_plans[i].interval ||
+		    request->scan_plans[i].interval >
+		    wiphy->max_sched_scan_plan_interval)
+			return -EINVAL;
+
+		if (plan[NL80211_SCHED_SCAN_PLAN_ITERATIONS]) {
+			request->scan_plans[i].iterations =
+				nla_get_u32(plan[NL80211_SCHED_SCAN_PLAN_ITERATIONS]);
+			if (!request->scan_plans[i].iterations ||
+			    (request->scan_plans[i].iterations >
+			     wiphy->max_sched_scan_plan_iterations))
+				return -EINVAL;
+		} else if (i < n_plans - 1) {
+			/*
+			 * All scan plans but the last one must specify
+			 * a finite number of iterations
+			 */
+			return -EINVAL;
+		}
+
+		i++;
+	}
+
+	/*
+	 * The last scan plan must not specify the number of
+	 * iterations, it is supposed to run infinitely
+	 */
+	if (request->scan_plans[n_plans - 1].iterations)
+		return -EINVAL;
+
+	return 0;
+}
+
 static struct cfg80211_sched_scan_request *
 nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
			 struct nlattr **attrs)
 {
	struct cfg80211_sched_scan_request *request;
	struct nlattr *attr;
-	int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i;
-	u32 interval;
+	int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0;
	enum ieee80211_band band;
	size_t ie_len;
	struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1];
@@ -5990,13 +6090,6 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
	if (!is_valid_ie_attr(attrs[NL80211_ATTR_IE]))
		return ERR_PTR(-EINVAL);
 
-	if (!attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
-		return ERR_PTR(-EINVAL);
-
-	interval = nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]);
-	if (interval == 0)
-		return ERR_PTR(-EINVAL);
-
	if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
		n_channels = validate_scan_freqs(
			attrs[NL80211_ATTR_SCAN_FREQUENCIES]);
@@ -6060,9 +6153,37 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
	if (ie_len > wiphy->max_sched_scan_ie_len)
		return ERR_PTR(-EINVAL);
 
+	if (attrs[NL80211_ATTR_SCHED_SCAN_PLANS]) {
+		/*
+		 * NL80211_ATTR_SCHED_SCAN_INTERVAL must not be specified since
+		 * each scan plan already specifies its own interval
+		 */
+		if (attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
+			return ERR_PTR(-EINVAL);
+
+		nla_for_each_nested(attr,
+				    attrs[NL80211_ATTR_SCHED_SCAN_PLANS], tmp)
+			n_plans++;
+	} else {
+		/*
+		 * The scan interval attribute is kept for backward
+		 * compatibility. If no scan plans are specified and sched scan
+		 * interval is specified, one scan plan will be set with this
+		 * scan interval and infinite number of iterations.
+		 */
+		if (!attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL])
+			return ERR_PTR(-EINVAL);
+
+		n_plans = 1;
+	}
+
+	if (!n_plans || n_plans > wiphy->max_sched_scan_plans)
+		return ERR_PTR(-EINVAL);
+
	request = kzalloc(sizeof(*request)
			+ sizeof(*request->ssids) * n_ssids
			+ sizeof(*request->match_sets) * n_match_sets
+			+ sizeof(*request->scan_plans) * n_plans
			+ sizeof(*request->channels) * n_channels
			+ ie_len, GFP_KERNEL);
	if (!request)
@@ -6090,6 +6211,18 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
	}
	request->n_match_sets = n_match_sets;
 
+	if (n_match_sets)
+		request->scan_plans = (void *)(request->match_sets +
+					       n_match_sets);
+	else if (request->ie)
+		request->scan_plans = (void *)(request->ie + ie_len);
+	else if (n_ssids)
+		request->scan_plans = (void *)(request->ssids + n_ssids);
+	else
+		request->scan_plans = (void *)(request->channels + n_channels);
+
+	request->n_scan_plans = n_plans;
+
	i = 0;
	if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
		/* user specified, bail out if channel not found */
@@ -6252,7 +6385,10 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
		request->delay =
			nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]);
 
-	request->interval = interval;
+	err = nl80211_parse_sched_scan_plans(wiphy, n_plans, request, attrs);
+	if (err)
+		goto out_free;
+
	request->scan_start = jiffies;
 
	return request;
@@ -6605,6 +6741,11 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
			jiffies_to_msecs(jiffies - intbss->ts)))
		goto nla_put_failure;
 
+	if (intbss->ts_boottime &&
+	    nla_put_u64(msg, NL80211_BSS_LAST_SEEN_BOOTTIME,
+			intbss->ts_boottime))
+		goto nla_put_failure;
+
	switch (rdev->wiphy.signal_type) {
	case CFG80211_SIGNAL_TYPE_MBM:
		if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal))
@@ -8845,7 +8986,7 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg,
 static int nl80211_send_wowlan_nd(struct sk_buff *msg,
				  struct cfg80211_sched_scan_request *req)
 {
-	struct nlattr *nd, *freqs, *matches, *match;
+	struct nlattr *nd, *freqs, *matches, *match, *scan_plans, *scan_plan;
	int i;
 
	if (!req)
@@ -8855,7 +8996,9 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg,
	if (!nd)
		return -ENOBUFS;
 
-	if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, req->interval))
+	if (req->n_scan_plans == 1 &&
+	    nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL,
+			req->scan_plans[0].interval * 1000))
		return -ENOBUFS;
 
	if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_DELAY, req->delay))
@@ -8882,6 +9025,23 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg,
		nla_nest_end(msg, matches);
	}
 
+	scan_plans = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_PLANS);
+	if (!scan_plans)
+		return -ENOBUFS;
+
+	for (i = 0; i < req->n_scan_plans; i++) {
+		scan_plan = nla_nest_start(msg, i + 1);
+		if (!scan_plan ||
+		    nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL,
+				req->scan_plans[i].interval) ||
+		    (req->scan_plans[i].iterations &&
+		     nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_ITERATIONS,
+				 req->scan_plans[i].iterations)))
+			return -ENOBUFS;
+		nla_nest_end(msg, scan_plan);
+	}
+	nla_nest_end(msg, scan_plans);
+
	nla_nest_end(msg, nd);
 
	return 0;
@@ -10737,6 +10897,7 @@ static const struct genl_ops nl80211_ops[] = {
		.internal_flags = NL80211_FLAG_NEED_RTNL,
		/* can be retrieved by unprivileged users */
	},
+#ifdef CONFIG_CFG80211_CRDA_SUPPORT
	{
		.cmd = NL80211_CMD_SET_REG,
		.doit = nl80211_set_reg,
@@ -10744,6 +10905,7 @@ static const struct genl_ops nl80211_ops[] = {
		.flags = GENL_ADMIN_PERM,
		.internal_flags = NL80211_FLAG_NEED_RTNL,
	},
+#endif
	{
		.cmd = NL80211_CMD_REQ_SET_REG,
		.doit = nl80211_req_set_reg,
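The scan-plan validation added in nl80211_parse_sched_scan_plans() above reduces to three rules: every plan needs a nonzero interval no larger than the wiphy's max_sched_scan_plan_interval, every plan except the last must request a finite number of iterations within max_sched_scan_plan_iterations, and the last plan must leave iterations at zero so it runs until the scan is stopped. A minimal userspace model of those rules in plain C (struct plan, plans_valid and the limit values are local to this sketch, not kernel API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct plan {
	uint32_t interval;	/* seconds */
	uint32_t iterations;	/* 0 == infinite */
};

static bool plans_valid(const struct plan *p, int n,
			uint32_t max_interval, uint32_t max_iterations)
{
	for (int i = 0; i < n; i++) {
		/* every plan needs a nonzero interval within the limit */
		if (!p[i].interval || p[i].interval > max_interval)
			return false;
		/* all plans but the last must be finite and bounded */
		if (i < n - 1 &&
		    (!p[i].iterations || p[i].iterations > max_iterations))
			return false;
	}
	/* the last plan must run infinitely (iterations == 0) */
	return p[n - 1].iterations == 0;
}

int main(void)
{
	/* scan every 10s three times, then every 60s until stopped */
	struct plan plans[] = { { 10, 3 }, { 60, 0 } };

	printf("%s\n", plans_valid(plans, 2, 3600, 100) ? "ok" : "bad");
	return 0;
}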
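The request allocation in nl80211_parse_sched_scan() above follows the common kernel pattern of one kzalloc() sized for the struct plus all of its variable-length tails, with member pointers then carved out of the block in order; that is why request->scan_plans is pointed after the match sets, the IEs, the SSIDs or the channels, whichever was laid out last before it. The same idea reduced to two arrays (struct req and req_alloc are illustrative names only; real code must order the tails so every member stays suitably aligned):

#include <stdlib.h>

struct req {
	int n_ids;
	int n_flags;
	unsigned int *ids;	/* points into the same allocation */
	unsigned char *flags;	/* follows the ids array */
};

static struct req *req_alloc(int n_ids, int n_flags)
{
	struct req *r;

	r = calloc(1, sizeof(*r) +
		      sizeof(*r->ids) * n_ids +
		      sizeof(*r->flags) * n_flags);
	if (!r)
		return NULL;

	r->ids = (void *)(r + 1);		/* right after the struct */
	r->flags = (void *)(r->ids + n_ids);	/* after the first array */
	r->n_ids = n_ids;
	r->n_flags = n_flags;
	return r;	/* a single free(r) releases everything */
}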
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 7258246b7458..2e8d6f39ed56 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -135,10 +135,7 @@ static spinlock_t reg_indoor_lock;
 /* Used to track the userspace process controlling the indoor setting */
 static u32 reg_is_indoor_portid;
 
-/* Max number of consecutive attempts to communicate with CRDA */
-#define REG_MAX_CRDA_TIMEOUTS 10
-
-static u32 reg_crda_timeouts;
+static void restore_regulatory_settings(bool reset_user);
 
 static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
 {
@@ -226,9 +223,6 @@ static DECLARE_DELAYED_WORK(reg_check_chans, reg_check_chans_work);
 static void reg_todo(struct work_struct *work);
 static DECLARE_WORK(reg_work, reg_todo);
 
-static void reg_timeout_work(struct work_struct *work);
-static DECLARE_DELAYED_WORK(reg_timeout, reg_timeout_work);
-
 /* We keep a static world regulatory domain in case of the absence of CRDA */
 static const struct ieee80211_regdomain world_regdom = {
	.n_reg_rules = 8,
@@ -262,7 +256,7 @@ static const struct ieee80211_regdomain world_regdom = {
		REG_RULE(5745-10, 5825+10, 80, 6, 20,
			NL80211_RRF_NO_IR),
 
-		/* IEEE 802.11ad (60gHz), channels 1..3 */
+		/* IEEE 802.11ad (60GHz), channels 1..3 */
		REG_RULE(56160+2160*1-1080, 56160+2160*3+1080, 2160, 0, 0, 0),
	}
 };
@@ -279,6 +273,9 @@ MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code");
 
 static void reg_free_request(struct regulatory_request *request)
 {
+	if (request == &core_request_world)
+		return;
+
	if (request != get_last_request())
		kfree(request);
 }
@@ -453,68 +450,70 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd)
 }
 
 #ifdef CONFIG_CFG80211_INTERNAL_REGDB
-struct reg_regdb_search_request {
-	char alpha2[2];
+struct reg_regdb_apply_request {
	struct list_head list;
+	const struct ieee80211_regdomain *regdom;
 };
 
-static LIST_HEAD(reg_regdb_search_list);
-static DEFINE_MUTEX(reg_regdb_search_mutex);
+static LIST_HEAD(reg_regdb_apply_list);
+static DEFINE_MUTEX(reg_regdb_apply_mutex);
 
-static void reg_regdb_search(struct work_struct *work)
+static void reg_regdb_apply(struct work_struct *work)
 {
-	struct reg_regdb_search_request *request;
-	const struct ieee80211_regdomain *curdom, *regdom = NULL;
-	int i;
+	struct reg_regdb_apply_request *request;
 
	rtnl_lock();
 
-	mutex_lock(&reg_regdb_search_mutex);
-	while (!list_empty(&reg_regdb_search_list)) {
-		request = list_first_entry(&reg_regdb_search_list,
-					   struct reg_regdb_search_request,
+	mutex_lock(&reg_regdb_apply_mutex);
+	while (!list_empty(&reg_regdb_apply_list)) {
+		request = list_first_entry(&reg_regdb_apply_list,
+					   struct reg_regdb_apply_request,
					   list);
		list_del(&request->list);
 
-		for (i = 0; i < reg_regdb_size; i++) {
-			curdom = reg_regdb[i];
-
-			if (alpha2_equal(request->alpha2, curdom->alpha2)) {
-				regdom = reg_copy_regd(curdom);
-				break;
-			}
-		}
-
+		set_regdom(request->regdom, REGD_SOURCE_INTERNAL_DB);
		kfree(request);
	}
-	mutex_unlock(&reg_regdb_search_mutex);
-
-	if (!IS_ERR_OR_NULL(regdom))
-		set_regdom(regdom, REGD_SOURCE_INTERNAL_DB);
+	mutex_unlock(&reg_regdb_apply_mutex);
 
	rtnl_unlock();
 }
 
-static DECLARE_WORK(reg_regdb_work, reg_regdb_search);
+static DECLARE_WORK(reg_regdb_work, reg_regdb_apply);
 
-static void reg_regdb_query(const char *alpha2)
+static int reg_query_builtin(const char *alpha2)
 {
-	struct reg_regdb_search_request *request;
+	const struct ieee80211_regdomain *regdom = NULL;
+	struct reg_regdb_apply_request *request;
+	unsigned int i;
 
-	if (!alpha2)
-		return;
+	for (i = 0; i < reg_regdb_size; i++) {
+		if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) {
+			regdom = reg_regdb[i];
+			break;
+		}
+	}
+
+	if (!regdom)
+		return -ENODATA;
 
-	request = kzalloc(sizeof(struct reg_regdb_search_request), GFP_KERNEL);
+	request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL);
	if (!request)
-		return;
+		return -ENOMEM;
 
-	memcpy(request->alpha2, alpha2, 2);
+	request->regdom = reg_copy_regd(regdom);
+	if (IS_ERR_OR_NULL(request->regdom)) {
+		kfree(request);
+		return -ENOMEM;
+	}
 
-	mutex_lock(&reg_regdb_search_mutex);
-	list_add_tail(&request->list, &reg_regdb_search_list);
-	mutex_unlock(&reg_regdb_search_mutex);
+	mutex_lock(&reg_regdb_apply_mutex);
+	list_add_tail(&request->list, &reg_regdb_apply_list);
+	mutex_unlock(&reg_regdb_apply_mutex);
 
	schedule_work(&reg_regdb_work);
+
+	return 0;
 }
 
 /* Feel free to add any other sanity checks here */
@@ -525,9 +524,45 @@ static void reg_regdb_size_check(void)
 }
 #else
 static inline void reg_regdb_size_check(void) {}
-static inline void reg_regdb_query(const char *alpha2) {}
+static inline int reg_query_builtin(const char *alpha2)
+{
+	return -ENODATA;
+}
 #endif /* CONFIG_CFG80211_INTERNAL_REGDB */
 
+#ifdef CONFIG_CFG80211_CRDA_SUPPORT
+/* Max number of consecutive attempts to communicate with CRDA */
+#define REG_MAX_CRDA_TIMEOUTS 10
+
+static u32 reg_crda_timeouts;
+
+static void crda_timeout_work(struct work_struct *work);
+static DECLARE_DELAYED_WORK(crda_timeout, crda_timeout_work);
+
+static void crda_timeout_work(struct work_struct *work)
+{
+	REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n");
+	rtnl_lock();
+	reg_crda_timeouts++;
+	restore_regulatory_settings(true);
+	rtnl_unlock();
+}
+
+static void cancel_crda_timeout(void)
+{
+	cancel_delayed_work(&crda_timeout);
+}
+
+static void cancel_crda_timeout_sync(void)
+{
+	cancel_delayed_work_sync(&crda_timeout);
+}
+
+static void reset_crda_timeouts(void)
+{
+	reg_crda_timeouts = 0;
+}
+
 /*
 * This lets us keep regulatory code which is updated on a regulatory
 * basis in userspace.
@@ -536,13 +571,11 @@ static int call_crda(const char *alpha2)
 {
	char country[12];
	char *env[] = { country, NULL };
+	int ret;
 
	snprintf(country, sizeof(country), "COUNTRY=%c%c",
		 alpha2[0], alpha2[1]);
 
-	/* query internal regulatory database (if it exists) */
-	reg_regdb_query(alpha2);
-
	if (reg_crda_timeouts > REG_MAX_CRDA_TIMEOUTS) {
		pr_debug("Exceeded CRDA call max attempts. Not calling CRDA\n");
		return -EINVAL;
	}
@@ -554,18 +587,34 @@ static int call_crda(const char *alpha2)
	else
		pr_debug("Calling CRDA to update world regulatory domain\n");
 
-	return kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env);
+	ret = kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env);
+	if (ret)
+		return ret;
+
+	queue_delayed_work(system_power_efficient_wq,
+			   &crda_timeout, msecs_to_jiffies(3142));
+	return 0;
+}
+#else
+static inline void cancel_crda_timeout(void) {}
+static inline void cancel_crda_timeout_sync(void) {}
+static inline void reset_crda_timeouts(void) {}
+static inline int call_crda(const char *alpha2)
+{
+	return -ENODATA;
 }
+#endif /* CONFIG_CFG80211_CRDA_SUPPORT */
 
-static enum reg_request_treatment
-reg_call_crda(struct regulatory_request *request)
+static bool reg_query_database(struct regulatory_request *request)
 {
-	if (call_crda(request->alpha2))
-		return REG_REQ_IGNORE;
+	/* query internal regulatory database (if it exists) */
+	if (reg_query_builtin(request->alpha2) == 0)
+		return true;
 
-	queue_delayed_work(system_power_efficient_wq,
-			   &reg_timeout, msecs_to_jiffies(3142));
-	return REG_REQ_OK;
+	if (call_crda(request->alpha2) == 0)
+		return true;
+
+	return false;
 }
 
 bool reg_is_valid_request(const char *alpha2)
@@ -1081,11 +1130,11 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator)
 }
 EXPORT_SYMBOL(reg_initiator_name);
 
-#ifdef CONFIG_CFG80211_REG_DEBUG
 static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
				    struct ieee80211_channel *chan,
				    const struct ieee80211_reg_rule *reg_rule)
 {
+#ifdef CONFIG_CFG80211_REG_DEBUG
	const struct ieee80211_power_rule *power_rule;
	const struct ieee80211_freq_range *freq_range;
	char max_antenna_gain[32], bw[32];
@@ -1096,7 +1145,7 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
	if (!power_rule->max_antenna_gain)
		snprintf(max_antenna_gain, sizeof(max_antenna_gain), "N/A");
	else
-		snprintf(max_antenna_gain, sizeof(max_antenna_gain), "%d",
+		snprintf(max_antenna_gain, sizeof(max_antenna_gain), "%d mBi",
			 power_rule->max_antenna_gain);
 
	if (reg_rule->flags & NL80211_RRF_AUTO_BW)
@@ -1110,19 +1159,12 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
	REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n",
		      chan->center_freq);
 
-	REG_DBG_PRINT("%d KHz - %d KHz @ %s), (%s mBi, %d mBm)\n",
+	REG_DBG_PRINT("(%d KHz - %d KHz @ %s), (%s, %d mBm)\n",
		      freq_range->start_freq_khz, freq_range->end_freq_khz,
		      bw, max_antenna_gain,
		      power_rule->max_eirp);
-}
-#else
-static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
-				    struct ieee80211_channel *chan,
-				    const struct ieee80211_reg_rule *reg_rule)
-{
-	return;
-}
 #endif
+}
 
 /*
 * Note that right now we assume the desired channel bandwidth
@@ -1311,7 +1353,8 @@ static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy)
	return !(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS);
 }
 #else
-static int reg_ignore_cell_hint(struct regulatory_request *pending_request)
+static enum reg_request_treatment
+reg_ignore_cell_hint(struct regulatory_request *pending_request)
 {
	return REG_REQ_IGNORE;
 }
@@ -1846,7 +1889,7 @@ static void reg_set_request_processed(void)
		need_more_processing = true;
	spin_unlock(&reg_requests_lock);
 
-	cancel_delayed_work(&reg_timeout);
+	cancel_crda_timeout();
 
	if (need_more_processing)
		schedule_work(&reg_work);
@@ -1858,19 +1901,18 @@ static void reg_set_request_processed(void)
 *
 * The wireless subsystem can use this function to process
 * a regulatory request issued by the regulatory core.
- *
- * Returns one of the different reg request treatment values.
 */
 static enum reg_request_treatment
 reg_process_hint_core(struct regulatory_request *core_request)
 {
+	if (reg_query_database(core_request)) {
+		core_request->intersect = false;
+		core_request->processed = false;
+		reg_update_last_request(core_request);
+		return REG_REQ_OK;
+	}
-
-	core_request->intersect = false;
-	core_request->processed = false;
-
-	reg_update_last_request(core_request);
-
-	return reg_call_crda(core_request);
+	return REG_REQ_IGNORE;
 }
 
 static enum reg_request_treatment
@@ -1915,8 +1957,6 @@ __reg_process_hint_user(struct regulatory_request *user_request)
 *
 * The wireless subsystem can use this function to process
 * a regulatory request initiated by userspace.
- *
- * Returns one of the different reg request treatment values.
 */
 static enum reg_request_treatment
 reg_process_hint_user(struct regulatory_request *user_request)
@@ -1925,20 +1965,20 @@ reg_process_hint_user(struct regulatory_request *user_request)
 
	treatment = __reg_process_hint_user(user_request);
	if (treatment == REG_REQ_IGNORE ||
-	    treatment == REG_REQ_ALREADY_SET) {
-		reg_free_request(user_request);
-		return treatment;
-	}
+	    treatment == REG_REQ_ALREADY_SET)
+		return REG_REQ_IGNORE;
 
	user_request->intersect = treatment == REG_REQ_INTERSECT;
	user_request->processed = false;
 
-	reg_update_last_request(user_request);
-
-	user_alpha2[0] = user_request->alpha2[0];
-	user_alpha2[1] = user_request->alpha2[1];
+	if (reg_query_database(user_request)) {
+		reg_update_last_request(user_request);
+		user_alpha2[0] = user_request->alpha2[0];
+		user_alpha2[1] = user_request->alpha2[1];
+		return REG_REQ_OK;
+	}
 
-	return reg_call_crda(user_request);
+	return REG_REQ_IGNORE;
 }
 
 static enum reg_request_treatment
@@ -1986,16 +2026,12 @@ reg_process_hint_driver(struct wiphy *wiphy,
	case REG_REQ_OK:
		break;
	case REG_REQ_IGNORE:
-		reg_free_request(driver_request);
-		return treatment;
+		return REG_REQ_IGNORE;
	case REG_REQ_INTERSECT:
-		/* fall through */
	case REG_REQ_ALREADY_SET:
		regd = reg_copy_regd(get_cfg80211_regdom());
-		if (IS_ERR(regd)) {
-			reg_free_request(driver_request);
+		if (IS_ERR(regd))
			return REG_REQ_IGNORE;
-		}
 
		tmp = get_wiphy_regdom(wiphy);
		rcu_assign_pointer(wiphy->regd, regd);
@@ -2006,8 +2042,6 @@ reg_process_hint_driver(struct wiphy *wiphy,
	driver_request->intersect = treatment == REG_REQ_INTERSECT;
	driver_request->processed = false;
 
-	reg_update_last_request(driver_request);
-
	/*
	 * Since CRDA will not be called in this case as we already
	 * have applied the requested regulatory domain before we just
@@ -2015,11 +2049,17 @@ reg_process_hint_driver(struct wiphy *wiphy,
	 */
	if (treatment == REG_REQ_ALREADY_SET) {
		nl80211_send_reg_change_event(driver_request);
+		reg_update_last_request(driver_request);
		reg_set_request_processed();
-		return treatment;
+		return REG_REQ_ALREADY_SET;
	}
 
-	return reg_call_crda(driver_request);
+	if (reg_query_database(driver_request)) {
+		reg_update_last_request(driver_request);
+		return REG_REQ_OK;
+	}
+
+	return REG_REQ_IGNORE;
 }
 
 static enum reg_request_treatment
@@ -2085,12 +2125,11 @@ reg_process_hint_country_ie(struct wiphy *wiphy,
	case REG_REQ_OK:
		break;
	case REG_REQ_IGNORE:
-		/* fall through */
+		return REG_REQ_IGNORE;
	case REG_REQ_ALREADY_SET:
		reg_free_request(country_ie_request);
-		return treatment;
+		return REG_REQ_ALREADY_SET;
	case REG_REQ_INTERSECT:
-		reg_free_request(country_ie_request);
		/*
		 * This doesn't happen yet, not sure we
		 * ever want to support it for this case.
@@ -2102,9 +2141,12 @@ reg_process_hint_country_ie(struct wiphy *wiphy,
	country_ie_request->intersect = false;
	country_ie_request->processed = false;
 
-	reg_update_last_request(country_ie_request);
+	if (reg_query_database(country_ie_request)) {
+		reg_update_last_request(country_ie_request);
+		return REG_REQ_OK;
+	}
 
-	return reg_call_crda(country_ie_request);
+	return REG_REQ_IGNORE;
 }
 
 /* This processes *all* regulatory hints */
@@ -2118,11 +2160,11 @@ static void reg_process_hint(struct regulatory_request *reg_request)
 
	switch (reg_request->initiator) {
	case NL80211_REGDOM_SET_BY_CORE:
-		reg_process_hint_core(reg_request);
-		return;
+		treatment = reg_process_hint_core(reg_request);
+		break;
	case NL80211_REGDOM_SET_BY_USER:
-		reg_process_hint_user(reg_request);
-		return;
+		treatment = reg_process_hint_user(reg_request);
+		break;
	case NL80211_REGDOM_SET_BY_DRIVER:
		if (!wiphy)
			goto out_free;
@@ -2138,6 +2180,12 @@ static void reg_process_hint(struct regulatory_request *reg_request)
		goto out_free;
	}
 
+	if (treatment == REG_REQ_IGNORE)
+		goto out_free;
+
+	WARN(treatment != REG_REQ_OK && treatment != REG_REQ_ALREADY_SET,
+	     "unexpected treatment value %d\n", treatment);
+
	/* This is required so that the orig_* parameters are saved.
	 * NOTE: treatment must be set for any case that reaches here!
	 */
@@ -2345,7 +2393,7 @@ int regulatory_hint_user(const char *alpha2,
	request->user_reg_hint_type = user_reg_hint_type;
 
	/* Allow calling CRDA again */
-	reg_crda_timeouts = 0;
+	reset_crda_timeouts();
 
	queue_regulatory_request(request);
 
@@ -2417,7 +2465,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
	request->initiator = NL80211_REGDOM_SET_BY_DRIVER;
 
	/* Allow calling CRDA again */
-	reg_crda_timeouts = 0;
+	reset_crda_timeouts();
 
	queue_regulatory_request(request);
 
@@ -2473,7 +2521,7 @@ void regulatory_hint_country_ie(struct wiphy *wiphy, enum ieee80211_band band,
	request->country_ie_env = env;
 
	/* Allow calling CRDA again */
-	reg_crda_timeouts = 0;
+	reset_crda_timeouts();
 
	queue_regulatory_request(request);
	request = NULL;
@@ -2874,11 +2922,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
	}
 
	request_wiphy = wiphy_idx_to_wiphy(driver_request->wiphy_idx);
-	if (!request_wiphy) {
-		queue_delayed_work(system_power_efficient_wq,
-				   &reg_timeout, 0);
+	if (!request_wiphy)
		return -ENODEV;
-	}
 
	if (!driver_request->intersect) {
		if (request_wiphy->regd)
@@ -2935,11 +2980,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd,
	}
 
	request_wiphy = wiphy_idx_to_wiphy(country_ie_request->wiphy_idx);
-	if (!request_wiphy) {
-		queue_delayed_work(system_power_efficient_wq,
-				   &reg_timeout, 0);
+	if (!request_wiphy)
		return -ENODEV;
-	}
 
	if (country_ie_request->intersect)
		return -EINVAL;
@@ -2966,7 +3008,7 @@ int set_regdom(const struct ieee80211_regdomain *rd,
	}
 
	if (regd_src == REGD_SOURCE_CRDA)
-		reg_crda_timeouts = 0;
+		reset_crda_timeouts();
 
	lr = get_last_request();
 
@@ -3123,15 +3165,6 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy)
		lr->country_ie_env = ENVIRON_ANY;
 }
 
-static void reg_timeout_work(struct work_struct *work)
-{
-	REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n");
-	rtnl_lock();
-	reg_crda_timeouts++;
-	restore_regulatory_settings(true);
-	rtnl_unlock();
-}
-
 /*
 * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for
 * UNII band definitions
@@ -3217,7 +3250,7 @@ void regulatory_exit(void)
	struct reg_beacon *reg_beacon, *btmp;
 
	cancel_work_sync(&reg_work);
-	cancel_delayed_work_sync(&reg_timeout);
+	cancel_crda_timeout_sync();
	cancel_delayed_work_sync(&reg_check_chans);
 
	/* Lock to suppress warnings */
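After this rework, reg_query_database() above gives every regulatory hint the same lookup order: the built-in regdb (if compiled in) is consulted first, and the CRDA userspace helper is only invoked as a fallback, with each path compiling down to a stub that fails cleanly when its config option is disabled. A compilable userspace sketch of that fallback shape (query_builtin, call_helper and query_database are stand-ins for the kernel functions, not their real signatures):

#include <stdbool.h>
#include <stdio.h>

/* stand-in for reg_query_builtin(); nonzero means "no built-in data" */
static int query_builtin(const char *alpha2)
{
	(void)alpha2;
	return -1;	/* pretend CONFIG_CFG80211_INTERNAL_REGDB=n */
}

/* stand-in for call_crda(); 0 means the uevent was queued */
static int call_helper(const char *alpha2)
{
	printf("COUNTRY=%c%c -> udev helper\n", alpha2[0], alpha2[1]);
	return 0;	/* a delayed-work timeout guards the reply */
}

/* mirrors reg_query_database(): builtin regdb first, CRDA as fallback */
static bool query_database(const char *alpha2)
{
	if (query_builtin(alpha2) == 0)
		return true;
	return call_helper(alpha2) == 0;
}

int main(void)
{
	return query_database("DE") ? 0 : 1;
}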
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 3a50aa2553bf..14d5369eb778 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -266,8 +266,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk)
			spin_lock_bh(&rdev->bss_lock);
			__cfg80211_bss_expire(rdev, request->scan_start);
			spin_unlock_bh(&rdev->bss_lock);
-			request->scan_start =
-				jiffies + msecs_to_jiffies(request->interval);
+			request->scan_start = jiffies;
		}
		nl80211_send_sched_scan_results(rdev, request->dev);
	}
@@ -839,6 +838,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
		found->pub.signal = tmp->pub.signal;
		found->pub.capability = tmp->pub.capability;
		found->ts = tmp->ts;
+		found->ts_boottime = tmp->ts_boottime;
	} else {
		struct cfg80211_internal_bss *new;
		struct cfg80211_internal_bss *hidden;
@@ -938,14 +938,13 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
 }
 
 /* Returned bss is reference counted and must be cleaned up appropriately. */
-struct cfg80211_bss*
-cfg80211_inform_bss_width(struct wiphy *wiphy,
-			  struct ieee80211_channel *rx_channel,
-			  enum nl80211_bss_scan_width scan_width,
-			  enum cfg80211_bss_frame_type ftype,
-			  const u8 *bssid, u64 tsf, u16 capability,
-			  u16 beacon_interval, const u8 *ie, size_t ielen,
-			  s32 signal, gfp_t gfp)
+struct cfg80211_bss *
+cfg80211_inform_bss_data(struct wiphy *wiphy,
+			 struct cfg80211_inform_bss *data,
+			 enum cfg80211_bss_frame_type ftype,
+			 const u8 *bssid, u64 tsf, u16 capability,
+			 u16 beacon_interval, const u8 *ie, size_t ielen,
+			 gfp_t gfp)
 {
	struct cfg80211_bss_ies *ies;
	struct ieee80211_channel *channel;
@@ -957,19 +956,21 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
		return NULL;
 
	if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
-		    (signal < 0 || signal > 100)))
+		    (data->signal < 0 || data->signal > 100)))
		return NULL;
 
-	channel = cfg80211_get_bss_channel(wiphy, ie, ielen, rx_channel);
+	channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan);
	if (!channel)
		return NULL;
 
	memcpy(tmp.pub.bssid, bssid, ETH_ALEN);
	tmp.pub.channel = channel;
-	tmp.pub.scan_width = scan_width;
-	tmp.pub.signal = signal;
+	tmp.pub.scan_width = data->scan_width;
+	tmp.pub.signal = data->signal;
	tmp.pub.beacon_interval = beacon_interval;
	tmp.pub.capability = capability;
+	tmp.ts_boottime = data->boottime_ns;
+
	/*
	 * If we do not know here whether the IEs are from a Beacon or Probe
	 * Response frame, we need to pick one of the options and only use it
@@ -999,7 +1000,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
	}
	rcu_assign_pointer(tmp.pub.ies, ies);
 
-	signal_valid = abs(rx_channel->center_freq - channel->center_freq) <=
+	signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
		wiphy->max_adj_channel_rssi_comp;
	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
	if (!res)
@@ -1019,15 +1020,15 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
	/* cfg80211_bss_update gives us a referenced result */
	return &res->pub;
 }
-EXPORT_SYMBOL(cfg80211_inform_bss_width);
+EXPORT_SYMBOL(cfg80211_inform_bss_data);
 
-/* Returned bss is reference counted and must be cleaned up appropriately. */
+/* cfg80211_inform_bss_width_frame helper */
 struct cfg80211_bss *
-cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
-				struct ieee80211_channel *rx_channel,
-				enum nl80211_bss_scan_width scan_width,
-				struct ieee80211_mgmt *mgmt, size_t len,
-				s32 signal, gfp_t gfp)
+cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
+			       struct cfg80211_inform_bss *data,
+			       struct ieee80211_mgmt *mgmt, size_t len,
+			       gfp_t gfp)
+
 {
	struct cfg80211_internal_bss tmp = {}, *res;
	struct cfg80211_bss_ies *ies;
@@ -1040,8 +1041,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
	BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
		     offsetof(struct ieee80211_mgmt, u.beacon.variable));
 
-	trace_cfg80211_inform_bss_width_frame(wiphy, rx_channel, scan_width, mgmt,
-					      len, signal);
+	trace_cfg80211_inform_bss_frame(wiphy, data, mgmt, len);
 
	if (WARN_ON(!mgmt))
		return NULL;
@@ -1050,14 +1050,14 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
		return NULL;
 
	if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
-		    (signal < 0 || signal > 100)))
+		    (data->signal < 0 || data->signal > 100)))
		return NULL;
 
	if (WARN_ON(len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
		return NULL;
 
	channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable,
-					   ielen, rx_channel);
+					   ielen, data->chan);
	if (!channel)
		return NULL;
 
@@ -1077,12 +1077,13 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
 
	memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN);
	tmp.pub.channel = channel;
-	tmp.pub.scan_width = scan_width;
-	tmp.pub.signal = signal;
+	tmp.pub.scan_width = data->scan_width;
+	tmp.pub.signal = data->signal;
	tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
	tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
+	tmp.ts_boottime = data->boottime_ns;
 
-	signal_valid = abs(rx_channel->center_freq - channel->center_freq) <=
+	signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
		wiphy->max_adj_channel_rssi_comp;
	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
	if (!res)
@@ -1102,7 +1103,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
	/* cfg80211_bss_update gives us a referenced result */
	return &res->pub;
 }
-EXPORT_SYMBOL(cfg80211_inform_bss_width_frame);
+EXPORT_SYMBOL(cfg80211_inform_bss_frame_data);
 
 void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
 {
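The ts_boottime plumbing above lets drivers stamp each BSS entry with a CLOCK_BOOTTIME timestamp in nanoseconds, a clock that keeps counting across suspend, and nl80211 exports it as NL80211_BSS_LAST_SEEN_BOOTTIME. A userspace consumer would compare it against the same clock; a sketch of just that arithmetic (netlink parsing elided, bss_age_ms is a name local to this example):

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* age of a BSS entry given NL80211_BSS_LAST_SEEN_BOOTTIME (ns) */
static int64_t bss_age_ms(uint64_t last_seen_ns)
{
	struct timespec ts;
	uint64_t now_ns;

	/* CLOCK_BOOTTIME advances during suspend, unlike CLOCK_MONOTONIC */
	if (clock_gettime(CLOCK_BOOTTIME, &ts))
		return -1;

	now_ns = (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
	return (int64_t)(now_ns - last_seen_ns) / 1000000;
}

int main(void)
{
	/* a made-up timestamp; this only demonstrates the math */
	printf("age: %lld ms\n", (long long)bss_age_ms(0));
	return 0;
}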
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index a808279a432a..0c392d36781b 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2670,30 +2670,30 @@ TRACE_EVENT(cfg80211_get_bss,
		  __entry->privacy)
 );
 
-TRACE_EVENT(cfg80211_inform_bss_width_frame,
-	TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel,
-		 enum nl80211_bss_scan_width scan_width,
-		 struct ieee80211_mgmt *mgmt, size_t len,
-		 s32 signal),
-	TP_ARGS(wiphy, channel, scan_width, mgmt, len, signal),
+TRACE_EVENT(cfg80211_inform_bss_frame,
+	TP_PROTO(struct wiphy *wiphy, struct cfg80211_inform_bss *data,
+		 struct ieee80211_mgmt *mgmt, size_t len),
+	TP_ARGS(wiphy, data, mgmt, len),
	TP_STRUCT__entry(
		WIPHY_ENTRY
		CHAN_ENTRY
		__field(enum nl80211_bss_scan_width, scan_width)
		__dynamic_array(u8, mgmt, len)
		__field(s32, signal)
+		__field(u64, ts_boottime)
	),
	TP_fast_assign(
		WIPHY_ASSIGN;
-		CHAN_ASSIGN(channel);
-		__entry->scan_width = scan_width;
+		CHAN_ASSIGN(data->chan);
+		__entry->scan_width = data->scan_width;
		if (mgmt)
			memcpy(__get_dynamic_array(mgmt), mgmt, len);
-		__entry->signal = signal;
+		__entry->signal = data->signal;
+		__entry->ts_boottime = data->boottime_ns;
	),
-	TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "(scan_width: %d) signal: %d",
+	TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "(scan_width: %d) signal: %d, tsb:%llu",
		  WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
-		  __entry->signal)
+		  __entry->signal, (unsigned long long)__entry->ts_boottime)
 );
 
 DECLARE_EVENT_CLASS(cfg80211_bss_evt,
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a8de9e300200..24e06a2377f6 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1928,8 +1928,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
	struct nlattr *rp = attrs[XFRMA_REPLAY_VAL];
	struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];
	struct nlattr *lt = attrs[XFRMA_LTIME_VAL];
+	struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];
+	struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
 
-	if (!lt && !rp && !re)
+	if (!lt && !rp && !re && !et && !rt)
		return err;
 
	/* pedantic mode - thou shalt sayeth replaceth */
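The xfrm_new_ae() change above widens the "at least one updatable attribute" guard so that requests carrying only XFRMA_ETIMER_THRESH or XFRMA_REPLAY_THRESH are no longer rejected as empty. The guard pattern itself, in miniature (the enum indices here are illustrative, not the real xfrm netlink attribute layout):

#include <stdbool.h>
#include <stddef.h>

enum { ATTR_LTIME, ATTR_REPLAY, ATTR_REPLAY_ESN,
       ATTR_ETIMER_THRESH, ATTR_REPLAY_THRESH, ATTR_MAX };

/* reject the request only if none of the updatable attrs is present */
static bool has_update(void *const attrs[ATTR_MAX])
{
	return attrs[ATTR_LTIME] || attrs[ATTR_REPLAY] ||
	       attrs[ATTR_REPLAY_ESN] || attrs[ATTR_ETIMER_THRESH] ||
	       attrs[ATTR_REPLAY_THRESH];
}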