From 533555e5cbb6aa2d77598917871ae5b579fe724b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 27 Oct 2018 06:12:06 +0000 Subject: xfrm: Fix error return code in xfrm_output_one() xfrm_output_one() does not return an error code when there is no dst_entry attached to the skb, so it is still possible to crash with a NULL pointer dereference in xfrm_output_resume(). Fix it by returning the error code -EHOSTUNREACH. Fixes: 9e1437937807 ("xfrm: Fix NULL pointer dereference when skb_dst_force clears the dst_entry.") Signed-off-by: Wei Yongjun Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 4ae87c5ce2e3..fef6b2da3c5d 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -102,6 +102,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) skb_dst_force(skb); if (!skb_dst(skb)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + err = -EHOSTUNREACH; goto error_nolock; } -- cgit v1.2.3-59-g8ed1b From ca92e173ab34a4f7fc4128bd372bd96f1af6f507 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 5 Nov 2018 17:00:53 +0900 Subject: xfrm: Fix bucket count reported to userspace sadhcnt is reported by `ip -s xfrm state count` as "buckets count", not the hash mask. Fixes: 28d8909bc790 ("[XFRM]: Export SAD info.") Signed-off-by: Benjamin Poirier Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index b669262682c9..12cdb350c456 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -788,7 +788,7 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { spin_lock_bh(&net->xfrm.xfrm_state_lock); si->sadcnt = net->xfrm.state_num; - si->sadhcnt = net->xfrm.state_hmask; + si->sadhcnt = net->xfrm.state_hmask + 1; si->sadhmcnt = xfrm_state_hashmax; spin_unlock_bh(&net->xfrm.xfrm_state_lock); } -- cgit v1.2.3-59-g8ed1b From 0152eee6fc3b84298bb6a79961961734e8afa5b8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 22 Nov 2018 07:26:24 +0100 Subject: xfrm: Fix NULL pointer dereference in xfrm_input when skb_dst_force clears the dst_entry. Since commit 222d7dbd258d ("net: prevent dst uses after free") skb_dst_force() might clear the dst_entry attached to the skb. The xfrm code doesn't expect this to happen, so we crash with a NULL pointer dereference in this case. Fix it by checking skb_dst(skb) for NULL after skb_dst_force() and dropping the packet in case the dst_entry was cleared. We also move the skb_dst_force() to a codepath that is not used when the transformation was offloaded, because in this case we don't have a dst_entry attached to the skb. 
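For illustration, here is a minimal userspace sketch of the pattern both dst_entry fixes rely on: a helper may legitimately clear a cached pointer, so the caller must re-check it and fail gracefully instead of dereferencing NULL. The struct and helper names below are invented for this sketch; it is not kernel code.

```c
/* Simplified userspace analogue of the dst_entry checks above; the types
 * and helpers are invented for illustration, this is not kernel code. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct packet {
	void *route;          /* stands in for the skb's dst_entry */
};

/* May drop the cached route, e.g. because it became stale. */
static void force_route(struct packet *p, int route_is_stale)
{
	if (route_is_stale) {
		free(p->route);
		p->route = NULL;
	}
}

static int output_one(struct packet *p, int route_is_stale)
{
	force_route(p, route_is_stale);
	if (!p->route)
		return -EHOSTUNREACH;   /* drop with an error instead of crashing */
	/* ... would continue transmitting via p->route ... */
	return 0;
}

int main(void)
{
	struct packet p = { .route = malloc(1) };

	printf("fresh route: %d\n", output_one(&p, 0));
	printf("stale route: %d\n", output_one(&p, 1));
	return 0;
}
```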
The output and forwarding path was already fixed by commit 9e1437937807 ("xfrm: Fix NULL pointer dereference when skb_dst_force clears the dst_entry.") Fixes: 222d7dbd258d ("net: prevent dst uses after free") Reported-by: Jean-Philippe Menil Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 684c0bc01e2c..d5635908587f 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -346,6 +346,12 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; + skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); + goto drop; + } + lock: spin_lock(&x->lock); @@ -385,7 +391,6 @@ lock: XFRM_SKB_CB(skb)->seq.input.low = seq; XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; - skb_dst_force(skb); dev_hold(skb->dev); if (crypto_done) -- cgit v1.2.3-59-g8ed1b From 4a135e538962cb00a9667c82e7d2b9e4d7cd7177 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 21 Nov 2018 21:09:23 +0100 Subject: xfrm_user: fix freeing of xfrm states on acquire Commit 565f0fa902b6 ("xfrm: use a dedicated slab cache for struct xfrm_state") moved xfrm state objects to use their own slab cache. However, it missed to adapt xfrm_user to use this new cache when freeing xfrm states. Fix this by introducing and make use of a new helper for freeing xfrm_state objects. Fixes: 565f0fa902b6 ("xfrm: use a dedicated slab cache for struct xfrm_state") Reported-by: Pan Bian Cc: # v4.18+ Signed-off-by: Mathias Krause Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + net/xfrm/xfrm_state.c | 8 +++++++- net/xfrm/xfrm_user.c | 4 ++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0eb390c205af..da588def3c61 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1552,6 +1552,7 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net); struct xfrm_state *xfrm_state_alloc(struct net *net); +void xfrm_state_free(struct xfrm_state *x); struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, const struct flowi *fl, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 12cdb350c456..cc0203efb584 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -426,6 +426,12 @@ static void xfrm_put_mode(struct xfrm_mode *mode) module_put(mode->owner); } +void xfrm_state_free(struct xfrm_state *x) +{ + kmem_cache_free(xfrm_state_cache, x); +} +EXPORT_SYMBOL(xfrm_state_free); + static void xfrm_state_gc_destroy(struct xfrm_state *x) { tasklet_hrtimer_cancel(&x->mtimer); @@ -452,7 +458,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) } xfrm_dev_state_free(x); security_xfrm_state_free(x); - kmem_cache_free(xfrm_state_cache, x); + xfrm_state_free(x); } static void xfrm_state_gc_task(struct work_struct *work) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ca7a207b81a9..683080172655 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2288,13 +2288,13 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, } - kfree(x); + xfrm_state_free(x); kfree(xp); return 0; free_state: - kfree(x); + xfrm_state_free(x); nomem: return err; } -- cgit v1.2.3-59-g8ed1b From a73d4e1490913b76b292f91553b7ba08a65caa3f Mon 
Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 29 Nov 2018 17:41:54 -0500 Subject: ieee802154: hwsim: fix off-by-one in parse nested This patch fixes a off-by-one mistake in nla_parse_nested() functions of mac802154_hwsim driver. I had to enabled stack protector so I was able to reproduce it. Reference: https://github.com/linux-wpan/wpan-tools/issues/17 Signed-off-by: Alexander Aring Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mac802154_hwsim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 51b5198d5943..b6743f03dce0 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -492,7 +492,7 @@ static int hwsim_del_edge_nl(struct sk_buff *msg, struct genl_info *info) !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; - if (nla_parse_nested(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX + 1, + if (nla_parse_nested(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; @@ -542,7 +542,7 @@ static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info) !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; - if (nla_parse_nested(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX + 1, + if (nla_parse_nested(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 4c05ec47384ab3627b62814e8f886e90cc38ce15 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 26 Nov 2018 20:03:30 +0900 Subject: netfilter: nf_tables: fix suspicious RCU usage in nft_chain_stats_replace() basechain->stats is rcu protected data which is updated from nft_chain_stats_replace(). This function is executed from the commit phase which holds the pernet nf_tables commit mutex - not the global nfnetlink subsystem mutex. Test commands to reproduce the problem are: %iptables-nft -I INPUT %iptables-nft -Z %iptables-nft -Z This patch uses RCU calls to handle basechain->stats updates to fix a splat that looks like: [89279.358755] ============================= [89279.363656] WARNING: suspicious RCU usage [89279.368458] 4.20.0-rc2+ #44 Tainted: G W L [89279.374661] ----------------------------- [89279.379542] net/netfilter/nf_tables_api.c:1404 suspicious rcu_dereference_protected() usage! [...] [89279.406556] 1 lock held by iptables-nft/5225: [89279.411728] #0: 00000000bf45a000 (&net->nft.commit_mutex){+.+.}, at: nf_tables_valid_genid+0x1f/0x70 [nf_tables] [89279.424022] stack backtrace: [89279.429236] CPU: 0 PID: 5225 Comm: iptables-nft Tainted: G W L 4.20.0-rc2+ #44 [89279.430135] Call Trace: [89279.430135] dump_stack+0xc9/0x16b [89279.430135] ? show_regs_print_info+0x5/0x5 [89279.430135] ? lockdep_rcu_suspicious+0x117/0x160 [89279.430135] nft_chain_commit_update+0x4ea/0x640 [nf_tables] [89279.430135] ? sched_clock_local+0xd4/0x140 [89279.430135] ? check_flags.part.35+0x440/0x440 [89279.430135] ? __rhashtable_remove_fast.constprop.67+0xec0/0xec0 [nf_tables] [89279.430135] ? sched_clock_cpu+0x126/0x170 [89279.430135] ? find_held_lock+0x39/0x1c0 [89279.430135] ? hlock_class+0x140/0x140 [89279.430135] ? is_bpf_text_address+0x5/0xf0 [89279.430135] ? check_flags.part.35+0x440/0x440 [89279.430135] ? 
__lock_is_held+0xb4/0x140 [89279.430135] nf_tables_commit+0x2555/0x39c0 [nf_tables] Fixes: f102d66b335a4 ("netfilter: nf_tables: use dedicated mutex to guard transactions") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 12 ------------ net/netfilter/nf_tables_api.c | 21 +++++++++++++-------- net/netfilter/nf_tables_core.c | 2 +- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 4a520d3304a2..cf09ab37b45b 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -62,18 +62,6 @@ static inline bool lockdep_nfnl_is_held(__u8 subsys_id) } #endif /* CONFIG_PROVE_LOCKING */ -/* - * nfnl_dereference - fetch RCU pointer when updates are prevented by subsys mutex - * - * @p: The pointer to read, prior to dereferencing - * @ss: The nfnetlink subsystem ID - * - * Return the value of the specified RCU-protected pointer, but omit - * the READ_ONCE(), because caller holds the NFNL subsystem mutex. - */ -#define nfnl_dereference(p, ss) \ - rcu_dereference_protected(p, lockdep_nfnl_is_held(ss)) - #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2e61aab6ed73..6e548d7c9f67 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1216,7 +1216,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) goto nla_put_failure; - if (basechain->stats && nft_dump_stats(skb, basechain->stats)) + if (rcu_access_pointer(basechain->stats) && + nft_dump_stats(skb, rcu_dereference(basechain->stats))) goto nla_put_failure; } @@ -1392,7 +1393,8 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) return newstats; } -static void nft_chain_stats_replace(struct nft_base_chain *chain, +static void nft_chain_stats_replace(struct net *net, + struct nft_base_chain *chain, struct nft_stats __percpu *newstats) { struct nft_stats __percpu *oldstats; @@ -1400,8 +1402,9 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain, if (newstats == NULL) return; - if (chain->stats) { - oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES); + if (rcu_access_pointer(chain->stats)) { + oldstats = rcu_dereference_protected(chain->stats, + lockdep_commit_lock_is_held(net)); rcu_assign_pointer(chain->stats, newstats); synchronize_rcu(); free_percpu(oldstats); @@ -1439,9 +1442,10 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx) struct nft_base_chain *basechain = nft_base_chain(chain); module_put(basechain->type->owner); - free_percpu(basechain->stats); - if (basechain->stats) + if (rcu_access_pointer(basechain->stats)) { static_branch_dec(&nft_counters_enabled); + free_percpu(rcu_dereference_raw(basechain->stats)); + } kfree(chain->name); kfree(basechain); } else { @@ -1590,7 +1594,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, kfree(basechain); return PTR_ERR(stats); } - basechain->stats = stats; + rcu_assign_pointer(basechain->stats, stats); static_branch_inc(&nft_counters_enabled); } @@ -6180,7 +6184,8 @@ static void nft_chain_commit_update(struct nft_trans *trans) return; basechain = nft_base_chain(trans->ctx.chain); - nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans)); + nft_chain_stats_replace(trans->ctx.net, basechain, + 
nft_trans_chain_stats(trans)); switch (nft_trans_chain_policy(trans)) { case NF_DROP: diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3fbce3b9c5ec..a50500232b0a 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -101,7 +101,7 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain, struct nft_stats *stats; base_chain = nft_base_chain(chain); - if (!base_chain->stats) + if (!rcu_access_pointer(base_chain->stats)) return; local_bh_disable(); -- cgit v1.2.3-59-g8ed1b From 530aad77010b81526586dfc09130ec875cd084e4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 5 Dec 2018 14:12:19 +0100 Subject: netfilter: seqadj: re-load tcp header pointer after possible head reallocation When adjusting sack block sequence numbers, skb_make_writable() gets called to make sure the tcp options are all in the linear area and the buffer is not shared. This can cause the tcp header pointer to get reallocated, so we must reload it to avoid memory corruption. This bug pre-dates git history. Reported-by: Neel Mehta Reported-by: Shane Huntley Reported-by: Heather Adkins Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_seqadj.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index a975efd6b8c3..9da303461069 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -115,12 +115,12 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb, /* TCP SACK sequence number adjustment */ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb, unsigned int protoff, - struct tcphdr *tcph, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - unsigned int dir, optoff, optend; + struct tcphdr *tcph = (void *)skb->data + protoff; struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + unsigned int dir, optoff, optend; optoff = protoff + sizeof(struct tcphdr); optend = protoff + tcph->doff * 4; @@ -128,6 +128,7 @@ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb, if (!skb_make_writable(skb, optend)) return 0; + tcph = (void *)skb->data + protoff; dir = CTINFO2DIR(ctinfo); while (optoff < optend) { @@ -207,7 +208,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb, ntohl(newack)); tcph->ack_seq = newack; - res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo); + res = nf_ct_sack_adjust(skb, protoff, ct, ctinfo); out: spin_unlock_bh(&ct->lock); -- cgit v1.2.3-59-g8ed1b From 13e56ec2cc9860aa22e01ffc7a3160f35a96b728 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 5 Dec 2018 20:40:47 -0800 Subject: selftests/bpf: use thoff instead of nhoff in BPF flow dissector We are returning thoff from the flow dissector, not the nhoff. Pass thoff along with nhoff to the bpf program (initially thoff == nhoff) and expect the flow dissector to amend/return thoff, not nhoff. This avoids the confusion where, by the time the bpf flow dissector exits, nhoff == thoff, which doesn't make much sense. 
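To make the nhoff/thoff distinction concrete (nhoff marks the start of the network header, while thoff should end up at the transport header), here is a small standalone C sketch over a hand-built IPv4 header; the packet bytes are illustrative only and this is not the selftest code.

```c
/* Illustrative sketch, not the selftest: compute nhoff and thoff for a
 * minimal Ethernet + IPv4 frame built by hand. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* 14-byte Ethernet header followed by a minimal IPv4 header
	 * (version 4, ihl 5); the remaining payload bytes do not matter here. */
	uint8_t pkt[64] = { 0 };
	unsigned int nhoff = 14;              /* network header starts after Ethernet */

	pkt[nhoff] = 0x45;                    /* version 4, ihl 5 -> 20-byte IP header */
	pkt[nhoff + 9] = 6;                   /* protocol: TCP */

	unsigned int ihl = pkt[nhoff] & 0x0f;
	unsigned int thoff = nhoff + ihl * 4; /* transport header starts past the IP header */

	printf("nhoff=%u thoff=%u\n", nhoff, thoff);  /* prints nhoff=14 thoff=34 */
	return 0;
}
```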
Signed-off-by: Stanislav Fomichev Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- net/core/flow_dissector.c | 1 + tools/testing/selftests/bpf/bpf_flow.c | 36 ++++++++++++++++------------------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 588f475019d4..ff5556d80570 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -783,6 +783,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, /* Pass parameters to the BPF program */ cb->qdisc_cb.flow_keys = &flow_keys; flow_keys.nhoff = nhoff; + flow_keys.thoff = nhoff; bpf_compute_data_pointers((struct sk_buff *)skb); result = BPF_PROG_RUN(attached, skb); diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c index 107350a7821d..df9d32fd2055 100644 --- a/tools/testing/selftests/bpf/bpf_flow.c +++ b/tools/testing/selftests/bpf/bpf_flow.c @@ -70,18 +70,18 @@ static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; - __u16 nhoff = skb->flow_keys->nhoff; + __u16 thoff = skb->flow_keys->thoff; __u8 *hdr; /* Verifies this variable offset does not overflow */ - if (nhoff > (USHRT_MAX - hdr_size)) + if (thoff > (USHRT_MAX - hdr_size)) return NULL; - hdr = data + nhoff; + hdr = data + thoff; if (hdr + hdr_size <= data_end) return hdr; - if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size)) + if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size)) return NULL; return buffer; @@ -158,13 +158,13 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) /* Only inspect standard GRE packets with version 0 */ return BPF_OK; - keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */ + keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */ if (GRE_IS_CSUM(gre->flags)) - keys->nhoff += 4; /* Step over chksum and Padding */ + keys->thoff += 4; /* Step over chksum and Padding */ if (GRE_IS_KEY(gre->flags)) - keys->nhoff += 4; /* Step over key */ + keys->thoff += 4; /* Step over key */ if (GRE_IS_SEQ(gre->flags)) - keys->nhoff += 4; /* Step over sequence number */ + keys->thoff += 4; /* Step over sequence number */ keys->is_encap = true; @@ -174,7 +174,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) if (!eth) return BPF_DROP; - keys->nhoff += sizeof(*eth); + keys->thoff += sizeof(*eth); return parse_eth_proto(skb, eth->h_proto); } else { @@ -191,7 +191,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) if ((__u8 *)tcp + (tcp->doff << 2) > data_end) return BPF_DROP; - keys->thoff = keys->nhoff; keys->sport = tcp->source; keys->dport = tcp->dest; return BPF_OK; @@ -201,7 +200,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) if (!udp) return BPF_DROP; - keys->thoff = keys->nhoff; keys->sport = udp->source; keys->dport = udp->dest; return BPF_OK; @@ -252,8 +250,8 @@ PROG(IP)(struct __sk_buff *skb) keys->ipv4_src = iph->saddr; keys->ipv4_dst = iph->daddr; - keys->nhoff += iph->ihl << 2; - if (data + keys->nhoff > data_end) + keys->thoff += iph->ihl << 2; + if (data + keys->thoff > data_end) return BPF_DROP; if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { @@ -285,7 +283,7 @@ PROG(IPV6)(struct __sk_buff *skb) keys->addr_proto = ETH_P_IPV6; memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); - keys->nhoff += sizeof(struct ipv6hdr); + keys->thoff += sizeof(struct ipv6hdr); return 
parse_ipv6_proto(skb, ip6h->nexthdr); } @@ -301,7 +299,7 @@ PROG(IPV6OP)(struct __sk_buff *skb) /* hlen is in 8-octets and does not include the first 8 bytes * of the header */ - skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3; + skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3; return parse_ipv6_proto(skb, ip6h->nexthdr); } @@ -315,7 +313,7 @@ PROG(IPV6FR)(struct __sk_buff *skb) if (!fragh) return BPF_DROP; - keys->nhoff += sizeof(*fragh); + keys->thoff += sizeof(*fragh); keys->is_frag = true; if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) keys->is_first_frag = true; @@ -341,7 +339,7 @@ PROG(VLAN)(struct __sk_buff *skb) __be16 proto; /* Peek back to see if single or double-tagging */ - if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto, + if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto, sizeof(proto))) return BPF_DROP; @@ -354,14 +352,14 @@ PROG(VLAN)(struct __sk_buff *skb) if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) return BPF_DROP; - keys->nhoff += sizeof(*vlan); + keys->thoff += sizeof(*vlan); } vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); if (!vlan) return BPF_DROP; - keys->nhoff += sizeof(*vlan); + keys->thoff += sizeof(*vlan); /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) -- cgit v1.2.3-59-g8ed1b From ec3d837aac5dca7cb8a69c9f101690c182da79c4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 5 Dec 2018 20:40:48 -0800 Subject: net/flow_dissector: correctly cap nhoff and thoff in case of BPF We want to make sure that the following condition holds: 0 <= nhoff <= thoff <= skb->len BPF program can set out-of-bounds nhoff and thoff, which is dangerous, see recent commit d0c081b49137 ("flow_dissector: properly cap thoff field")'. Signed-off-by: Stanislav Fomichev Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- net/core/flow_dissector.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index ff5556d80570..af68207ee56c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -791,9 +791,12 @@ bool __skb_flow_dissect(const struct sk_buff *skb, /* Restore state */ memcpy(cb, &cb_saved, sizeof(cb_saved)); + flow_keys.nhoff = clamp_t(u16, flow_keys.nhoff, 0, skb->len); + flow_keys.thoff = clamp_t(u16, flow_keys.thoff, + flow_keys.nhoff, skb->len); + __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); - key_control->thoff = min_t(u16, key_control->thoff, skb->len); rcu_read_unlock(); return result == BPF_OK; } -- cgit v1.2.3-59-g8ed1b From 51a11b14c29c6c7cc985c6ce7dd9e20f61cf7f0a Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 6 Dec 2018 14:57:01 +0530 Subject: bpf: powerpc: fix broken uapi for BPF_PROG_TYPE_PERF_EVENT Now that there are different variants of pt_regs for userspace and kernel, the uapi for the BPF_PROG_TYPE_PERF_EVENT program type must be changed by exporting the user_pt_regs structure instead of the pt_regs structure that is in-kernel only. 
Fixes: 002af9391bfb ("powerpc: Split user/kernel definitions of struct pt_regs") Signed-off-by: Sandipan Das Signed-off-by: Alexei Starovoitov --- arch/powerpc/include/asm/perf_event.h | 2 ++ arch/powerpc/include/uapi/asm/Kbuild | 1 - arch/powerpc/include/uapi/asm/bpf_perf_event.h | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/uapi/asm/bpf_perf_event.h diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index 8bf1b6351716..16a49819da9a 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -26,6 +26,8 @@ #include #include +#define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs + /* * Overload regs->result to specify whether we should use the MSR (result * is zero) or the SIAR (result is non zero). diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index a658091a19f9..3712152206f3 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -1,7 +1,6 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm -generic-y += bpf_perf_event.h generic-y += param.h generic-y += poll.h generic-y += resource.h diff --git a/arch/powerpc/include/uapi/asm/bpf_perf_event.h b/arch/powerpc/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..b551b741653d --- /dev/null +++ b/arch/powerpc/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ -- cgit v1.2.3-59-g8ed1b From c2a20a2731df11f9b7b7030f7ac3fc222c9ce39d Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 6 Dec 2018 20:14:11 -0800 Subject: selftests/bpf: add missing pointer dereference for map stacktrace fixup I get a segfault without it, other fixups always do dereference, and without dereference I don't understand how it can ever work. Fixes: 7c85c448e7d74 ("selftests/bpf: test_verifier, check bpf_map_lookup_elem access in bpf prog") Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index df6f751cc1e8..d23929a1985d 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -14166,7 +14166,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type, do { prog[*fixup_map_stacktrace].imm = map_fds[12]; fixup_map_stacktrace++; - } while (fixup_map_stacktrace); + } while (*fixup_map_stacktrace); } } -- cgit v1.2.3-59-g8ed1b From 4a2eb0c37b4759416996fbb4c45b932500cf06d3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 10 Dec 2018 18:00:52 +0800 Subject: sctp: initialize sin6_flowinfo for ipv6 addrs in sctp_inet6addr_event syzbot reported a kernel-infoleak, which is caused by an uninitialized field(sin6_flowinfo) of addr->a.v6 in sctp_inet6addr_event(). 
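The general rule here is that every member of a structure that is later copied to userspace must be initialized explicitly. A minimal userspace sketch of filling a struct sockaddr_in6 field by field (not the SCTP code, just the pattern; the loopback address is a placeholder):

```c
/* Illustrative sketch, not the SCTP code: assign every member that will
 * be exposed, so no stale stack bytes can leak through. */
#include <netinet/in.h>
#include <stdio.h>

int main(void)
{
	struct sockaddr_in6 a;

	/* The structure is not zeroed, so every member that is copied out
	 * must be assigned; forgetting one (as happened with sin6_flowinfo)
	 * leaves whatever bytes happened to be on the stack. */
	a.sin6_family = AF_INET6;
	a.sin6_port = 0;
	a.sin6_flowinfo = 0;               /* the member the fix sets explicitly */
	a.sin6_addr = in6addr_loopback;    /* placeholder address for the sketch */
	a.sin6_scope_id = 0;

	printf("family=%d flowinfo=%u scope=%u\n",
	       a.sin6_family, a.sin6_flowinfo, a.sin6_scope_id);
	return 0;
}
```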
The call trace is as below: BUG: KMSAN: kernel-infoleak in _copy_to_user+0x19a/0x230 lib/usercopy.c:33 CPU: 1 PID: 8164 Comm: syz-executor2 Not tainted 4.20.0-rc3+ #95 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x32d/0x480 lib/dump_stack.c:113 kmsan_report+0x12c/0x290 mm/kmsan/kmsan.c:683 kmsan_internal_check_memory+0x32a/0xa50 mm/kmsan/kmsan.c:743 kmsan_copy_to_user+0x78/0xd0 mm/kmsan/kmsan_hooks.c:634 _copy_to_user+0x19a/0x230 lib/usercopy.c:33 copy_to_user include/linux/uaccess.h:183 [inline] sctp_getsockopt_local_addrs net/sctp/socket.c:5998 [inline] sctp_getsockopt+0x15248/0x186f0 net/sctp/socket.c:7477 sock_common_getsockopt+0x13f/0x180 net/core/sock.c:2937 __sys_getsockopt+0x489/0x550 net/socket.c:1939 __do_sys_getsockopt net/socket.c:1950 [inline] __se_sys_getsockopt+0xe1/0x100 net/socket.c:1947 __x64_sys_getsockopt+0x62/0x80 net/socket.c:1947 do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 sin6_flowinfo is not really used by SCTP, so it will be fixed by simply setting it to 0. The issue exists since very beginning. Thanks Alexander for the reproducer provided. Reported-by: syzbot+ad5d327e6936a2e284be@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index fc6c5e4bffa5..7f0539db5604 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -101,6 +101,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_port = 0; + addr->a.v6.sin6_flowinfo = 0; addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; -- cgit v1.2.3-59-g8ed1b From 5648451e30a0d13d11796574919a359025d52cce Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 10 Dec 2018 12:41:24 -0600 Subject: ipv4: Fix potential Spectre v1 vulnerability vr.vifi is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: net/ipv4/ipmr.c:1616 ipmr_ioctl() warn: potential spectre issue 'mrt->vif_table' [r] (local cap) net/ipv4/ipmr.c:1690 ipmr_compat_ioctl() warn: potential spectre issue 'mrt->vif_table' [r] (local cap) Fix this by sanitizing vr.vifi before using it to index mrt->vif_table' Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- net/ipv4/ipmr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a6defbec4f1b..e7a3879cedd0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -69,6 +69,8 @@ #include #include +#include + struct ipmr_rule { struct fib_rule common; }; @@ -1612,6 +1614,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; if (vr.vifi >= mrt->maxvif) return -EINVAL; + vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { @@ -1686,6 +1689,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) return -EFAULT; if (vr.vifi >= mrt->maxvif) return -EINVAL; + vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { -- cgit v1.2.3-59-g8ed1b From aca1a80ebe3e4d49adaf6516c61a6786b1ee7dad Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 10 Dec 2018 15:25:04 -0800 Subject: selftests/bpf: use proper type when passing prog_type Use bpf_prog_type instead of bpf_map_type when passing prog_type. -Wenum-conversion might be unhappy about it: error: implicit conversion from enumeration type 'enum bpf_map_type' to different enumeration type 'enum bpf_prog_type' Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index d23929a1985d..37583267bdbc 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -13940,7 +13940,7 @@ static int create_map(uint32_t type, uint32_t size_key, return fd; } -static int create_prog_dummy1(enum bpf_map_type prog_type) +static int create_prog_dummy1(enum bpf_prog_type prog_type) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_0, 42), @@ -13951,7 +13951,7 @@ static int create_prog_dummy1(enum bpf_map_type prog_type) ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx) +static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_3, idx), @@ -13966,7 +13966,7 @@ static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx) ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem, +static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, int p1key) { int p2key = 1; @@ -14037,7 +14037,7 @@ static int create_cgroup_storage(bool percpu) static char bpf_vlog[UINT_MAX >> 8]; -static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type, +static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, struct bpf_insn *prog, int *map_fds) { int *fixup_map_hash_8b = test->fixup_map_hash_8b; -- cgit v1.2.3-59-g8ed1b From 6c5c7489089608d89b7ce310bca44812e2b0a4a5 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 10 Dec 2018 15:22:22 -0600 Subject: ibmvnic: Convert reset work item mutex to spin lock ibmvnic_reset can create and schedule a reset work item from an IRQ context, so do not use a mutex, which can sleep. Convert the reset work item mutex to a spin lock. Locking debugger generated the trace output below. 
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:908 in_atomic(): 1, irqs_disabled(): 1, pid: 120, name: kworker/8:1 4 locks held by kworker/8:1/120: #0: 0000000017c05720 ((wq_completion)"events"){+.+.}, at: process_one_work+0x188/0x710 #1: 00000000ace90706 ((linkwatch_work).work){+.+.}, at: process_one_work+0x188/0x710 #2: 000000007632871f (rtnl_mutex){+.+.}, at: rtnl_lock+0x30/0x50 #3: 00000000fc36813a (&(&crq->lock)->rlock){..-.}, at: ibmvnic_tasklet+0x88/0x2010 [ibmvnic] irq event stamp: 26293 hardirqs last enabled at (26292): [] tasklet_action_common.isra.12+0x78/0x1c0 hardirqs last disabled at (26293): [] _raw_spin_lock_irqsave+0x48/0xf0 softirqs last enabled at (26288): [] dev_deactivate_queue.constprop.28+0xc8/0x160 softirqs last disabled at (26289): [] call_do_softirq+0x14/0x24 CPU: 8 PID: 120 Comm: kworker/8:1 Kdump: loaded Not tainted 4.20.0-rc6 #6 Workqueue: events linkwatch_event Call Trace: [c0000003fffa7a50] [c000000000bc83e4] dump_stack+0xe8/0x164 (unreliable) [c0000003fffa7aa0] [c00000000015ba0c] ___might_sleep+0x2dc/0x320 [c0000003fffa7b20] [c000000000be960c] __mutex_lock+0x8c/0xb40 [c0000003fffa7c30] [d000000006202ac8] ibmvnic_reset+0x78/0x330 [ibmvnic] [c0000003fffa7cc0] [d0000000062097f4] ibmvnic_tasklet+0x1054/0x2010 [ibmvnic] [c0000003fffa7e00] [c0000000001224c8] tasklet_action_common.isra.12+0xd8/0x1c0 [c0000003fffa7e60] [c000000000bf1238] __do_softirq+0x1a8/0x64c [c0000003fffa7f90] [c0000000000306e0] call_do_softirq+0x14/0x24 [c0000003f3f87980] [c00000000001ba50] do_softirq_own_stack+0x60/0xb0 [c0000003f3f879c0] [c0000000001218a8] do_softirq+0xa8/0x100 [c0000003f3f879f0] [c000000000121a74] __local_bh_enable_ip+0x174/0x180 [c0000003f3f87a60] [c000000000bf003c] _raw_spin_unlock_bh+0x5c/0x80 [c0000003f3f87a90] [c000000000a8ac78] dev_deactivate_queue.constprop.28+0xc8/0x160 [c0000003f3f87ad0] [c000000000a8c8b0] dev_deactivate_many+0xd0/0x520 [c0000003f3f87b70] [c000000000a8cd40] dev_deactivate+0x40/0x60 [c0000003f3f87ba0] [c000000000a5e0c4] linkwatch_do_dev+0x74/0xd0 [c0000003f3f87bd0] [c000000000a5e694] __linkwatch_run_queue+0x1a4/0x1f0 [c0000003f3f87c30] [c000000000a5e728] linkwatch_event+0x48/0x60 [c0000003f3f87c50] [c0000000001444e8] process_one_work+0x238/0x710 [c0000003f3f87d20] [c000000000144a48] worker_thread+0x88/0x4e0 [c0000003f3f87db0] [c00000000014e3a8] kthread+0x178/0x1c0 [c0000003f3f87e20] [c00000000000bfd0] ret_from_kernel_thread+0x5c/0x6c Signed-off-by: Thomas Falcon Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 16 +++++++++------- drivers/net/ethernet/ibm/ibmvnic.h | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index ed50b8dee44f..ffc0cab05b0f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1939,8 +1939,9 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) { struct ibmvnic_rwi *rwi; + unsigned long flags; - mutex_lock(&adapter->rwi_lock); + spin_lock_irqsave(&adapter->rwi_lock, flags); if (!list_empty(&adapter->rwi_list)) { rwi = list_first_entry(&adapter->rwi_list, struct ibmvnic_rwi, @@ -1950,7 +1951,7 @@ static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) rwi = NULL; } - mutex_unlock(&adapter->rwi_lock); + spin_unlock_irqrestore(&adapter->rwi_lock, flags); return rwi; } @@ -2025,6 +2026,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, struct list_head *entry, *tmp_entry; struct ibmvnic_rwi *rwi, *tmp; struct net_device *netdev = adapter->netdev; + unsigned long flags; int ret; if (adapter->state == VNIC_REMOVING || @@ -2041,13 +2043,13 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, goto err; } - mutex_lock(&adapter->rwi_lock); + spin_lock_irqsave(&adapter->rwi_lock, flags); list_for_each(entry, &adapter->rwi_list) { tmp = list_entry(entry, struct ibmvnic_rwi, list); if (tmp->reset_reason == reason) { netdev_dbg(netdev, "Skipping matching reset\n"); - mutex_unlock(&adapter->rwi_lock); + spin_unlock_irqrestore(&adapter->rwi_lock, flags); ret = EBUSY; goto err; } @@ -2055,7 +2057,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, rwi = kzalloc(sizeof(*rwi), GFP_KERNEL); if (!rwi) { - mutex_unlock(&adapter->rwi_lock); + spin_unlock_irqrestore(&adapter->rwi_lock, flags); ibmvnic_close(netdev); ret = ENOMEM; goto err; @@ -2069,7 +2071,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, } rwi->reset_reason = reason; list_add_tail(&rwi->list, &adapter->rwi_list); - mutex_unlock(&adapter->rwi_lock); + spin_unlock_irqrestore(&adapter->rwi_lock, flags); adapter->resetting = true; netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason); schedule_work(&adapter->ibmvnic_reset); @@ -4759,7 +4761,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset); INIT_LIST_HEAD(&adapter->rwi_list); - mutex_init(&adapter->rwi_lock); + spin_lock_init(&adapter->rwi_lock); adapter->resetting = false; adapter->mac_change_pending = false; diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 99c4f8d331ce..f2018dbebfa5 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1075,7 +1075,7 @@ struct ibmvnic_adapter { struct tasklet_struct tasklet; enum vnic_state state; enum ibmvnic_reset_reason reset_reason; - struct mutex rwi_lock; + spinlock_t rwi_lock; struct list_head rwi_list; struct work_struct ibmvnic_reset; bool resetting; -- cgit v1.2.3-59-g8ed1b From 1d1bbc37f89b0559c9e913682f2489d89cfde6b8 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 10 Dec 2018 15:22:23 -0600 Subject: ibmvnic: Fix non-atomic memory allocation in IRQ context ibmvnic_reset allocated new reset work item objects in a non-atomic context. This can be called from a tasklet, generating the output below. 
Allocate work items with the GFP_ATOMIC flag instead. BUG: sleeping function called from invalid context at mm/slab.h:421 in_atomic(): 1, irqs_disabled(): 1, pid: 93, name: kworker/0:2 INFO: lockdep is turned off. irq event stamp: 66049 hardirqs last enabled at (66048): [] tasklet_action_common.isra.12+0x78/0x1c0 hardirqs last disabled at (66049): [] _raw_spin_lock_irqsave+0x48/0xf0 softirqs last enabled at (66044): [] dev_deactivate_queue.constprop.28+0xc8/0x160 softirqs last disabled at (66045): [] call_do_softirq+0x14/0x24 CPU: 0 PID: 93 Comm: kworker/0:2 Kdump: loaded Not tainted 4.20.0-rc6-00001-g1b50a8f03706 #7 Workqueue: events linkwatch_event Call Trace: [c0000003fffe7ae0] [c000000000bc83e4] dump_stack+0xe8/0x164 (unreliable) [c0000003fffe7b30] [c00000000015ba0c] ___might_sleep+0x2dc/0x320 [c0000003fffe7bb0] [c000000000391514] kmem_cache_alloc_trace+0x3e4/0x440 [c0000003fffe7c30] [d000000005b2309c] ibmvnic_reset+0x16c/0x360 [ibmvnic] [c0000003fffe7cc0] [d000000005b29834] ibmvnic_tasklet+0x1054/0x2010 [ibmvnic] [c0000003fffe7e00] [c0000000001224c8] tasklet_action_common.isra.12+0xd8/0x1c0 [c0000003fffe7e60] [c000000000bf1238] __do_softirq+0x1a8/0x64c [c0000003fffe7f90] [c0000000000306e0] call_do_softirq+0x14/0x24 [c0000003f3967980] [c00000000001ba50] do_softirq_own_stack+0x60/0xb0 [c0000003f39679c0] [c0000000001218a8] do_softirq+0xa8/0x100 [c0000003f39679f0] [c000000000121a74] __local_bh_enable_ip+0x174/0x180 [c0000003f3967a60] [c000000000bf003c] _raw_spin_unlock_bh+0x5c/0x80 [c0000003f3967a90] [c000000000a8ac78] dev_deactivate_queue.constprop.28+0xc8/0x160 [c0000003f3967ad0] [c000000000a8c8b0] dev_deactivate_many+0xd0/0x520 [c0000003f3967b70] [c000000000a8cd40] dev_deactivate+0x40/0x60 [c0000003f3967ba0] [c000000000a5e0c4] linkwatch_do_dev+0x74/0xd0 [c0000003f3967bd0] [c000000000a5e694] __linkwatch_run_queue+0x1a4/0x1f0 [c0000003f3967c30] [c000000000a5e728] linkwatch_event+0x48/0x60 [c0000003f3967c50] [c0000000001444e8] process_one_work+0x238/0x710 [c0000003f3967d20] [c000000000144a48] worker_thread+0x88/0x4e0 [c0000003f3967db0] [c00000000014e3a8] kthread+0x178/0x1c0 [c0000003f3967e20] [c00000000000bfd0] ret_from_kernel_thread+0x5c/0x6c Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index ffc0cab05b0f..67cc6d9c8fd7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2055,7 +2055,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, } } - rwi = kzalloc(sizeof(*rwi), GFP_KERNEL); + rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC); if (!rwi) { spin_unlock_irqrestore(&adapter->rwi_lock, flags); ibmvnic_close(netdev); -- cgit v1.2.3-59-g8ed1b From 290974d434783624c13a9530a23c45f9c5ffe018 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Mon, 10 Dec 2018 15:03:43 -0800 Subject: nfp: flower: ensure TCP flags can be placed in IPv6 frame Previously we did not ensure tcp flags have a place to be stored when using IPv6. We correct this by including IPv6 key layer when we match tcp flags and the IPv6 key layer has not been included already. Fixes: 07e1671cfca5 ("nfp: flower: refactor shared ip header in match offload") Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- .../net/ethernet/netronome/nfp/flower/offload.c | 28 +++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 2f49eb75f3cc..67e576fe7fc0 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -345,13 +345,29 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST))) return -EOPNOTSUPP; - /* We need to store TCP flags in the IPv4 key space, thus - * we need to ensure we include a IPv4 key layer if we have - * not done so already. + /* We need to store TCP flags in the either the IPv4 or IPv6 key + * space, thus we need to ensure we include a IPv4/IPv6 key + * layer if we have not done so already. */ - if (!(key_layer & NFP_FLOWER_LAYER_IPV4)) { - key_layer |= NFP_FLOWER_LAYER_IPV4; - key_size += sizeof(struct nfp_flower_ipv4); + if (!key_basic) + return -EOPNOTSUPP; + + if (!(key_layer & NFP_FLOWER_LAYER_IPV4) && + !(key_layer & NFP_FLOWER_LAYER_IPV6)) { + switch (key_basic->n_proto) { + case cpu_to_be16(ETH_P_IP): + key_layer |= NFP_FLOWER_LAYER_IPV4; + key_size += sizeof(struct nfp_flower_ipv4); + break; + + case cpu_to_be16(ETH_P_IPV6): + key_layer |= NFP_FLOWER_LAYER_IPV6; + key_size += sizeof(struct nfp_flower_ipv6); + break; + + default: + return -EOPNOTSUPP; + } } } -- cgit v1.2.3-59-g8ed1b From 8e41cae64b08fe2e86a9ffb88b295c6b4b3a3322 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 11 Dec 2018 11:13:39 +0800 Subject: ieee802154: ca8210: fix possible u8 overflow in ca8210_rx_done gcc warning this: drivers/net/ieee802154/ca8210.c:730:10: warning: comparison is always false due to limited range of data type [-Wtype-limits] 'len' is u8 type, we get it from buf[1] adding 2, which can overflow. This patch change the type of 'len' to unsigned int to avoid this,also fix the gcc warning. Fixes: ded845a781a5 ("ieee802154: Add CA8210 IEEE 802.15.4 device driver") Signed-off-by: YueHaibing Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 0ff5a403a8dc..b2ff903a9cb6 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -721,7 +721,7 @@ static void ca8210_mlme_reset_worker(struct work_struct *work) static void ca8210_rx_done(struct cas_control *cas_ctl) { u8 *buf; - u8 len; + unsigned int len; struct work_priv_container *mlme_reset_wpc; struct ca8210_priv *priv = cas_ctl->priv; @@ -730,7 +730,7 @@ static void ca8210_rx_done(struct cas_control *cas_ctl) if (len > CA8210_SPI_BUF_SIZE) { dev_crit( &priv->spi->dev, - "Received packet len (%d) erroneously long\n", + "Received packet len (%u) erroneously long\n", len ); goto finish; -- cgit v1.2.3-59-g8ed1b From fdadd04931c2d7cd294dc5b2b342863f94be53a3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 Dec 2018 12:14:12 +0100 Subject: bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K Michael and Sandipan report: Commit ede95a63b5 introduced a bpf_jit_limit tuneable to limit BPF JIT allocations. At compile time it defaults to PAGE_SIZE * 40000, and is adjusted again at init time if MODULES_VADDR is defined. For ppc64 kernels, MODULES_VADDR isn't defined, so we're stuck with the compile-time default at boot-time, which is 0x9c400000 when using 64K page size. 
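The quoted value is easy to verify: with 64K pages the compile-time default PAGE_SIZE * 40000 works out to 0x9c400000, which does not fit in a signed 32-bit integer. A standalone C check of just that arithmetic (not kernel code):

```c
/* Standalone arithmetic check of the 64K-page default described above. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t page_size = 64 * 1024;       /* 64K pages, as on this ppc64 config */
	uint64_t limit = page_size * 40000;   /* the old BPF_JIT_LIMIT_DEFAULT heuristic */

	printf("default: %llu (0x%llx)\n",
	       (unsigned long long)limit, (unsigned long long)limit);
	/* Narrowing to a signed 32-bit value wraps on common two's-complement
	 * targets, giving the negative number seen in the report below. */
	printf("as 32-bit int: %d\n", (int)(int32_t)limit);
	return 0;
}
```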
This overflows the signed 32-bit bpf_jit_limit value: root@ubuntu:/tmp# cat /proc/sys/net/core/bpf_jit_limit -1673527296 and can cause various unexpected failures throughout the network stack. In one case `strace dhclient eth0` reported: setsockopt(5, SOL_SOCKET, SO_ATTACH_FILTER, {len=11, filter=0x105dd27f8}, 16) = -1 ENOTSUPP (Unknown error 524) and similar failures can be seen with tools like tcpdump. This doesn't always reproduce however, and I'm not sure why. The more consistent failure I've seen is an Ubuntu 18.04 KVM guest booted on a POWER9 host would time out on systemd/netplan configuring a virtio-net NIC with no noticeable errors in the logs. Given this and also given that in near future some architectures like arm64 will have a custom area for BPF JIT image allocations we should get rid of the BPF_JIT_LIMIT_DEFAULT fallback / default entirely. For 4.21, we have an overridable bpf_jit_alloc_exec(), bpf_jit_free_exec() so therefore add another overridable bpf_jit_alloc_exec_limit() helper function which returns the possible size of the memory area for deriving the default heuristic in bpf_jit_charge_init(). Like bpf_jit_alloc_exec() and bpf_jit_free_exec(), the new bpf_jit_alloc_exec_limit() assumes that module_alloc() is the default JIT memory provider, and therefore in case archs implement their custom module_alloc() we use MODULES_{END,_VADDR} for limits and otherwise for vmalloc_exec() cases like on ppc64 we use VMALLOC_{END,_START}. Additionally, for archs supporting large page sizes, we should change the sysctl to be handled as long to not run into sysctl restrictions in future. Fixes: ede95a63b5e8 ("bpf: add bpf_jit_limit knob to restrict unpriv allocations") Reported-by: Sandipan Das Reported-by: Michael Roth Signed-off-by: Daniel Borkmann Tested-by: Michael Roth Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 2 +- kernel/bpf/core.c | 21 +++++++++++++++------ net/core/sysctl_net_core.c | 20 +++++++++++++++++--- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 795ff0b869bb..a8b9d90a8042 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -861,7 +861,7 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; -extern int bpf_jit_limit; +extern long bpf_jit_limit; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b1a3545d0ec8..b2890c268cb3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -365,13 +365,11 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) } #ifdef CONFIG_BPF_JIT -# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000) - /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; int bpf_jit_kallsyms __read_mostly; -int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT; +long bpf_jit_limit __read_mostly; static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, @@ -580,16 +578,27 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, static atomic_long_t bpf_jit_current; +/* Can be overridden by an arch's JIT compiler if it has a custom, + * dedicated BPF backend memory area, or if neither of the two + * below apply. 
+ */ +u64 __weak bpf_jit_alloc_exec_limit(void) +{ #if defined(MODULES_VADDR) + return MODULES_END - MODULES_VADDR; +#else + return VMALLOC_END - VMALLOC_START; +#endif +} + static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ - bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2, - PAGE_SIZE), INT_MAX); + bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, + PAGE_SIZE), LONG_MAX); return 0; } pure_initcall(bpf_jit_charge_init); -#endif static int bpf_jit_charge_modmem(u32 pages) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 37b4667128a3..d67ec17f2cc8 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -28,6 +28,8 @@ static int two __maybe_unused = 2; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; +static long long_one __maybe_unused = 1; +static long long_max __maybe_unused = LONG_MAX; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -289,6 +291,17 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } + +static int +proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +} #endif static struct ctl_table net_core_table[] = { @@ -398,10 +411,11 @@ static struct ctl_table net_core_table[] = { { .procname = "bpf_jit_limit", .data = &bpf_jit_limit, - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0600, - .proc_handler = proc_dointvec_minmax_bpf_restricted, - .extra1 = &one, + .proc_handler = proc_dolongvec_minmax_bpf_restricted, + .extra1 = &long_one, + .extra2 = &long_max, }, #endif { -- cgit v1.2.3-59-g8ed1b From ee28b30cbbe0b490f52ba222c9d392b99581545e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 9 Dec 2018 22:05:11 +0100 Subject: r8169: fix crash if CONFIG_DEBUG_SHIRQ is enabled If CONFIG_DEBUG_SHIRQ is enabled __free_irq() intentionally fires a spurious interrupt. This interrupt causes a crash because tp->dev->phydev is NULL at that time. Fixes: 38caff5a445b ("r8169: handle all interrupt events in the hard irq handler") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 1fd01688d37b..209566f8097b 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6469,7 +6469,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) goto out; } - if (status & LinkChg) + if (status & LinkChg && tp->dev->phydev) phy_mac_interrupt(tp->dev->phydev); if (unlikely(status & RxFIFOOver && -- cgit v1.2.3-59-g8ed1b From 158daed16efb1170694e420ae06ba8ba954d82e5 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Tue, 4 Dec 2018 15:18:52 +0100 Subject: i40e: fix mac filter delete when setting mac address A previous commit moved the ether_addr_copy() in i40e_set_mac() before the mac filter del/add to avoid a race. However it wasn't taken into account that this alters the mac address being handed to i40e_del_mac_filter(). Also changed i40e_add_mac_filter() to operate on netdev->dev_addr, hopefully that makes the code easier to read. 
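The ordering described above (remove the old address from the filter list, copy the new one into dev_addr, then add dev_addr back) can be sketched in plain userspace C with a toy single-entry filter list; the names below are invented for illustration and this is not the i40e code.

```c
/* Toy sketch of the del-old / copy / add-new ordering; not driver code. */
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

static unsigned char filter[ETH_ALEN];   /* toy single-entry "filter list" */

static void del_mac_filter(const unsigned char *mac)
{
	if (!memcmp(filter, mac, ETH_ALEN))
		memset(filter, 0, ETH_ALEN);
}

static void add_mac_filter(const unsigned char *mac)
{
	memcpy(filter, mac, ETH_ALEN);
}

/* Mirrors the fixed ordering: delete while dev_addr still holds the old
 * value, update dev_addr, then add the updated dev_addr. */
static void set_mac(unsigned char *dev_addr, const unsigned char *new_mac)
{
	del_mac_filter(dev_addr);
	memcpy(dev_addr, new_mac, ETH_ALEN);
	add_mac_filter(dev_addr);
}

int main(void)
{
	unsigned char dev_addr[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x01 };
	unsigned char new_mac[ETH_ALEN]  = { 0x02, 0, 0, 0, 0, 0x02 };

	add_mac_filter(dev_addr);
	set_mac(dev_addr, new_mac);
	printf("filter now tracks %02x:...:%02x\n", filter[0], filter[5]);
	return 0;
}
```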
Fixes: 458867b2ca0c ("i40e: don't remove netdev->dev_addr when syncing uc list") Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Acked-by: Jacob Keller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a3f45335437c..0e5dc74b4ef2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1543,17 +1543,17 @@ static int i40e_set_mac(struct net_device *netdev, void *p) netdev_info(netdev, "set new mac address %pM\n", addr->sa_data); /* Copy the address first, so that we avoid a possible race with - * .set_rx_mode(). If we copy after changing the address in the filter - * list, we might open ourselves to a narrow race window where - * .set_rx_mode could delete our dev_addr filter and prevent traffic - * from passing. + * .set_rx_mode(). + * - Remove old address from MAC filter + * - Copy new address + * - Add new address to MAC filter */ - ether_addr_copy(netdev->dev_addr, addr->sa_data); - spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_del_mac_filter(vsi, netdev->dev_addr); - i40e_add_mac_filter(vsi, addr->sa_data); + ether_addr_copy(netdev->dev_addr, addr->sa_data); + i40e_add_mac_filter(vsi, netdev->dev_addr); spin_unlock_bh(&vsi->mac_filter_hash_lock); + if (vsi->type == I40E_VSI_MAIN) { i40e_status ret; -- cgit v1.2.3-59-g8ed1b From 2a508c64ad278d022543ec3360449c7f243eb56b Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 4 Dec 2018 18:31:14 +0100 Subject: i40e: fix VLAN.TCI == 0 RX HW offload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes two bugs in hardware VLAN offload: 1. VLAN.TCI == 0 was being dropped 2. there was a race between disabling of VLAN RX feature in hardware and processing RX queue, where packets processed in this window could have their VLAN information dropped Fix moves the VLAN handling into i40e_process_skb_fields() to save on duplicated code. i40e_receive_skb() becomes trivial and so is removed. 
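Bug 1 above comes from keying the tag insertion on the VLAN ID bits of the TCI: a stripped tag with VID 0 (a zero TCI or a priority-tagged frame) fails a check of the form (vlan_tag & VLAN_VID_MASK) even though the RX descriptor flagged a valid tag, which is why the fix tests only the descriptor's L2TAG1P bit. A small standalone check of the TCI bit layout (mask values reproduced here for illustration):

```c
/* Illustrative check of the 802.1Q TCI bit layout: PCP (3) | DEI (1) | VID (12). */
#include <stdio.h>
#include <stdint.h>

#define VLAN_PRIO_MASK 0xe000   /* priority (PCP) bits */
#define VLAN_VID_MASK  0x0fff   /* VLAN ID bits */

static void classify(uint16_t tci)
{
	printf("tci=0x%04x vid-bits=%s prio-bits=%s\n", tci,
	       (tci & VLAN_VID_MASK) ? "set" : "clear",
	       (tci & VLAN_PRIO_MASK) ? "set" : "clear");
}

int main(void)
{
	classify(0x0000);  /* zero TCI: a VID-based check skips a tag the hardware stripped */
	classify(0xa000);  /* priority-tagged, VID 0: also skipped by a VID-based check */
	classify(0x0064);  /* VID 100: the only kind of tag such a check accepted */
	return 0;
}
```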
Signed-off-by: Michał Mirosław Signed-off-by: Michał Mirosław Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 31 ++++++---------------- drivers/net/ethernet/intel/i40e/i40e_txrx_common.h | 2 -- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 6 +---- 3 files changed, 9 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index aef3c89ee79c..bc6a873ec574 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1558,24 +1558,6 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, return true; } -/** - * i40e_receive_skb - Send a completed packet up the stack - * @rx_ring: rx ring in play - * @skb: packet to send up - * @vlan_tag: vlan tag for packet - **/ -void i40e_receive_skb(struct i40e_ring *rx_ring, - struct sk_buff *skb, u16 vlan_tag) -{ - struct i40e_q_vector *q_vector = rx_ring->q_vector; - - if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && - (vlan_tag & VLAN_VID_MASK)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); - - napi_gro_receive(&q_vector->napi, skb); -} - /** * i40e_alloc_rx_buffers - Replace used receive buffers * @rx_ring: ring to place buffers on @@ -1812,6 +1794,13 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, skb_record_rx_queue(skb, rx_ring->queue_index); + if (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) { + u16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1; + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + le16_to_cpu(vlan_tag)); + } + /* modifies the skb - consumes the enet header */ skb->protocol = eth_type_trans(skb, rx_ring->netdev); } @@ -2350,7 +2339,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; unsigned int size; - u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -2451,11 +2439,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* populate checksum, VLAN, and protocol */ i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); - vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ? 
- le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; - i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb); - i40e_receive_skb(rx_ring, skb, vlan_tag); + napi_gro_receive(&rx_ring->q_vector->napi, skb); skb = NULL; /* update budget accounting */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h index 09809dffe399..2c077f1c4eca 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h @@ -14,8 +14,6 @@ struct i40e_rx_buffer *i40e_clean_programming_status( void i40e_process_skb_fields(struct i40e_ring *rx_ring, union i40e_rx_desc *rx_desc, struct sk_buff *skb, u8 rx_ptype); -void i40e_receive_skb(struct i40e_ring *rx_ring, - struct sk_buff *skb, u16 vlan_tag); void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring); void i40e_update_rx_stats(struct i40e_ring *rx_ring, unsigned int total_rx_bytes, diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 433c8e688c78..a6ea2b249471 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -634,7 +634,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) struct i40e_rx_buffer *bi; union i40e_rx_desc *rx_desc; unsigned int size; - u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -717,10 +716,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); - - vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ? - le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; - i40e_receive_skb(rx_ring, skb, vlan_tag); + napi_gro_receive(&rx_ring->q_vector->napi, skb); } i40e_finalize_xdp_rx(rx_ring, xdp_xmit); -- cgit v1.2.3-59-g8ed1b From 5146f95df782b0ac61abde36567e718692725c89 Mon Sep 17 00:00:00 2001 From: Hui Peng Date: Wed, 12 Dec 2018 12:42:24 +0100 Subject: USB: hso: Fix OOB memory access in hso_probe/hso_get_config_data The function hso_probe reads if_num from the USB device (as an u8) and uses it without a length check to index an array, resulting in an OOB memory read in hso_probe or hso_get_config_data. Add a length check for both locations and updated hso_probe to bail on error. This issue has been assigned CVE-2018-19985. Reported-by: Hui Peng Reported-by: Mathias Payer Signed-off-by: Hui Peng Signed-off-by: Mathias Payer Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 184c24baca15..d6916f787fce 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2807,6 +2807,12 @@ static int hso_get_config_data(struct usb_interface *interface) return -EIO; } + /* check if we have a valid interface */ + if (if_num > 16) { + kfree(config_data); + return -EINVAL; + } + switch (config_data[if_num]) { case 0x0: result = 0; @@ -2877,10 +2883,18 @@ static int hso_probe(struct usb_interface *interface, /* Get the interface/port specification from either driver_info or from * the device itself */ - if (id->driver_info) + if (id->driver_info) { + /* if_num is controlled by the device, driver_info is a 0 terminated + * array. Make sure, the access is in bounds! 
*/ + for (i = 0; i <= if_num; ++i) + if (((u32 *)(id->driver_info))[i] == 0) + goto exit; port_spec = ((u32 *)(id->driver_info))[if_num]; - else + } else { port_spec = hso_get_config_data(interface); + if (port_spec < 0) + goto exit; + } /* Check if we need to switch to alt interfaces prior to port * configuration */ -- cgit v1.2.3-59-g8ed1b From 800b8f637d07cc00a05e2e0d847d8d7c61745654 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 4 Dec 2018 18:31:15 +0100 Subject: i40e: DRY rx_ptype handling code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move rx_ptype extracting to i40e_process_skb_fields() to avoid duplicating the code. Signed-off-by: Michał Mirosław Signed-off-by: Michał Mirosław Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 12 ++++-------- drivers/net/ethernet/intel/i40e/i40e_txrx_common.h | 3 +-- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 6 +----- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index bc6a873ec574..d0a95424ce58 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1775,8 +1775,7 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, * other fields within the skb. **/ void i40e_process_skb_fields(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc, struct sk_buff *skb, - u8 rx_ptype) + union i40e_rx_desc *rx_desc, struct sk_buff *skb) { u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> @@ -1784,6 +1783,8 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK; u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >> I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT; + u8 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> + I40E_RXD_QW1_PTYPE_SHIFT; if (unlikely(tsynvalid)) i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn); @@ -2339,7 +2340,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; unsigned int size; - u8 rx_ptype; u64 qword; /* return some buffers to hardware, one at a time is too slow */ @@ -2432,12 +2432,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> - I40E_RXD_QW1_PTYPE_SHIFT; - /* populate checksum, VLAN, and protocol */ - i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); + i40e_process_skb_fields(rx_ring, rx_desc, skb); i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb); napi_gro_receive(&rx_ring->q_vector->napi, skb); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h index 2c077f1c4eca..8af0e99c6c0d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h @@ -12,8 +12,7 @@ struct i40e_rx_buffer *i40e_clean_programming_status( union i40e_rx_desc *rx_desc, u64 qw); void i40e_process_skb_fields(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc, struct sk_buff *skb, - u8 rx_ptype); + union i40e_rx_desc *rx_desc, struct sk_buff *skb); void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring); void 
i40e_update_rx_stats(struct i40e_ring *rx_ring, unsigned int total_rx_bytes, diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index a6ea2b249471..870cf654e436 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -634,7 +634,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) struct i40e_rx_buffer *bi; union i40e_rx_desc *rx_desc; unsigned int size; - u8 rx_ptype; u64 qword; if (cleaned_count >= I40E_RX_BUFFER_WRITE) { @@ -712,10 +711,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) total_rx_bytes += skb->len; total_rx_packets++; - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> - I40E_RXD_QW1_PTYPE_SHIFT; - i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); + i40e_process_skb_fields(rx_ring, rx_desc, skb); napi_gro_receive(&rx_ring->q_vector->napi, skb); } -- cgit v1.2.3-59-g8ed1b From 96d1a731611f711f0cb82cea93363ae2ea8cb028 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Wed, 5 Dec 2018 13:54:26 +0000 Subject: ixgbe: Fix race when the VF driver does a reset When the VF driver does a reset, it (at least the Linux one) writes to the VFCTRL register to issue a reset and then immediately sends a reset message using the mailbox API. This is racy because when the PF driver detects that the VFCTRL register reset pin has been asserted, it clears the mailbox memory. Depending on ordering, the reset message sent by the VF could be cleared by the PF driver. It then responds to the cleared message with a NACK which causes the VF driver to malfunction. Fix this by deferring clearing the mailbox memory until the reset message is received. Fixes: 939b701ad633 ("ixgbe: fix driver behaviour after issuing VFLR") Signed-off-by: Ross Lagerwall Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 5dacfc870259..345701af7749 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -700,7 +700,6 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) u8 num_tcs = adapter->hw_tcs; u32 reg_val; u32 queue; - u32 word; /* remove VLAN filters beloning to this VF */ ixgbe_clear_vf_vlans(adapter, vf); @@ -758,6 +757,14 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) } } + IXGBE_WRITE_FLUSH(hw); +} + +static void ixgbe_vf_clear_mbx(struct ixgbe_adapter *adapter, u32 vf) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 word; + /* Clear VF's mailbox memory */ for (word = 0; word < IXGBE_VFMAILBOX_SIZE; word++) IXGBE_WRITE_REG_ARRAY(hw, IXGBE_PFMBMEM(vf), word, 0); @@ -831,6 +838,8 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) /* reset the filters for the device */ ixgbe_vf_reset_event(adapter, vf); + ixgbe_vf_clear_mbx(adapter, vf); + /* set vf mac address */ if (!is_zero_ether_addr(vf_mac)) ixgbe_set_vf_mac(adapter, vf, vf_mac); -- cgit v1.2.3-59-g8ed1b From 708abf74dd87f8640871b814faa195fb5970b0e3 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Mon, 10 Dec 2018 14:39:37 +0100 Subject: netfilter: ipset: do not call ipset_nest_end after nla_nest_cancel In the error handling block, nla_nest_cancel(skb, atd) is called to cancel the nest operation. 
But then, ipset_nest_end(skb, atd) is unexpected called to end the nest operation. This patch calls the ipset_nest_end only on the branch that nla_nest_cancel is not called. Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel") Signed-off-by: Pan Bian Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_list_set.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 4eef55da0878..8da228da53ae 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -531,8 +531,8 @@ nla_put_failure: ret = -EMSGSIZE; } else { cb->args[IPSET_CB_ARG0] = i; + ipset_nest_end(skb, atd); } - ipset_nest_end(skb, atd); out: rcu_read_unlock(); return ret; -- cgit v1.2.3-59-g8ed1b From 542fbda0f08f1cbbc250f9e59f7537649651d0c8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 11 Dec 2018 07:45:29 +0100 Subject: netfilter: nat: can't use dst_hold on noref dst The dst entry might already have a zero refcount, waiting on rcu list to be free'd. Using dst_hold() transitions its reference count to 1, and next dst release will try to free it again -- resulting in a double free: WARNING: CPU: 1 PID: 0 at include/net/dst.h:239 nf_xfrm_me_harder+0xe7/0x130 [nf_nat] RIP: 0010:nf_xfrm_me_harder+0xe7/0x130 [nf_nat] Code: 48 8b 5c 24 60 65 48 33 1c 25 28 00 00 00 75 53 48 83 c4 68 5b 5d 41 5c c3 85 c0 74 0d 8d 48 01 f0 0f b1 0a 74 86 85 c0 75 f3 <0f> 0b e9 7b ff ff ff 29 c6 31 d2 b9 20 00 48 00 4c 89 e7 e8 31 27 Call Trace: nf_nat_ipv4_out+0x78/0x90 [nf_nat_ipv4] nf_hook_slow+0x36/0xd0 ip_output+0x9f/0xd0 ip_forward+0x328/0x440 ip_rcv+0x8a/0xb0 Use dst_hold_safe instead and bail out if we cannot take a reference. Fixes: a4c2fd7f7891 ("net: remove DST_NOCACHE flag") Reported-by: Martin Zaharinov Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index e2b196054dfc..2268b10a9dcf 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -117,7 +117,8 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) dst = skb_dst(skb); if (dst->xfrm) dst = ((struct xfrm_dst *)dst)->route; - dst_hold(dst); + if (!dst_hold_safe(dst)) + return -EHOSTUNREACH; if (sk && !net_eq(net, sock_net(sk))) sk = NULL; -- cgit v1.2.3-59-g8ed1b From d4e7df16567b80836a78d31b42f1a9355a636d67 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 8 Dec 2018 11:03:01 +0900 Subject: netfilter: nf_conncount: use rb_link_node_rcu() instead of rb_link_node() rbnode in insert_tree() is rcu protected pointer. So, in order to handle this pointer, _rcu function should be used. rb_link_node_rcu() is a rcu version of rb_link_node(). 
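As a minimal sketch of the pattern (the struct, lock and key names are illustrative, not the nf_conncount ones), an insertion into an rbtree that is walked by lockless RCU readers should publish the new node with rb_link_node_rcu():

#include <linux/rbtree.h>
#include <linux/spinlock.h>

struct demo_node {
	struct rb_node node;
	u32 key;
};

/* Writer side: readers traverse the tree under rcu_read_lock(),
 * writers serialize on @lock.
 */
static void demo_insert(struct rb_root *root, spinlock_t *lock,
			struct demo_node *new)
{
	struct rb_node **link, *parent = NULL;

	spin_lock_bh(lock);
	link = &root->rb_node;
	while (*link) {
		struct demo_node *cur = rb_entry(*link, struct demo_node, node);

		parent = *link;
		link = new->key < cur->key ? &(*link)->rb_left
					   : &(*link)->rb_right;
	}
	/* rb_link_node_rcu() publishes the node with rcu_assign_pointer(),
	 * so a concurrent lockless reader never sees it half-initialized;
	 * plain rb_link_node() gives no such ordering guarantee.
	 */
	rb_link_node_rcu(&new->node, parent, link);
	rb_insert_color(&new->node, root);
	spin_unlock_bh(lock);
}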
Fixes: 34848d5c896e ("netfilter: nf_conncount: Split insert and traversal") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index b6d0f6deea86..9cd180bda092 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -427,7 +427,7 @@ insert_tree(struct net *net, count = 1; rbconn->list.count = count; - rb_link_node(&rbconn->node, parent, rbnode); + rb_link_node_rcu(&rbconn->node, parent, rbnode); rb_insert_color(&rbconn->node, root); out_unlock: spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); -- cgit v1.2.3-59-g8ed1b From bbf666c1af916ed74795493c564df6fad462cc80 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 12 Dec 2018 08:57:00 -0800 Subject: bnx2x: Clear fip MAC when fcoe offload support is disabled On some customer setups it was observed that shmem contains a non-zero fip MAC for 57711 which would lead to enabling of SW FCoE. Add a software workaround to clear the bad fip mac address if no FCoE connections are supported. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 95309b27c7d1..9052d57895d5 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -11750,8 +11750,10 @@ static void bnx2x_get_fcoe_info(struct bnx2x *bp) * If maximum allowed number of connections is zero - * disable the feature. */ - if (!bp->cnic_eth_dev.max_fcoe_conn) + if (!bp->cnic_eth_dev.max_fcoe_conn) { bp->flags |= NO_FCOE_FLAG; + eth_zero_addr(bp->fip_mac); + } } static void bnx2x_get_cnic_info(struct bnx2x *bp) -- cgit v1.2.3-59-g8ed1b From 04f05230c5c13b1384f66f5186a68d7499e34622 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 12 Dec 2018 08:57:01 -0800 Subject: bnx2x: Remove configured vlans as part of unload sequence. Vlans are not getting removed when drivers are unloaded. The recent storm firmware versions had added safeguards against re-configuring an already configured vlan. As a result, PF inner reload flows (e.g., mtu change) might trigger an assertion. This change is going to remove vlans (same as we do for MACs) when doing a chip cleanup during unload. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 34 +++++++++++++++++++----- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h | 4 ++- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 9052d57895d5..01a973e05356 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -8472,6 +8472,7 @@ int bnx2x_set_vlan_one(struct bnx2x *bp, u16 vlan, /* Fill a user request section if needed */ if (!test_bit(RAMROD_CONT, ramrod_flags)) { ramrod_param.user_req.u.vlan.vlan = vlan; + __set_bit(BNX2X_VLAN, &ramrod_param.user_req.vlan_mac_flags); /* Set the command: ADD or DEL */ if (set) ramrod_param.user_req.cmd = BNX2X_VLAN_MAC_ADD; @@ -8492,6 +8493,27 @@ int bnx2x_set_vlan_one(struct bnx2x *bp, u16 vlan, return rc; } +static int bnx2x_del_all_vlans(struct bnx2x *bp) +{ + struct bnx2x_vlan_mac_obj *vlan_obj = &bp->sp_objs[0].vlan_obj; + unsigned long ramrod_flags = 0, vlan_flags = 0; + struct bnx2x_vlan_entry *vlan; + int rc; + + __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); + __set_bit(BNX2X_VLAN, &vlan_flags); + rc = vlan_obj->delete_all(bp, vlan_obj, &vlan_flags, &ramrod_flags); + if (rc) + return rc; + + /* Mark that hw forgot all entries */ + list_for_each_entry(vlan, &bp->vlan_reg, link) + vlan->hw = false; + bp->vlan_cnt = 0; + + return 0; +} + int bnx2x_del_all_macs(struct bnx2x *bp, struct bnx2x_vlan_mac_obj *mac_obj, int mac_type, bool wait_for_comp) @@ -9330,6 +9352,11 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) BNX2X_ERR("Failed to schedule DEL commands for UC MACs list: %d\n", rc); + /* Remove all currently configured VLANs */ + rc = bnx2x_del_all_vlans(bp); + if (rc < 0) + BNX2X_ERR("Failed to delete all VLANs\n"); + /* Disable LLH */ if (!CHIP_IS_E1(bp)) REG_WR(bp, NIG_REG_LLH0_FUNC_EN + port*8, 0); @@ -13026,13 +13053,6 @@ static void bnx2x_vlan_configure(struct bnx2x *bp, bool set_rx_mode) int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp) { - struct bnx2x_vlan_entry *vlan; - - /* The hw forgot all entries after reload */ - list_for_each_entry(vlan, &bp->vlan_reg, link) - vlan->hw = false; - bp->vlan_cnt = 0; - /* Don't set rx mode here. Our caller will do it. */ bnx2x_vlan_configure(bp, false); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index 0bf2fd470819..7a6e82db4231 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -265,6 +265,7 @@ enum { BNX2X_ETH_MAC, BNX2X_ISCSI_ETH_MAC, BNX2X_NETQ_ETH_MAC, + BNX2X_VLAN, BNX2X_DONT_CONSUME_CAM_CREDIT, BNX2X_DONT_CONSUME_CAM_CREDIT_DEST, }; @@ -272,7 +273,8 @@ enum { #define BNX2X_VLAN_MAC_CMP_MASK (1 << BNX2X_UC_LIST_MAC | \ 1 << BNX2X_ETH_MAC | \ 1 << BNX2X_ISCSI_ETH_MAC | \ - 1 << BNX2X_NETQ_ETH_MAC) + 1 << BNX2X_NETQ_ETH_MAC | \ + 1 << BNX2X_VLAN) #define BNX2X_VLAN_MAC_CMP_FLAGS(flags) \ ((flags) & BNX2X_VLAN_MAC_CMP_MASK) -- cgit v1.2.3-59-g8ed1b From 07f12622a66320d5f56a71a921cf70a43e7a6b87 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 12 Dec 2018 08:57:02 -0800 Subject: bnx2x: Enable PTP only on the PF that initializes the port There will be only one PHC clock per port. PTP should be enabled only on one PF per port. The change enables PTP functionality on the PF that initializes the port. 
The change is useful in multi-function modes e.g., NPAR where a port can have more than one PF. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 1 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 1 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 22 ++++++++++------------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 0de487a8f0eb..7ea9066ce126 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -2520,6 +2520,7 @@ void bnx2x_update_mfw_dump(struct bnx2x *bp); void bnx2x_init_ptp(struct bnx2x *bp); int bnx2x_configure_ptp_filters(struct bnx2x *bp); void bnx2x_set_rx_ts(struct bnx2x *bp, struct sk_buff *skb); +void bnx2x_register_phc(struct bnx2x *bp); #define BNX2X_MAX_PHC_DRIFT 31000000 #define BNX2X_PTP_TX_TIMEOUT diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 686899d7e555..ecb1bd7eb508 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2842,6 +2842,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) bnx2x_set_rx_mode_inner(bp); if (bp->flags & PTP_SUPPORTED) { + bnx2x_register_phc(bp); bnx2x_init_ptp(bp); bnx2x_configure_ptp_filters(bp); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 01a973e05356..6b54eb1ad8ba 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -7723,6 +7723,9 @@ static int bnx2x_init_hw_port(struct bnx2x *bp) REG_WR(bp, reg_addr, val); } + if (CHIP_IS_E3B0(bp)) + bp->flags |= PTP_SUPPORTED; + return 0; } @@ -9444,8 +9447,13 @@ unload_error: * function stop ramrod is sent, since as part of this ramrod FW access * PTP registers. 
*/ - if (bp->flags & PTP_SUPPORTED) + if (bp->flags & PTP_SUPPORTED) { bnx2x_stop_ptp(bp); + if (bp->ptp_clock) { + ptp_clock_unregister(bp->ptp_clock); + bp->ptp_clock = NULL; + } + } /* Disable HW interrupts, NAPI */ bnx2x_netif_stop(bp, 1); @@ -12523,9 +12531,6 @@ static int bnx2x_init_bp(struct bnx2x *bp) bp->dump_preset_idx = 1; - if (CHIP_IS_E3B0(bp)) - bp->flags |= PTP_SUPPORTED; - return rc; } @@ -13917,7 +13922,7 @@ static int bnx2x_ptp_enable(struct ptp_clock_info *ptp, return -ENOTSUPP; } -static void bnx2x_register_phc(struct bnx2x *bp) +void bnx2x_register_phc(struct bnx2x *bp) { /* Fill the ptp_clock_info struct and register PTP clock*/ bp->ptp_clock_info.owner = THIS_MODULE; @@ -14119,8 +14124,6 @@ static int bnx2x_init_one(struct pci_dev *pdev, dev->base_addr, bp->pdev->irq, dev->dev_addr); pcie_print_link_status(bp->pdev); - bnx2x_register_phc(bp); - if (!IS_MF_SD_STORAGE_PERSONALITY_ONLY(bp)) bnx2x_set_os_driver_state(bp, OS_DRIVER_STATE_DISABLED); @@ -14153,11 +14156,6 @@ static void __bnx2x_remove(struct pci_dev *pdev, struct bnx2x *bp, bool remove_netdev) { - if (bp->ptp_clock) { - ptp_clock_unregister(bp->ptp_clock); - bp->ptp_clock = NULL; - } - /* Delete storage MAC address */ if (!NO_FCOE(bp)) { rtnl_lock(); -- cgit v1.2.3-59-g8ed1b From 9061193c4ee065d3240fde06767c2e06ec61decc Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 12 Dec 2018 08:57:03 -0800 Subject: bnx2x: Send update-svid ramrod with retry/poll flags enabled Driver sends update-SVID ramrod in the MFW notification path. If there is a pending ramrod, driver doesn't retry the command and storm firmware will never be updated with the SVID value. The patch adds changes to send update-svid ramrod in process context with retry/poll flags set. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 1 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 7ea9066ce126..5cd3135dfe30 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1282,6 +1282,7 @@ enum sp_rtnl_flag { BNX2X_SP_RTNL_TX_STOP, BNX2X_SP_RTNL_GET_DRV_VERSION, BNX2X_SP_RTNL_CHANGE_UDP_PORT, + BNX2X_SP_RTNL_UPDATE_SVID, }; enum bnx2x_iov_flag { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 6b54eb1ad8ba..b164f705709d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -2925,6 +2925,10 @@ static void bnx2x_handle_update_svid_cmd(struct bnx2x *bp) func_params.f_obj = &bp->func_obj; func_params.cmd = BNX2X_F_CMD_SWITCH_UPDATE; + /* Prepare parameters for function state transitions */ + __set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); + __set_bit(RAMROD_RETRY, &func_params.ramrod_flags); + if (IS_MF_UFP(bp) || IS_MF_BD(bp)) { int func = BP_ABS_FUNC(bp); u32 val; @@ -4311,7 +4315,8 @@ static void bnx2x_attn_int_deasserted3(struct bnx2x *bp, u32 attn) bnx2x_handle_eee_event(bp); if (val & DRV_STATUS_OEM_UPDATE_SVID) - bnx2x_handle_update_svid_cmd(bp); + bnx2x_schedule_sp_rtnl(bp, + BNX2X_SP_RTNL_UPDATE_SVID, 0); if (bp->link_vars.periodic_flags & PERIODIC_FLAGS_LINK_EVENT) { @@ -10394,6 +10399,9 @@ sp_rtnl_not_reset: &bp->sp_rtnl_state)) bnx2x_update_mng_version(bp); + if (test_and_clear_bit(BNX2X_SP_RTNL_UPDATE_SVID, &bp->sp_rtnl_state)) + bnx2x_handle_update_svid_cmd(bp); + if (test_and_clear_bit(BNX2X_SP_RTNL_CHANGE_UDP_PORT, &bp->sp_rtnl_state)) { if (bnx2x_udp_port_update(bp)) { -- cgit v1.2.3-59-g8ed1b From 841df922417eb82c835e93d4b93eb6a68c99d599 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 13 Dec 2018 10:53:37 +0800 Subject: vhost: make sure used idx is seen before log in vhost_add_used_n() We miss a write barrier that guarantees used idx is updated and seen before log. This will let userspace sync and copy used ring before used idx is update. Fix this by adding a barrier before log_write(). Fixes: 8dd014adfea6f ("vhost-net: mergeable buffers support") Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/vhost/vhost.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 6b98d8e3a5bf..5915f240275a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2220,6 +2220,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, return -EFAULT; } if (unlikely(vq->log_used)) { + /* Make sure used idx is seen before log. */ + smp_wmb(); /* Log used index update. */ log_write(vq->log_base, vq->log_addr + offsetof(struct vring_used, idx), -- cgit v1.2.3-59-g8ed1b From 476e8ba712fa6a479192dfc4301cd3765d9ba713 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 13 Dec 2018 10:53:38 +0800 Subject: vhost_net: switch to use mutex_trylock() in vhost_net_busy_poll() We used to hold the mutex of paired virtqueue in vhost_net_busy_poll(). But this will results an inconsistent lock order which may cause deadlock if we try to bring back the protection of device IOTLB with vq mutex that requires to hold mutex of all virtqueues at the same time. 
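In outline, the hazard and the opportunistic way around it look like this (a generic sketch, not the vhost code itself):

#include <linux/mutex.h>

static DEFINE_MUTEX(lock_a);	/* e.g. one virtqueue's mutex */
static DEFINE_MUTEX(lock_b);	/* e.g. the paired virtqueue's mutex */

/* If one path takes A then B while another takes B then A, each side can
 * end up holding one lock and blocking forever on the other (AB-BA
 * deadlock).
 */
static void demo_strict_order(void)
{
	mutex_lock(&lock_a);
	mutex_lock(&lock_b);	/* may block behind the opposite ordering */
	/* ... work needing both locks ... */
	mutex_unlock(&lock_b);
	mutex_unlock(&lock_a);
}

/* For best-effort work, trying the second lock and skipping the work on
 * contention removes the inversion entirely.
 */
static void demo_trylock(void)
{
	mutex_lock(&lock_b);
	if (mutex_trylock(&lock_a)) {	/* returns 1 on success, 0 otherwise */
		/* ... optional work needing both locks ... */
		mutex_unlock(&lock_a);
	}
	mutex_unlock(&lock_b);
}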
Fix this simply by switching to use mutex_trylock(), when fail just skip the busy polling. This can happen when device IOTLB is under updating which should be rare. Fixes: commit 78139c94dc8c ("net: vhost: lock the vqs one by one") Cc: Tonghao Zhang Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/vhost/net.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ab11b2bee273..ad7a6f475a44 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -513,7 +513,13 @@ static void vhost_net_busy_poll(struct vhost_net *net, struct socket *sock; struct vhost_virtqueue *vq = poll_rx ? tvq : rvq; - mutex_lock_nested(&vq->mutex, poll_rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX); + /* Try to hold the vq mutex of the paired virtqueue. We can't + * use mutex_lock() here since we could not guarantee a + * consistenet lock ordering. + */ + if (!mutex_trylock(&vq->mutex)) + return; + vhost_disable_notify(&net->dev, vq); sock = rvq->private_data; -- cgit v1.2.3-59-g8ed1b From 86a07da34380bc9467fa407dc3e484488a8ad40d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 13 Dec 2018 10:53:39 +0800 Subject: Revert "net: vhost: lock the vqs one by one" This reverts commit 78139c94dc8c96a478e67dab3bee84dc6eccb5fd. We don't protect device IOTLB with vq mutex, which will lead e.g use after free for device IOTLB entries. And since we've switched to use mutex_trylock() in previous patch, it's safe to revert it without having deadlock. Fixes: commit 78139c94dc8c ("net: vhost: lock the vqs one by one") Cc: Tonghao Zhang Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/vhost/vhost.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5915f240275a..55e5aa662ad5 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -295,11 +295,8 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) { int i; - for (i = 0; i < d->nvqs; ++i) { - mutex_lock(&d->vqs[i]->mutex); + for (i = 0; i < d->nvqs; ++i) __vhost_vq_meta_reset(d->vqs[i]); - mutex_unlock(&d->vqs[i]->mutex); - } } static void vhost_vq_reset(struct vhost_dev *dev, @@ -895,6 +892,20 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, #define vhost_get_used(vq, x, ptr) \ vhost_get_user(vq, x, ptr, VHOST_ADDR_USED) +static void vhost_dev_lock_vqs(struct vhost_dev *d) +{ + int i = 0; + for (i = 0; i < d->nvqs; ++i) + mutex_lock_nested(&d->vqs[i]->mutex, i); +} + +static void vhost_dev_unlock_vqs(struct vhost_dev *d) +{ + int i = 0; + for (i = 0; i < d->nvqs; ++i) + mutex_unlock(&d->vqs[i]->mutex); +} + static int vhost_new_umem_range(struct vhost_umem *umem, u64 start, u64 size, u64 end, u64 userspace_addr, int perm) @@ -976,6 +987,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, int ret = 0; mutex_lock(&dev->mutex); + vhost_dev_lock_vqs(dev); switch (msg->type) { case VHOST_IOTLB_UPDATE: if (!dev->iotlb) { @@ -1009,6 +1021,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, break; } + vhost_dev_unlock_vqs(dev); mutex_unlock(&dev->mutex); return ret; -- cgit v1.2.3-59-g8ed1b From 663f146f2ecfcc47934cb9f9543f664eeb6adb46 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Wed, 31 Oct 2018 16:03:21 +0200 Subject: net/mlx5: E-Switch, Fix fdb cap bits swap The cap bits locations for the fdb caps of multi path to table (used for local mirroring) and multi encap (used for 
prio/chains) were wrongly used in swapped locations. This went unnoted so far b/c we tested the offending patch with CX5 FW that supports both of them. On different environments where not both caps are supported, we will be messed up, fix that. Fixes: b9aa0ba17af5 ('net/mlx5: Add cap bits for multi fdb encap') Signed-off-by: Vu Pham Reviewed-by: Or Gerlitz Tested-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 34e17e6f8942..4e77bfe0b580 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -582,11 +582,13 @@ struct mlx5_ifc_flow_table_nic_cap_bits { }; struct mlx5_ifc_flow_table_eswitch_cap_bits { - u8 reserved_at_0[0x1c]; - u8 fdb_multi_path_to_table[0x1]; - u8 reserved_at_1d[0x1]; + u8 reserved_at_0[0x1a]; u8 multi_fdb_encap[0x1]; - u8 reserved_at_1e[0x1e1]; + u8 reserved_at_1b[0x1]; + u8 fdb_multi_path_to_table[0x1]; + u8 reserved_at_1d[0x3]; + + u8 reserved_at_20[0x1e0]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; -- cgit v1.2.3-59-g8ed1b From 8956f0014ea5ed824aae3816fd9e52cf3c85b9d4 Mon Sep 17 00:00:00 2001 From: Gavi Teitz Date: Mon, 19 Nov 2018 17:22:30 +0200 Subject: net/mlx5e: Fix default amount of channels for VF representors The default amount of channels a representor opens was erroneously changed from one to the maximum amount of channels, restore to its intended value. Fixes: 779d986d60de ("net/mlx5e: Do not ignore netdevice TX/RX queues number") Signed-off-by: Gavi Teitz Reviewed-by: Or Gerlitz Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index c3c657548824..6a4d782c1de1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -46,6 +46,7 @@ #define MLX5E_REP_PARAMS_LOG_SQ_SIZE \ max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) +#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -1083,9 +1084,7 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev, if (err) return err; - - priv->channels.params.num_channels = - mlx5e_get_netdev_max_channels(netdev); + priv->channels.params.num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS; mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu); mlx5e_build_rep_netdev(netdev); -- cgit v1.2.3-59-g8ed1b From e1c15b62b7015119d3e5915cd2ae3b89d59c2576 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 2 Nov 2018 06:10:49 +0200 Subject: net/mlx5e: RX, Verify MPWQE stride size is in range Add check of MPWQE stride size is within range supported by HW. In case calculated MPWQE stride size exceed range, linear SKB can't be used and we should use non linear MPWQE instead. 
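For context, a stripped-down sketch of this kind of range check (the field width and base below are made-up example values, not the real mlx5 capability layout):

#include <linux/types.h>
#include <linux/bits.h>
#include <linux/log2.h>

#define DEMO_LOG_STRIDE_FIELD_BITS	4	/* assumed width of the FW field */
#define DEMO_LOG_STRIDE_SZ_BASE		6	/* assumed encoding base */

/* Largest log2 stride size the (assumed) capability field can express. */
#define DEMO_MAX_LOG_STRIDE_SZ \
	((BIT(DEMO_LOG_STRIDE_FIELD_BITS) - 1) + DEMO_LOG_STRIDE_SZ_BASE)

static bool demo_linear_stride_in_range(u32 frag_sz)
{
	/* order_base_2() rounds up to the exponent of the next power of
	 * two, so any stride the hardware cannot encode is rejected and
	 * the caller falls back to non-linear MPWQE.
	 */
	return order_base_2(frag_sz) <= DEMO_MAX_LOG_STRIDE_SZ;
}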
Fixes: 619a8f2a42f1 ("net/mlx5e: Use linear SKB in Striding RQ") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 871313d6b34d..c17de6a990ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -128,6 +128,8 @@ static bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, return !params->lro_en && frag_sz <= PAGE_SIZE; } +#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \ + MLX5_MPWQE_LOG_STRIDE_SZ_BASE) static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { @@ -138,6 +140,9 @@ static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, if (!mlx5e_rx_is_linear_skb(mdev, params)) return false; + if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) + return false; + if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) return true; -- cgit v1.2.3-59-g8ed1b From e88afe759a49f8d51d6cc5fc89d65b4a5768703f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 9 Dec 2018 17:03:36 +0200 Subject: net/mlx5e: Err if asked to mirror a goto chain tc eswitch rule Currently we are not supporting this and not err-ing on that either. For now, just err if asked to do that. Fixes: bf07aa730a04 ('net/mlx5e: Support offloading tc priorities and chains for eswitch flows') Signed-off-by: Or Gerlitz Reported-by: Eli Britstein Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fca6f4132c91..1f01c905a042 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2966,8 +2966,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range"); return -EOPNOTSUPP; } - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = dest_chain; continue; @@ -2980,6 +2979,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) return -EOPNOTSUPP; + if (attr->dest_chain) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); + return -EOPNOTSUPP; + } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + if (attr->mirror_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { NL_SET_ERR_MSG_MOD(extack, "current firmware doesn't support split rule for port mirroring"); -- cgit v1.2.3-59-g8ed1b From d14f6f2a84326dd2751962167b2293fc91751ff8 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 9 Dec 2018 17:15:23 +0200 Subject: net/mlx5e: Avoid overriding the user provided priority for offloaded tc rules Just a leftover which was wrongly left there, remove it while spawning a message to suggest firmware upgrade. 
Fixes: bf07aa730a04 ('net/mlx5e: Support offloading tc priorities and chains for eswitch flows') Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1f01c905a042..e6170ddf2cc4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -907,11 +907,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_priv *out_priv; int err = 0, encap_err = 0; - /* if prios are not supported, keep the old behaviour of using same prio - * for all offloaded rules. - */ - if (!mlx5_eswitch_prios_supported(esw)) - attr->prio = 1; + if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) { + NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW"); + return -EOPNOTSUPP; + } if (attr->chain > max_chain) { NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); -- cgit v1.2.3-59-g8ed1b From 154e62abe9cd07c9b71735d8456a03a7c5251280 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 10 Dec 2018 12:31:42 +0200 Subject: net/mlx5e: Properly initialize flow attributes for slow path eswitch rule deletion When a neighbour is resolved, we delete the goto slow path rule from HW. The eswitch flow attributes where not properly initialized on that case, hence we mess up the eswitch refcounts for chain zero (the default one). Fix that along with making sure to use semicolons and not commas on that code; Fixes: 5dbe906ff1d5 ('net/mlx5e: Use a slow path rule instead if vxlan neighbour isn't available') Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e6170ddf2cc4..3b732c02b5e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -870,9 +870,9 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule; memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); - slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - slow_attr->mirror_count = 0, - slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN, + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->mirror_count = 0; + slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) @@ -887,6 +887,9 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *slow_attr) { memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->mirror_count = 0; + slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); flow->flags &= ~MLX5E_TC_FLOW_SLOW; } -- cgit v1.2.3-59-g8ed1b From 61c806dafe81c58f41fec10fb5cad0d3a4390e3e Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 10 Dec 2018 17:05:59 +0200 Subject: net/mlx5e: Avoid encap flows deletion attempt the 1st time a neigh is resolved Currently, we are deleting offloaded encap flows in case the relevant neigh becomes unconnected while the encap is valid (a sign that it used to be connected), or if the curr neigh mac is different 
from the cached mac (a sign that the remote side changed their mac). The 2nd check also applies when the neigh becomes connected on the 1st time (we start with zero mac). Before the offending commit, the deleting handler was practically no op, as no flows were offloaded. But since that commit, we offload neigh-less encap flows to slow path. Under mirroring scheme, we go into the delete handler, attempt to unoffload a mirror rule which was never set (as we were offloading to slow path) and crash. Fix that by calling the delete handler only when the encap is valid, which covers both cases mentioned above. Fixes: 5dbe906ff1d5 ('net/mlx5e: Use a slow path rule instead if vxlan neighbour isn't available') Signed-off-by: Or Gerlitz Signed-off-by: Eli Britstein Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 6a4d782c1de1..820fe85100b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -467,8 +467,8 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, ASSERT_RTNL(); - if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) || - !ether_addr_equal(e->h_dest, ha)) + if ((e->flags & MLX5_ENCAP_ENTRY_VALID) && + (!neigh_connected || !ether_addr_equal(e->h_dest, ha))) mlx5e_tc_encap_flows_del(priv, e); if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3b732c02b5e1..9dabe9d4b279 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1096,10 +1096,9 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, flow->rule[0] = rule; } - if (e->flags & MLX5_ENCAP_ENTRY_VALID) { - e->flags &= ~MLX5_ENCAP_ENTRY_VALID; - mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); - } + /* we know that the encap is valid */ + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); } static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) -- cgit v1.2.3-59-g8ed1b From d13b224f431579fe2d712871d4265d7a22ca6c9c Mon Sep 17 00:00:00 2001 From: Mikhael Goikhman Date: Mon, 19 Nov 2018 19:11:12 +0200 Subject: net/mlx5e: Remove unused UDP GSO remaining counter Remove tx_udp_seg_rem counter from ethtool output, as it is no longer being updated in the driver's data flow. 
Fixes: 3f44899ef2ce ("net/mlx5e: Use PARTIAL_GSO for UDP segmentation") Signed-off-by: Mikhael Goikhman Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 3e99d0728b2f..4337afd610d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -74,7 +74,6 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_udp_seg_rem) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) }, @@ -198,7 +197,6 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_nop += sq_stats->nop; s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; - s->tx_udp_seg_rem += sq_stats->udp_seg_rem; s->tx_queue_dropped += sq_stats->dropped; s->tx_cqe_err += sq_stats->cqe_err; s->tx_recover += sq_stats->recover; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 3f8e870ef4c9..3ff69ddae2d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -87,7 +87,6 @@ struct mlx5e_sw_stats { u64 tx_recover; u64 tx_cqes; u64 tx_queue_wake; - u64 tx_udp_seg_rem; u64 tx_cqe_err; u64 tx_xdp_xmit; u64 tx_xdp_full; @@ -221,7 +220,6 @@ struct mlx5e_sq_stats { u64 csum_partial_inner; u64 added_vlan_packets; u64 nop; - u64 udp_seg_rem; #ifdef CONFIG_MLX5_EN_TLS u64 tls_ooo; u64 tls_resync_bytes; -- cgit v1.2.3-59-g8ed1b From fa2bf86bab4bbc61e5678a42a14e40075093a98f Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Thu, 22 Nov 2018 14:20:45 +0200 Subject: net/mlx5e: Cancel DIM work on close SQ TXQ SQ closure is followed by closing the corresponding CQ. A pending DIM work would try to modify the now non-existing CQ. This would trigger an error: [85535.835926] mlx5_core 0000:af:00.0: mlx5_cmd_check:769:(pid 124399): MODIFY_CQ(0x403) op_mod(0x0) failed, status bad resource state(0x9), syndrome (0x1d7771) Fix by making sure to cancel any pending DIM work before destroying the SQ. 
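The teardown rule being applied, as a minimal sketch (structure and names are illustrative, not the mlx5e ones): cancel deferred work that touches an object before that object is destroyed.

#include <linux/workqueue.h>
#include <linux/slab.h>

struct demo_queue {
	struct work_struct dim_work;
	/* ... state (e.g. a CQ handle) that the work callback modifies ... */
};

static void demo_close_queue(struct demo_queue *q)
{
	/* cancel_work_sync() also waits for an already-running instance to
	 * finish, so past this point no callback can touch the state we
	 * are about to tear down.
	 */
	cancel_work_sync(&q->dim_work);

	/* ... now it is safe to destroy the CQ/SQ and free the object ... */
	kfree(q);
}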
Fixes: cbce4f444798 ("net/mlx5e: Enable adaptive-TX moderation") Signed-off-by: Tal Gilboa Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c17de6a990ff..b70cb6fd164c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1401,6 +1401,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) struct mlx5_core_dev *mdev = c->mdev; struct mlx5_rate_limit rl = {0}; + cancel_work_sync(&sq->dim.work); mlx5e_destroy_sq(mdev, sq->sqn); if (sq->rate_limit) { rl.rate = sq->rate_limit; -- cgit v1.2.3-59-g8ed1b From 8cfa272b0d321160ebb5b45073e39ef0a6ad73f2 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 17 Nov 2018 20:55:03 -0600 Subject: rtlwifi: Fix leak of skb when processing C2H_BT_INFO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With commit 0a9f8f0a1ba9 ("rtlwifi: fix btmpinfo timeout while processing C2H_BT_INFO"), calling rtl_c2hcmd_enqueue() with rtl_c2h_fast_cmd() true, the routine returns without freeing that skb, thereby leaking it. This issue has been discussed at https://github.com/lwfinger/rtlwifi_new/issues/401 and the fix tested there. Fixes: 0a9f8f0a1ba9 ("rtlwifi: fix btmpinfo timeout while processing C2H_BT_INFO") Reported-and-tested-by: Francisco Machado Magalhães Neto Cc: Francisco Machado Magalhães Neto Cc: Ping-Ke Shih Cc: Stable # 4.18+ Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index f4122c8fdd97..ef9b502ce576 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -2289,6 +2289,7 @@ void rtl_c2hcmd_enqueue(struct ieee80211_hw *hw, struct sk_buff *skb) if (rtl_c2h_fast_cmd(hw, skb)) { rtl_c2h_content_parsing(hw, skb); + kfree_skb(skb); return; } -- cgit v1.2.3-59-g8ed1b From 1aa48f088615ebfa5e139951a0d3e7dc2c2af4ec Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 30 Nov 2018 09:59:57 -0800 Subject: Revert "mwifiex: restructure rx_reorder_tbl_lock usage" This reverts commit 5188d5453bc9380ccd4ae1086138dd485d13aef2, because it introduced lock recursion: BUG: spinlock recursion on CPU#2, kworker/u13:1/395 lock: 0xffffffc0e28a47f0, .magic: dead4ead, .owner: kworker/u13:1/395, .owner_cpu: 2 CPU: 2 PID: 395 Comm: kworker/u13:1 Not tainted 4.20.0-rc4+ #2 Hardware name: Google Kevin (DT) Workqueue: MWIFIEX_RX_WORK_QUEUE mwifiex_rx_work_queue [mwifiex] Call trace: dump_backtrace+0x0/0x140 show_stack+0x20/0x28 dump_stack+0x84/0xa4 spin_bug+0x98/0xa4 do_raw_spin_lock+0x5c/0xdc _raw_spin_lock_irqsave+0x38/0x48 mwifiex_flush_data+0x2c/0xa4 [mwifiex] call_timer_fn+0xcc/0x1c4 run_timer_softirq+0x264/0x4f0 __do_softirq+0x1a8/0x35c do_softirq+0x54/0x64 netif_rx_ni+0xe8/0x120 mwifiex_recv_packet+0xfc/0x10c [mwifiex] mwifiex_process_rx_packet+0x1d4/0x238 [mwifiex] mwifiex_11n_dispatch_pkt+0x190/0x1ac [mwifiex] mwifiex_11n_rx_reorder_pkt+0x28c/0x354 [mwifiex] mwifiex_process_sta_rx_packet+0x204/0x26c [mwifiex] mwifiex_handle_rx_packet+0x15c/0x16c [mwifiex] mwifiex_rx_work_queue+0x104/0x134 [mwifiex] worker_thread+0x4cc/0x72c kthread+0x134/0x13c ret_from_fork+0x10/0x18 This was clearly not tested well at all. 
I simply performed 'wget' in a loop and it fell over within a few seconds. Fixes: 5188d5453bc9 ("mwifiex: restructure rx_reorder_tbl_lock usage") Cc: Cc: Ganapathi Bhat Signed-off-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/11n.c | 5 +- .../net/wireless/marvell/mwifiex/11n_rxreorder.c | 96 +++++++++++----------- drivers/net/wireless/marvell/mwifiex/uap_txrx.c | 3 - 3 files changed, 51 insertions(+), 53 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c index e2addd8b878b..5d75c971004b 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.c +++ b/drivers/net/wireless/marvell/mwifiex/11n.c @@ -696,11 +696,10 @@ void mwifiex_11n_delba(struct mwifiex_private *priv, int tid) "Send delba to tid=%d, %pM\n", tid, rx_reor_tbl_ptr->ta); mwifiex_send_delba(priv, tid, rx_reor_tbl_ptr->ta, 0); - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, - flags); - return; + goto exit; } } +exit: spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); } diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index 8e63d14c1e1c..5380fba652cc 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -103,8 +103,6 @@ static int mwifiex_11n_dispatch_pkt(struct mwifiex_private *priv, void *payload) * There could be holes in the buffer, which are skipped by the function. * Since the buffer is linear, the function uses rotation to simulate * circular buffer. - * - * The caller must hold rx_reorder_tbl_lock spinlock. */ static void mwifiex_11n_dispatch_pkt_until_start_win(struct mwifiex_private *priv, @@ -113,21 +111,25 @@ mwifiex_11n_dispatch_pkt_until_start_win(struct mwifiex_private *priv, { int pkt_to_send, i; void *rx_tmp_ptr; + unsigned long flags; pkt_to_send = (start_win > tbl->start_win) ? min((start_win - tbl->start_win), tbl->win_size) : tbl->win_size; for (i = 0; i < pkt_to_send; ++i) { + spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); rx_tmp_ptr = NULL; if (tbl->rx_reorder_ptr[i]) { rx_tmp_ptr = tbl->rx_reorder_ptr[i]; tbl->rx_reorder_ptr[i] = NULL; } + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); if (rx_tmp_ptr) mwifiex_11n_dispatch_pkt(priv, rx_tmp_ptr); } + spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); /* * We don't have a circular buffer, hence use rotation to simulate * circular buffer @@ -138,6 +140,7 @@ mwifiex_11n_dispatch_pkt_until_start_win(struct mwifiex_private *priv, } tbl->start_win = start_win; + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); } /* @@ -147,8 +150,6 @@ mwifiex_11n_dispatch_pkt_until_start_win(struct mwifiex_private *priv, * The start window is adjusted automatically when a hole is located. * Since the buffer is linear, the function uses rotation to simulate * circular buffer. - * - * The caller must hold rx_reorder_tbl_lock spinlock. 
*/ static void mwifiex_11n_scan_and_dispatch(struct mwifiex_private *priv, @@ -156,15 +157,22 @@ mwifiex_11n_scan_and_dispatch(struct mwifiex_private *priv, { int i, j, xchg; void *rx_tmp_ptr; + unsigned long flags; for (i = 0; i < tbl->win_size; ++i) { - if (!tbl->rx_reorder_ptr[i]) + spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); + if (!tbl->rx_reorder_ptr[i]) { + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, + flags); break; + } rx_tmp_ptr = tbl->rx_reorder_ptr[i]; tbl->rx_reorder_ptr[i] = NULL; + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); mwifiex_11n_dispatch_pkt(priv, rx_tmp_ptr); } + spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); /* * We don't have a circular buffer, hence use rotation to simulate * circular buffer @@ -177,6 +185,7 @@ mwifiex_11n_scan_and_dispatch(struct mwifiex_private *priv, } } tbl->start_win = (tbl->start_win + i) & (MAX_TID_VALUE - 1); + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); } /* @@ -184,8 +193,6 @@ mwifiex_11n_scan_and_dispatch(struct mwifiex_private *priv, * * The function stops the associated timer and dispatches all the * pending packets in the Rx reorder table before deletion. - * - * The caller must hold rx_reorder_tbl_lock spinlock. */ static void mwifiex_del_rx_reorder_entry(struct mwifiex_private *priv, @@ -211,7 +218,11 @@ mwifiex_del_rx_reorder_entry(struct mwifiex_private *priv, del_timer_sync(&tbl->timer_context.timer); tbl->timer_context.timer_is_set = false; + + spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); list_del(&tbl->list); + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); + kfree(tbl->rx_reorder_ptr); kfree(tbl); @@ -224,17 +235,22 @@ mwifiex_del_rx_reorder_entry(struct mwifiex_private *priv, /* * This function returns the pointer to an entry in Rx reordering * table which matches the given TA/TID pair. - * - * The caller must hold rx_reorder_tbl_lock spinlock. */ struct mwifiex_rx_reorder_tbl * mwifiex_11n_get_rx_reorder_tbl(struct mwifiex_private *priv, int tid, u8 *ta) { struct mwifiex_rx_reorder_tbl *tbl; + unsigned long flags; - list_for_each_entry(tbl, &priv->rx_reorder_tbl_ptr, list) - if (!memcmp(tbl->ta, ta, ETH_ALEN) && tbl->tid == tid) + spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); + list_for_each_entry(tbl, &priv->rx_reorder_tbl_ptr, list) { + if (!memcmp(tbl->ta, ta, ETH_ALEN) && tbl->tid == tid) { + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, + flags); return tbl; + } + } + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); return NULL; } @@ -251,9 +267,14 @@ void mwifiex_11n_del_rx_reorder_tbl_by_ta(struct mwifiex_private *priv, u8 *ta) return; spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); - list_for_each_entry_safe(tbl, tmp, &priv->rx_reorder_tbl_ptr, list) - if (!memcmp(tbl->ta, ta, ETH_ALEN)) + list_for_each_entry_safe(tbl, tmp, &priv->rx_reorder_tbl_ptr, list) { + if (!memcmp(tbl->ta, ta, ETH_ALEN)) { + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, + flags); mwifiex_del_rx_reorder_entry(priv, tbl); + spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); + } + } spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); return; @@ -262,18 +283,24 @@ void mwifiex_11n_del_rx_reorder_tbl_by_ta(struct mwifiex_private *priv, u8 *ta) /* * This function finds the last sequence number used in the packets * buffered in Rx reordering table. - * - * The caller must hold rx_reorder_tbl_lock spinlock. 
*/ static int mwifiex_11n_find_last_seq_num(struct reorder_tmr_cnxt *ctx) { struct mwifiex_rx_reorder_tbl *rx_reorder_tbl_ptr = ctx->ptr; + struct mwifiex_private *priv = ctx->priv; + unsigned long flags; int i; - for (i = rx_reorder_tbl_ptr->win_size - 1; i >= 0; --i) - if (rx_reorder_tbl_ptr->rx_reorder_ptr[i]) + spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); + for (i = rx_reorder_tbl_ptr->win_size - 1; i >= 0; --i) { + if (rx_reorder_tbl_ptr->rx_reorder_ptr[i]) { + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, + flags); return i; + } + } + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); return -1; } @@ -291,22 +318,17 @@ mwifiex_flush_data(struct timer_list *t) struct reorder_tmr_cnxt *ctx = from_timer(ctx, t, timer); int start_win, seq_num; - unsigned long flags; ctx->timer_is_set = false; - spin_lock_irqsave(&ctx->priv->rx_reorder_tbl_lock, flags); seq_num = mwifiex_11n_find_last_seq_num(ctx); - if (seq_num < 0) { - spin_unlock_irqrestore(&ctx->priv->rx_reorder_tbl_lock, flags); + if (seq_num < 0) return; - } mwifiex_dbg(ctx->priv->adapter, INFO, "info: flush data %d\n", seq_num); start_win = (ctx->ptr->start_win + seq_num + 1) & (MAX_TID_VALUE - 1); mwifiex_11n_dispatch_pkt_until_start_win(ctx->priv, ctx->ptr, start_win); - spin_unlock_irqrestore(&ctx->priv->rx_reorder_tbl_lock, flags); } /* @@ -333,14 +355,11 @@ mwifiex_11n_create_rx_reorder_tbl(struct mwifiex_private *priv, u8 *ta, * If we get a TID, ta pair which is already present dispatch all the * the packets and move the window size until the ssn */ - spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); tbl = mwifiex_11n_get_rx_reorder_tbl(priv, tid, ta); if (tbl) { mwifiex_11n_dispatch_pkt_until_start_win(priv, tbl, seq_num); - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); return; } - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); /* if !tbl then create one */ new_node = kzalloc(sizeof(struct mwifiex_rx_reorder_tbl), GFP_KERNEL); if (!new_node) @@ -551,20 +570,16 @@ int mwifiex_11n_rx_reorder_pkt(struct mwifiex_private *priv, int prev_start_win, start_win, end_win, win_size; u16 pkt_index; bool init_window_shift = false; - unsigned long flags; int ret = 0; - spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); tbl = mwifiex_11n_get_rx_reorder_tbl(priv, tid, ta); if (!tbl) { - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); if (pkt_type != PKT_TYPE_BAR) mwifiex_11n_dispatch_pkt(priv, payload); return ret; } if ((pkt_type == PKT_TYPE_AMSDU) && !tbl->amsdu) { - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); mwifiex_11n_dispatch_pkt(priv, payload); return ret; } @@ -651,8 +666,6 @@ done: if (!tbl->timer_context.timer_is_set || prev_start_win != tbl->start_win) mwifiex_11n_rxreorder_timer_restart(tbl); - - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); return ret; } @@ -681,18 +694,14 @@ mwifiex_del_ba_tbl(struct mwifiex_private *priv, int tid, u8 *peer_mac, peer_mac, tid, initiator); if (cleanup_rx_reorder_tbl) { - spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); tbl = mwifiex_11n_get_rx_reorder_tbl(priv, tid, peer_mac); if (!tbl) { - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, - flags); mwifiex_dbg(priv->adapter, EVENT, "event: TID, TA not found in table\n"); return; } mwifiex_del_rx_reorder_entry(priv, tbl); - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); } else { ptx_tbl = mwifiex_get_ba_tbl(priv, tid, peer_mac); if (!ptx_tbl) { @@ -726,7 +735,6 @@ int mwifiex_ret_11n_addba_resp(struct mwifiex_private *priv, int tid, 
win_size; struct mwifiex_rx_reorder_tbl *tbl; uint16_t block_ack_param_set; - unsigned long flags; block_ack_param_set = le16_to_cpu(add_ba_rsp->block_ack_param_set); @@ -740,20 +748,17 @@ int mwifiex_ret_11n_addba_resp(struct mwifiex_private *priv, mwifiex_dbg(priv->adapter, ERROR, "ADDBA RSP: failed %pM tid=%d)\n", add_ba_rsp->peer_mac_addr, tid); - spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); tbl = mwifiex_11n_get_rx_reorder_tbl(priv, tid, add_ba_rsp->peer_mac_addr); if (tbl) mwifiex_del_rx_reorder_entry(priv, tbl); - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); return 0; } win_size = (block_ack_param_set & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> BLOCKACKPARAM_WINSIZE_POS; - spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); tbl = mwifiex_11n_get_rx_reorder_tbl(priv, tid, add_ba_rsp->peer_mac_addr); if (tbl) { @@ -764,7 +769,6 @@ int mwifiex_ret_11n_addba_resp(struct mwifiex_private *priv, else tbl->amsdu = false; } - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); mwifiex_dbg(priv->adapter, CMD, "cmd: ADDBA RSP: %pM tid=%d ssn=%d win_size=%d\n", @@ -804,8 +808,11 @@ void mwifiex_11n_cleanup_reorder_tbl(struct mwifiex_private *priv) spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); list_for_each_entry_safe(del_tbl_ptr, tmp_node, - &priv->rx_reorder_tbl_ptr, list) + &priv->rx_reorder_tbl_ptr, list) { + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); mwifiex_del_rx_reorder_entry(priv, del_tbl_ptr); + spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); + } INIT_LIST_HEAD(&priv->rx_reorder_tbl_ptr); spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); @@ -929,7 +936,6 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, int tlv_buf_left = len; int ret; u8 *tmp; - unsigned long flags; mwifiex_dbg_dump(priv->adapter, EVT_D, "RXBA_SYNC event:", event_buf, len); @@ -949,18 +955,14 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, tlv_rxba->mac, tlv_rxba->tid, tlv_seq_num, tlv_bitmap_len); - spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); rx_reor_tbl_ptr = mwifiex_11n_get_rx_reorder_tbl(priv, tlv_rxba->tid, tlv_rxba->mac); if (!rx_reor_tbl_ptr) { - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, - flags); mwifiex_dbg(priv->adapter, ERROR, "Can not find rx_reorder_tbl!"); return; } - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); for (i = 0; i < tlv_bitmap_len; i++) { for (j = 0 ; j < 8; j++) { diff --git a/drivers/net/wireless/marvell/mwifiex/uap_txrx.c b/drivers/net/wireless/marvell/mwifiex/uap_txrx.c index a83c5afc256a..5ce85d5727e4 100644 --- a/drivers/net/wireless/marvell/mwifiex/uap_txrx.c +++ b/drivers/net/wireless/marvell/mwifiex/uap_txrx.c @@ -421,15 +421,12 @@ int mwifiex_process_uap_rx_packet(struct mwifiex_private *priv, spin_unlock_irqrestore(&priv->sta_list_spinlock, flags); } - spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); if (!priv->ap_11n_enabled || (!mwifiex_11n_get_rx_reorder_tbl(priv, uap_rx_pd->priority, ta) && (le16_to_cpu(uap_rx_pd->rx_pkt_type) != PKT_TYPE_AMSDU))) { ret = mwifiex_handle_uap_rx_forward(priv, skb); - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); return ret; } - spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); /* Reorder and send to kernel */ pkt_type = (u8)le16_to_cpu(uap_rx_pd->rx_pkt_type); -- cgit v1.2.3-59-g8ed1b From 7c250f4612ae97aa04500c0d0cff69bb87046e3a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 16 Nov 2018 17:19:21 +0100 Subject: mt76: fix potential NULL pointer dereference in 
mt76_stop_tx_queues Starting from mac80211 commit adf8ed01e4fd ("mac80211: add an optional TXQ for other PS-buffered frames") and commit 0eeb2b674f05 ("mac80211: add an option for station management TXQ") a new per-sta queue has been introduced for bufferable management frames. sta->txq[IEEE80211_NUM_TIDS] is initialized just if the driver reports the following hw flags: - IEEE80211_HW_STA_MMPDU_TXQ - IEEE80211_HW_BUFF_MMPDU_TXQ This can produce a NULL pointer dereference in mt76_stop_tx_queues since mt76 iterates on all available sta tx queues assuming they are initialized by mac80211. This issue has been spotted analyzing the code (it has not triggered any crash yet) Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/tx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 7cbce03aa65b..aa426b838ffa 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -400,7 +400,12 @@ void mt76_stop_tx_queues(struct mt76_dev *dev, struct ieee80211_sta *sta, for (i = 0; i < ARRAY_SIZE(sta->txq); i++) { struct ieee80211_txq *txq = sta->txq[i]; - struct mt76_txq *mtxq = (struct mt76_txq *) txq->drv_priv; + struct mt76_txq *mtxq; + + if (!txq) + continue; + + mtxq = (struct mt76_txq *)txq->drv_priv; spin_lock_bh(&mtxq->hwq->lock); mtxq->send_bar = mtxq->aggr && send_bar; -- cgit v1.2.3-59-g8ed1b From cddfb283af7e5700760362ec6f6ab3a039636e64 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 10 Dec 2018 16:46:28 +0100 Subject: mt76: add entry in MAINTAINERS file Add entry for mt76 driver in MAINTAINERS file Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 380e43f585d3..3e1e51d84544 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9378,6 +9378,13 @@ F: drivers/media/platform/mtk-vpu/ F: Documentation/devicetree/bindings/media/mediatek-vcodec.txt F: Documentation/devicetree/bindings/media/mediatek-vpu.txt +MEDIATEK MT76 WIRELESS LAN DRIVER +M: Felix Fietkau +M: Lorenzo Bianconi +L: linux-wireless@vger.kernel.org +S: Maintained +F: drivers/net/wireless/mediatek/mt76/ + MEDIATEK MT7601U WIRELESS LAN DRIVER M: Jakub Kicinski L: linux-wireless@vger.kernel.org -- cgit v1.2.3-59-g8ed1b From 7640ead939247e91e84b7ec6ec001f30193cc7df Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 12 Dec 2018 16:29:07 -0800 Subject: bpf: verifier: make sure callees don't prune with caller differences Currently for liveness and state pruning the register parentage chains don't include states of the callee. This makes some sense as the callee can't access those registers. However, this means that READs done after the callee returns will not propagate into the states of the callee. Callee will then perform pruning disregarding differences in caller state. Example: 0: (85) call bpf_user_rnd_u32 1: (b7) r8 = 0 2: (55) if r0 != 0x0 goto pc+1 3: (b7) r8 = 1 4: (bf) r1 = r8 5: (85) call pc+4 6: (15) if r8 == 0x1 goto pc+1 7: (05) *(u64 *)(r9 - 8) = r3 8: (b7) r0 = 0 9: (95) exit 10: (15) if r1 == 0x0 goto pc+0 11: (95) exit Here we acquire unknown state with call to get_random() [1]. Then we store this random state in r8 (either 0 or 1) [1 - 3], and make a call on line 5. Callee does nothing but a trivial conditional jump (to create a pruning point). 
Upon return caller checks the state of r8 and either performs an unsafe read or not. Verifier will first explore the path with r8 == 1, creating a pruning point at [11]. The parentage chain for r8 will include only callers states so once verifier reaches [6] it will mark liveness only on states in the caller, and not [11]. Now when verifier walks the paths with r8 == 0 it will reach [11] and since REG_LIVE_READ on r8 was not propagated there it will prune the walk entirely (stop walking the entire program, not just the callee). Since [6] was never walked with r8 == 0, [7] will be considered dead and replaced with "goto -1" causing hang at runtime. This patch weaves the callee's explored states onto the callers parentage chain. Rough parentage for r8 would have looked like this before: [0] [1] [2] [3] [4] [5] [10] [11] [6] [7] | | ,---|----. | | | sl0: sl0: / sl0: \ sl0: sl0: sl0: fr0: r8 <-- fr0: r8<+--fr0: r8 `fr0: r8 ,fr0: r8<-fr0: r8 \ fr1: r8 <- fr1: r8 / \__________________/ after: [0] [1] [2] [3] [4] [5] [10] [11] [6] [7] | | | | | | sl0: sl0: sl0: sl0: sl0: sl0: fr0: r8 <-- fr0: r8 <- fr0: r8 <- fr0: r8 <-fr0: r8<-fr0: r8 fr1: r8 <- fr1: r8 Now the mark from instruction 6 will travel through callees states. Note that we don't have to connect r0 because its overwritten by callees state on return and r1 - r5 because those are not alive any more once a call is made. v2: - don't connect the callees registers twice (Alexei: suggestion & code) - add more details to the comment (Ed & Alexei) v1: don't unnecessarily link caller saved regs (Jiong) Fixes: f4d7e40a5b71 ("bpf: introduce function calls (verification)") Reported-by: David Beckett Signed-off-by: Jakub Kicinski Reviewed-by: Jiong Wang Reviewed-by: Edward Cree Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 13 ++++++++++--- tools/testing/selftests/bpf/test_verifier.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fc760d00a38c..51ba84d4d34a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5102,9 +5102,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) } new_sl->next = env->explored_states[insn_idx]; env->explored_states[insn_idx] = new_sl; - /* connect new state to parentage chain */ - for (i = 0; i < BPF_REG_FP; i++) - cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i]; + /* connect new state to parentage chain. Current frame needs all + * registers connected. Only r6 - r9 of the callers are alive (pushed + * to the stack implicitly by JITs) so in callers' frames connect just + * r6 - r9 as an optimization. Callers will have r1 - r5 connected to + * the state of the call instruction (with WRITTEN set), and r0 comes + * from callee with its full parentage chain, anyway. + */ + for (j = 0; j <= cur->curframe; j++) + for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) + cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; /* clear write marks in current state: the writes we did are not writes * our child did, so they don't screen off its reads from us. 
* (There are no read marks in current state, because reads always mark diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 37583267bdbc..f8eac4a544f4 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -13915,6 +13915,34 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = ACCEPT, }, + { + "calls: cross frame pruning", + .insns = { + /* r8 = !!random(); + * call pruner() + * if (r8) + * do something bad; + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3-59-g8ed1b From c3db8d531045774aeee6e0f731ab15b0c450de45 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 9 Dec 2018 23:27:01 -0800 Subject: qed: Fix command number mismatch between driver and the mfw The value for OEM_CFG_UPDATE command differs between driver and the Management firmware (mfw). Fix this gap with adding a reserved field. Fixes: cac6f691546b ("qed: Add support for Unified Fabric Port.") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 5c221ebaa7b3..b38e12c9de9d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12831,8 +12831,9 @@ enum MFW_DRV_MSG_TYPE { MFW_DRV_MSG_BW_UPDATE10, MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE, MFW_DRV_MSG_BW_UPDATE11, - MFW_DRV_MSG_OEM_CFG_UPDATE, + MFW_DRV_MSG_RESERVED, MFW_DRV_MSG_GET_TLV_REQ, + MFW_DRV_MSG_OEM_CFG_UPDATE, MFW_DRV_MSG_MAX }; -- cgit v1.2.3-59-g8ed1b From d3e8869ec82645599e6497d6974593bf00f7b19b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 14 Dec 2018 11:38:48 -0800 Subject: net: netlink: rename NETLINK_DUMP_STRICT_CHK -> NETLINK_GET_STRICT_CHK NETLINK_DUMP_STRICT_CHK can be used for all GET requests, dumps as well as doit handlers. Replace the DUMP in the name with GET make that clearer. Signed-off-by: Jakub Kicinski Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/uapi/linux/netlink.h | 2 +- net/netlink/af_netlink.c | 4 ++-- tools/include/uapi/linux/netlink.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 486ed1f0c0bc..0a4d73317759 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -155,7 +155,7 @@ enum nlmsgerr_attrs { #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 #define NETLINK_EXT_ACK 11 -#define NETLINK_DUMP_STRICT_CHK 12 +#define NETLINK_GET_STRICT_CHK 12 struct nl_pktinfo { __u32 group; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6bb9f3cde0b0..3c023d6120f6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1706,7 +1706,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, nlk->flags &= ~NETLINK_F_EXT_ACK; err = 0; break; - case NETLINK_DUMP_STRICT_CHK: + case NETLINK_GET_STRICT_CHK: if (val) nlk->flags |= NETLINK_F_STRICT_CHK; else @@ -1806,7 +1806,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, return -EFAULT; err = 0; break; - case NETLINK_DUMP_STRICT_CHK: + case NETLINK_GET_STRICT_CHK: if (len < sizeof(int)) return -EINVAL; len = sizeof(int); diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h index 486ed1f0c0bc..0a4d73317759 100644 --- a/tools/include/uapi/linux/netlink.h +++ b/tools/include/uapi/linux/netlink.h @@ -155,7 +155,7 @@ enum nlmsgerr_attrs { #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 #define NETLINK_EXT_ACK 11 -#define NETLINK_DUMP_STRICT_CHK 12 +#define NETLINK_GET_STRICT_CHK 12 struct nl_pktinfo { __u32 group; -- cgit v1.2.3-59-g8ed1b From 15ef70e286176165d28b0b8a969b422561a68dfc Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 10 Dec 2018 11:49:55 -0800 Subject: tipc: use lock_sock() in tipc_sk_reinit() lock_sock() must be used in process context to be race-free with other lock_sock() callers, for example, tipc_release(). Otherwise using the spinlock directly can't serialize a parallel tipc_release(). As it is blocking, we have to hold the sock refcnt before rhashtable_walk_stop() and release it after rhashtable_walk_start(). Fixes: 07f6c4bc048a ("tipc: convert tipc reference table to use generic rhashtable") Reported-by: Dmitry Vyukov Cc: Ying Xue Cc: Jon Maloy Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/tipc/socket.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b57b1be7252b..e1396fb87779 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2724,11 +2724,15 @@ void tipc_sk_reinit(struct net *net) rhashtable_walk_start(&iter); while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { - spin_lock_bh(&tsk->sk.sk_lock.slock); + sock_hold(&tsk->sk); + rhashtable_walk_stop(&iter); + lock_sock(&tsk->sk); msg = &tsk->phdr; msg_set_prevnode(msg, tipc_own_addr(net)); msg_set_orignode(msg, tipc_own_addr(net)); - spin_unlock_bh(&tsk->sk.sk_lock.slock); + release_sock(&tsk->sk); + rhashtable_walk_start(&iter); + sock_put(&tsk->sk); } rhashtable_walk_stop(&iter); -- cgit v1.2.3-59-g8ed1b From acb4a33e9856d5fa3384b87d3d8369229be06d31 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 10 Dec 2018 12:45:45 -0800 Subject: tipc: fix a double kfree_skb() tipc_udp_xmit() drops the packet on error, there is no need to drop it again. 
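A hedged sketch of the ownership rule behind this fix (xmit_consumes_skb() and queue_for_tx() are hypothetical names, not the tipc code): once the transmit helper frees the skb on its own error path, the caller only propagates the error code and never frees the same skb again.

#include <linux/skbuff.h>

/* Consumes the skb on failure, so callers check the return value only. */
static int xmit_consumes_skb(struct sk_buff *skb)
{
        int err = queue_for_tx(skb);    /* hypothetical transmit helper */

        if (err)
                kfree_skb(skb);         /* freed here, on the error path */
        return err;
}

static int send_one(struct sk_buff *skb)
{
        return xmit_consumes_skb(skb);  /* no second kfree_skb() on error */
}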
Fixes: ef20cd4dd163 ("tipc: introduce UDP replicast") Reported-and-tested-by: syzbot+eae585ba2cc2752d3704@syzkaller.appspotmail.com Cc: Ying Xue Cc: Jon Maloy Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 10dc59ce9c82..1b1ba1310ea7 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -245,10 +245,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, } err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr); - if (err) { - kfree_skb(_skb); + if (err) goto out; - } } err = 0; out: -- cgit v1.2.3-59-g8ed1b From fb83ed496b9a654f60cd1d58a0e1e79ec5694808 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 10 Dec 2018 15:23:30 -0800 Subject: tipc: compare remote and local protocols in tipc_udp_enable() When TIPC_NLA_UDP_REMOTE is an IPv6 mcast address but TIPC_NLA_UDP_LOCAL is an IPv4 address, a NULL-ptr deref is triggered as the UDP tunnel sock is initialized to IPv4 or IPv6 sock merely based on the protocol in local address. We should just error out when the remote address and local address have different protocols. Reported-by: syzbot+eb4da3a20fad2e52555d@syzkaller.appspotmail.com Cc: Ying Xue Cc: Jon Maloy Signed-off-by: Cong Wang Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 1b1ba1310ea7..4d85d71f16e2 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -679,6 +679,11 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (err) goto err; + if (remote.proto != local.proto) { + err = -EINVAL; + goto err; + } + /* Checking remote ip address */ rmcast = tipc_udp_is_mcast_addr(&remote); -- cgit v1.2.3-59-g8ed1b From 2ab4c3426c0cf711d7147e3f559638e4ab88960e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 10 Dec 2018 21:20:30 -0700 Subject: drivers: net: xgene: Remove unnecessary forward declarations Clang warns: drivers/net/ethernet/apm/xgene/xgene_enet_main.c:33:36: warning: tentative array definition assumed to have one element static const struct acpi_device_id xgene_enet_acpi_match[]; ^ 1 warning generated. Both xgene_enet_acpi_match and xgene_enet_of_match are defined before their uses at the bottom of the file so this is unnecessary. When CONFIG_ACPI is disabled, ACPI_PTR becomes NULL so xgene_enet_acpi_match doesn't need to be defined. Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 3b889efddf78..50dd6bf176d0 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -29,9 +29,6 @@ #define RES_RING_CSR 1 #define RES_RING_CMD 2 -static const struct of_device_id xgene_enet_of_match[]; -static const struct acpi_device_id xgene_enet_acpi_match[]; - static void xgene_enet_init_bufpool(struct xgene_enet_desc_ring *buf_pool) { struct xgene_enet_raw_desc16 *raw_desc; -- cgit v1.2.3-59-g8ed1b From 6c0563e442528733219afe15c749eb2cc365da3f Mon Sep 17 00:00:00 2001 From: Atul Gupta Date: Tue, 11 Dec 2018 02:19:40 -0800 Subject: net/tls: Init routines in create_ctx create_ctx is called from tls_init and tls_hw_prot hence initialize function pointers in common routine. 
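A hedged sketch of the pattern, with hypothetical struct and helper names rather than the real net/tls types: the shared constructor records the socket's original proto callbacks once, so both of its callers get a context that can later restore them without duplicating the setup.

#include <net/sock.h>
#include <linux/slab.h>

struct ctx_sketch {
        int  (*orig_setsockopt)(struct sock *sk, int level, int optname,
                                char __user *optval, unsigned int optlen);
        void (*orig_close)(struct sock *sk, long timeout);
};

static struct ctx_sketch *create_ctx_sketch(struct sock *sk)
{
        struct ctx_sketch *ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);

        if (!ctx)
                return NULL;
        /* saved once, in the common routine, for every call site */
        ctx->orig_setsockopt = sk->sk_prot->setsockopt;
        ctx->orig_close = sk->sk_prot->close;
        return ctx;
}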
Signed-off-by: Atul Gupta Signed-off-by: David S. Miller --- net/tls/tls_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 311cec8e533d..492080306edc 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -543,6 +543,9 @@ static struct tls_context *create_ctx(struct sock *sk) return NULL; icsk->icsk_ulp_data = ctx; + ctx->setsockopt = sk->sk_prot->setsockopt; + ctx->getsockopt = sk->sk_prot->getsockopt; + ctx->sk_proto_close = sk->sk_prot->close; return ctx; } @@ -675,9 +678,6 @@ static int tls_init(struct sock *sk) rc = -ENOMEM; goto out; } - ctx->setsockopt = sk->sk_prot->setsockopt; - ctx->getsockopt = sk->sk_prot->getsockopt; - ctx->sk_proto_close = sk->sk_prot->close; /* Build IPv6 TLS whenever the address of tcpv6 _prot changes */ if (ip_ver == TLSV6 && -- cgit v1.2.3-59-g8ed1b From df9d4a1780223c3ddd4bf0810079b5a75251554e Mon Sep 17 00:00:00 2001 From: Atul Gupta Date: Tue, 11 Dec 2018 02:20:09 -0800 Subject: net/tls: sleeping function from invalid context HW unhash within mutex for registered tls devices cause sleep when called from tcp_set_state for TCP_CLOSE. Release lock and re-acquire after function call with ref count incr/dec. defined kref and fp release for tls_device to ensure device is not released outside lock. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:748 in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/7 INFO: lockdep is turned off. CPU: 7 PID: 0 Comm: swapper/7 Tainted: G W O Call Trace: dump_stack+0x5e/0x8b ___might_sleep+0x222/0x260 __mutex_lock+0x5c/0xa50 ? vprintk_emit+0x1f3/0x440 ? kmem_cache_free+0x22d/0x2a0 ? tls_hw_unhash+0x2f/0x80 ? printk+0x52/0x6e ? tls_hw_unhash+0x2f/0x80 tls_hw_unhash+0x2f/0x80 tcp_set_state+0x5f/0x180 tcp_done+0x2e/0xe0 tcp_rcv_state_process+0x92c/0xdd3 ? lock_acquire+0xf5/0x1f0 ? tcp_v4_rcv+0xa7c/0xbe0 ? tcp_v4_do_rcv+0x70/0x1e0 Signed-off-by: Atul Gupta Signed-off-by: David S. 
Miller --- drivers/crypto/chelsio/chtls/chtls_main.c | 55 ++++++++++++++++++------------- include/net/tls.h | 6 ++++ net/tls/tls_main.c | 36 ++++++++++++-------- 3 files changed, 61 insertions(+), 36 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index f472c51abe56..db40ab6dd410 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -149,6 +149,30 @@ static void chtls_destroy_hash(struct tls_device *dev, struct sock *sk) chtls_stop_listen(sk); } +static void chtls_free_uld(struct chtls_dev *cdev) +{ + int i; + + tls_unregister_device(&cdev->tlsdev); + kvfree(cdev->kmap.addr); + idr_destroy(&cdev->hwtid_idr); + for (i = 0; i < (1 << RSPQ_HASH_BITS); i++) + kfree_skb(cdev->rspq_skb_cache[i]); + kfree(cdev->lldi); + kfree_skb(cdev->askb); + kfree(cdev); +} + +static inline void chtls_dev_release(struct kref *kref) +{ + struct chtls_dev *cdev; + struct tls_device *dev; + + dev = container_of(kref, struct tls_device, kref); + cdev = to_chtls_dev(dev); + chtls_free_uld(cdev); +} + static void chtls_register_dev(struct chtls_dev *cdev) { struct tls_device *tlsdev = &cdev->tlsdev; @@ -159,15 +183,12 @@ static void chtls_register_dev(struct chtls_dev *cdev) tlsdev->feature = chtls_inline_feature; tlsdev->hash = chtls_create_hash; tlsdev->unhash = chtls_destroy_hash; - tls_register_device(&cdev->tlsdev); + tlsdev->release = chtls_dev_release; + kref_init(&tlsdev->kref); + tls_register_device(tlsdev); cdev->cdev_state = CHTLS_CDEV_STATE_UP; } -static void chtls_unregister_dev(struct chtls_dev *cdev) -{ - tls_unregister_device(&cdev->tlsdev); -} - static void process_deferq(struct work_struct *task_param) { struct chtls_dev *cdev = container_of(task_param, @@ -262,28 +283,16 @@ out: return NULL; } -static void chtls_free_uld(struct chtls_dev *cdev) -{ - int i; - - chtls_unregister_dev(cdev); - kvfree(cdev->kmap.addr); - idr_destroy(&cdev->hwtid_idr); - for (i = 0; i < (1 << RSPQ_HASH_BITS); i++) - kfree_skb(cdev->rspq_skb_cache[i]); - kfree(cdev->lldi); - kfree_skb(cdev->askb); - kfree(cdev); -} - static void chtls_free_all_uld(void) { struct chtls_dev *cdev, *tmp; mutex_lock(&cdev_mutex); list_for_each_entry_safe(cdev, tmp, &cdev_list, list) { - if (cdev->cdev_state == CHTLS_CDEV_STATE_UP) - chtls_free_uld(cdev); + if (cdev->cdev_state == CHTLS_CDEV_STATE_UP) { + list_del(&cdev->list); + kref_put(&cdev->tlsdev.kref, cdev->tlsdev.release); + } } mutex_unlock(&cdev_mutex); } @@ -304,7 +313,7 @@ static int chtls_uld_state_change(void *handle, enum cxgb4_state new_state) mutex_lock(&cdev_mutex); list_del(&cdev->list); mutex_unlock(&cdev_mutex); - chtls_free_uld(cdev); + kref_put(&cdev->tlsdev.kref, cdev->tlsdev.release); break; default: break; diff --git a/include/net/tls.h b/include/net/tls.h index bab5627ff5e3..3cbcd12303fd 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -76,6 +76,10 @@ * * void (*unhash)(struct tls_device *device, struct sock *sk); * This function cleans listen state set by Inline TLS driver + * + * void (*release)(struct kref *kref); + * Release the registered device and allocated resources + * @kref: Number of reference to tls_device */ struct tls_device { char name[TLS_DEVICE_NAME_MAX]; @@ -83,6 +87,8 @@ struct tls_device { int (*feature)(struct tls_device *device); int (*hash)(struct tls_device *device, struct sock *sk); void (*unhash)(struct tls_device *device, struct sock *sk); + void (*release)(struct kref *kref); + struct kref kref; }; enum { diff --git 
a/net/tls/tls_main.c b/net/tls/tls_main.c index 492080306edc..1428bd74638c 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -56,7 +56,7 @@ enum { static struct proto *saved_tcpv6_prot; static DEFINE_MUTEX(tcpv6_prot_mutex); static LIST_HEAD(device_list); -static DEFINE_MUTEX(device_mutex); +static DEFINE_SPINLOCK(device_spinlock); static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG]; static struct proto_ops tls_sw_proto_ops; @@ -555,7 +555,7 @@ static int tls_hw_prot(struct sock *sk) struct tls_device *dev; int rc = 0; - mutex_lock(&device_mutex); + spin_lock_bh(&device_spinlock); list_for_each_entry(dev, &device_list, dev_list) { if (dev->feature && dev->feature(dev)) { ctx = create_ctx(sk); @@ -573,7 +573,7 @@ static int tls_hw_prot(struct sock *sk) } } out: - mutex_unlock(&device_mutex); + spin_unlock_bh(&device_spinlock); return rc; } @@ -582,12 +582,17 @@ static void tls_hw_unhash(struct sock *sk) struct tls_context *ctx = tls_get_ctx(sk); struct tls_device *dev; - mutex_lock(&device_mutex); + spin_lock_bh(&device_spinlock); list_for_each_entry(dev, &device_list, dev_list) { - if (dev->unhash) + if (dev->unhash) { + kref_get(&dev->kref); + spin_unlock_bh(&device_spinlock); dev->unhash(dev, sk); + kref_put(&dev->kref, dev->release); + spin_lock_bh(&device_spinlock); + } } - mutex_unlock(&device_mutex); + spin_unlock_bh(&device_spinlock); ctx->unhash(sk); } @@ -598,12 +603,17 @@ static int tls_hw_hash(struct sock *sk) int err; err = ctx->hash(sk); - mutex_lock(&device_mutex); + spin_lock_bh(&device_spinlock); list_for_each_entry(dev, &device_list, dev_list) { - if (dev->hash) + if (dev->hash) { + kref_get(&dev->kref); + spin_unlock_bh(&device_spinlock); err |= dev->hash(dev, sk); + kref_put(&dev->kref, dev->release); + spin_lock_bh(&device_spinlock); + } } - mutex_unlock(&device_mutex); + spin_unlock_bh(&device_spinlock); if (err) tls_hw_unhash(sk); @@ -699,17 +709,17 @@ out: void tls_register_device(struct tls_device *device) { - mutex_lock(&device_mutex); + spin_lock_bh(&device_spinlock); list_add_tail(&device->dev_list, &device_list); - mutex_unlock(&device_mutex); + spin_unlock_bh(&device_spinlock); } EXPORT_SYMBOL(tls_register_device); void tls_unregister_device(struct tls_device *device) { - mutex_lock(&device_mutex); + spin_lock_bh(&device_spinlock); list_del(&device->dev_list); - mutex_unlock(&device_mutex); + spin_unlock_bh(&device_spinlock); } EXPORT_SYMBOL(tls_unregister_device); -- cgit v1.2.3-59-g8ed1b From 6422ccc5fbefbd219f3fab133f698e58f5aa44eb Mon Sep 17 00:00:00 2001 From: Atul Gupta Date: Tue, 11 Dec 2018 02:20:26 -0800 Subject: crypto/chelsio/chtls: listen fails with multiadapt listen fails when more than one tls capable device is registered. tls_hw_hash is called for each dev which loops again for each cdev_list causing listen failure. Hence call chtls_listen_start/stop for specific device than loop over all devices. Signed-off-by: Atul Gupta Signed-off-by: David S. 
Miller --- drivers/crypto/chelsio/chtls/chtls.h | 5 ++++ drivers/crypto/chelsio/chtls/chtls_main.c | 50 +++++++++++++++++++------------ 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/crypto/chelsio/chtls/chtls.h index 7725b6ee14ef..fcb6747ed29e 100644 --- a/drivers/crypto/chelsio/chtls/chtls.h +++ b/drivers/crypto/chelsio/chtls/chtls.h @@ -153,6 +153,11 @@ struct chtls_dev { unsigned int cdev_state; }; +struct chtls_listen { + struct chtls_dev *cdev; + struct sock *sk; +}; + struct chtls_hws { struct sk_buff_head sk_recv_queue; u8 txqid; diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index db40ab6dd410..563f8fe7686a 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -55,24 +55,19 @@ static void unregister_listen_notifier(struct notifier_block *nb) static int listen_notify_handler(struct notifier_block *this, unsigned long event, void *data) { - struct chtls_dev *cdev; - struct sock *sk; - int ret; + struct chtls_listen *clisten; + int ret = NOTIFY_DONE; - sk = data; - ret = NOTIFY_DONE; + clisten = (struct chtls_listen *)data; switch (event) { case CHTLS_LISTEN_START: + ret = chtls_listen_start(clisten->cdev, clisten->sk); + kfree(clisten); + break; case CHTLS_LISTEN_STOP: - mutex_lock(&cdev_list_lock); - list_for_each_entry(cdev, &cdev_list, list) { - if (event == CHTLS_LISTEN_START) - ret = chtls_listen_start(cdev, sk); - else - chtls_listen_stop(cdev, sk); - } - mutex_unlock(&cdev_list_lock); + chtls_listen_stop(clisten->cdev, clisten->sk); + kfree(clisten); break; } return ret; @@ -90,8 +85,9 @@ static int listen_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } -static int chtls_start_listen(struct sock *sk) +static int chtls_start_listen(struct chtls_dev *cdev, struct sock *sk) { + struct chtls_listen *clisten; int err; if (sk->sk_protocol != IPPROTO_TCP) @@ -102,21 +98,33 @@ static int chtls_start_listen(struct sock *sk) return -EADDRNOTAVAIL; sk->sk_backlog_rcv = listen_backlog_rcv; + clisten = kmalloc(sizeof(*clisten), GFP_KERNEL); + if (!clisten) + return -ENOMEM; + clisten->cdev = cdev; + clisten->sk = sk; mutex_lock(¬ify_mutex); err = raw_notifier_call_chain(&listen_notify_list, - CHTLS_LISTEN_START, sk); + CHTLS_LISTEN_START, clisten); mutex_unlock(¬ify_mutex); return err; } -static void chtls_stop_listen(struct sock *sk) +static void chtls_stop_listen(struct chtls_dev *cdev, struct sock *sk) { + struct chtls_listen *clisten; + if (sk->sk_protocol != IPPROTO_TCP) return; + clisten = kmalloc(sizeof(*clisten), GFP_KERNEL); + if (!clisten) + return; + clisten->cdev = cdev; + clisten->sk = sk; mutex_lock(¬ify_mutex); raw_notifier_call_chain(&listen_notify_list, - CHTLS_LISTEN_STOP, sk); + CHTLS_LISTEN_STOP, clisten); mutex_unlock(¬ify_mutex); } @@ -138,15 +146,19 @@ static int chtls_inline_feature(struct tls_device *dev) static int chtls_create_hash(struct tls_device *dev, struct sock *sk) { + struct chtls_dev *cdev = to_chtls_dev(dev); + if (sk->sk_state == TCP_LISTEN) - return chtls_start_listen(sk); + return chtls_start_listen(cdev, sk); return 0; } static void chtls_destroy_hash(struct tls_device *dev, struct sock *sk) { + struct chtls_dev *cdev = to_chtls_dev(dev); + if (sk->sk_state == TCP_LISTEN) - chtls_stop_listen(sk); + chtls_stop_listen(cdev, sk); } static void chtls_free_uld(struct chtls_dev *cdev) -- cgit v1.2.3-59-g8ed1b From 848dd1c1cbc5a16ceae46f04722f9eb1f4b7bb4e Mon Sep 17 00:00:00 
2001 From: Atul Gupta Date: Tue, 11 Dec 2018 02:20:40 -0800 Subject: crypto/chelsio/chtls: macro correction in tx path corrected macro used in tx path. removed redundant hdrlen and check for !page in chtls_sendmsg Signed-off-by: Atul Gupta Signed-off-by: David S. Miller --- drivers/crypto/chelsio/chtls/chtls_io.c | 20 ++++++++------------ drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 3 +++ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index afebbd87c4aa..18f553fcc167 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -397,7 +397,7 @@ static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb, req_wr->lsodisable_to_flags = htonl(TX_ULP_MODE_V(ULP_MODE_TLS) | - FW_OFLD_TX_DATA_WR_URGENT_V(skb_urgent(skb)) | + TX_URG_V(skb_urgent(skb)) | T6_TX_FORCE_F | wr_ulp_mode_force | TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && skb_queue_empty(&csk->txq))); @@ -534,10 +534,9 @@ static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb, FW_OFLD_TX_DATA_WR_SHOVE_F); req->tunnel_to_proxy = htonl(wr_ulp_mode_force | - FW_OFLD_TX_DATA_WR_URGENT_V(skb_urgent(skb)) | - FW_OFLD_TX_DATA_WR_SHOVE_V((!csk_flag - (sk, CSK_TX_MORE_DATA)) && - skb_queue_empty(&csk->txq))); + TX_URG_V(skb_urgent(skb)) | + TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && + skb_queue_empty(&csk->txq))); req->plen = htonl(len); } @@ -995,7 +994,6 @@ int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int mss, flags, err; int recordsz = 0; int copied = 0; - int hdrlen = 0; long timeo; lock_sock(sk); @@ -1032,7 +1030,7 @@ int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) recordsz = tls_header_read(&hdr, &msg->msg_iter); size -= TLS_HEADER_LENGTH; - hdrlen += TLS_HEADER_LENGTH; + copied += TLS_HEADER_LENGTH; csk->tlshws.txleft = recordsz; csk->tlshws.type = hdr.type; if (skb) @@ -1083,10 +1081,8 @@ new_buf: int off = TCP_OFF(sk); bool merge; - if (!page) - goto wait_for_memory; - - pg_size <<= compound_order(page); + if (page) + pg_size <<= compound_order(page); if (off < pg_size && skb_can_coalesce(skb, i, page, off)) { merge = 1; @@ -1187,7 +1183,7 @@ out: chtls_tcp_push(sk, flags); done: release_sock(sk); - return copied + hdrlen; + return copied; do_fault: if (!skb->len) { __skb_unlink(skb, &csk->txq); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index f152da1ce046..c62a0c830705 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -1453,6 +1453,9 @@ struct cpl_tx_data { #define T6_TX_FORCE_V(x) ((x) << T6_TX_FORCE_S) #define T6_TX_FORCE_F T6_TX_FORCE_V(1U) +#define TX_URG_S 16 +#define TX_URG_V(x) ((x) << TX_URG_S) + #define TX_SHOVE_S 14 #define TX_SHOVE_V(x) ((x) << TX_SHOVE_S) -- cgit v1.2.3-59-g8ed1b From 0c3a16be700ea008e1aa8843453bfc182e04b58a Mon Sep 17 00:00:00 2001 From: Atul Gupta Date: Tue, 11 Dec 2018 02:20:53 -0800 Subject: crypto/chelsio/chtls: send/recv window update recalculated send and receive window using linkspeed. Determine correct value of eck_ok from SYN received and option configured on local system. Signed-off-by: Atul Gupta Signed-off-by: David S. 
Miller --- drivers/crypto/chelsio/chtls/chtls.h | 2 + drivers/crypto/chelsio/chtls/chtls_cm.c | 78 ++++++++++++++++++++++----------- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/crypto/chelsio/chtls/chtls.h index fcb6747ed29e..59bb67d5a7ce 100644 --- a/drivers/crypto/chelsio/chtls/chtls.h +++ b/drivers/crypto/chelsio/chtls/chtls.h @@ -220,6 +220,8 @@ struct chtls_sock { u16 resv2; u32 delack_mode; u32 delack_seq; + u32 snd_win; + u32 rcv_win; void *passive_reap_next; /* placeholder for passive */ struct chtls_hws tlshws; diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index 20209e29f814..931b96c220af 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -887,24 +888,6 @@ static unsigned int chtls_select_mss(const struct chtls_sock *csk, return mtu_idx; } -static unsigned int select_rcv_wnd(struct chtls_sock *csk) -{ - unsigned int rcvwnd; - unsigned int wnd; - struct sock *sk; - - sk = csk->sk; - wnd = tcp_full_space(sk); - - if (wnd < MIN_RCV_WND) - wnd = MIN_RCV_WND; - - rcvwnd = MAX_RCV_WND; - - csk_set_flag(csk, CSK_UPDATE_RCV_WND); - return min(wnd, rcvwnd); -} - static unsigned int select_rcv_wscale(int space, int wscale_ok, int win_clamp) { int wscale = 0; @@ -951,7 +934,7 @@ static void chtls_pass_accept_rpl(struct sk_buff *skb, csk->mtu_idx = chtls_select_mss(csk, dst_mtu(__sk_dst_get(sk)), req); opt0 = TCAM_BYPASS_F | - WND_SCALE_V((tp)->rx_opt.rcv_wscale) | + WND_SCALE_V(RCV_WSCALE(tp)) | MSS_IDX_V(csk->mtu_idx) | L2T_IDX_V(csk->l2t_entry->idx) | NAGLE_V(!(tp->nonagle & TCP_NAGLE_OFF)) | @@ -1005,6 +988,25 @@ static int chtls_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } +static void chtls_set_tcp_window(struct chtls_sock *csk) +{ + struct net_device *ndev = csk->egress_dev; + struct port_info *pi = netdev_priv(ndev); + unsigned int linkspeed; + u8 scale; + + linkspeed = pi->link_cfg.speed; + scale = linkspeed / SPEED_10000; +#define CHTLS_10G_RCVWIN (256 * 1024) + csk->rcv_win = CHTLS_10G_RCVWIN; + if (scale) + csk->rcv_win *= scale; +#define CHTLS_10G_SNDWIN (256 * 1024) + csk->snd_win = CHTLS_10G_SNDWIN; + if (scale) + csk->snd_win *= scale; +} + static struct sock *chtls_recv_sock(struct sock *lsk, struct request_sock *oreq, void *network_hdr, @@ -1067,6 +1069,9 @@ static struct sock *chtls_recv_sock(struct sock *lsk, csk->port_id = port_id; csk->egress_dev = ndev; csk->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); + chtls_set_tcp_window(csk); + tp->rcv_wnd = csk->rcv_win; + csk->sndbuf = csk->snd_win; csk->ulp_mode = ULP_MODE_TLS; step = cdev->lldi->nrxq / cdev->lldi->nchan; csk->rss_qid = cdev->lldi->rxq_ids[port_id * step]; @@ -1076,9 +1081,9 @@ static struct sock *chtls_recv_sock(struct sock *lsk, csk->sndbuf = newsk->sk_sndbuf; csk->smac_idx = cxgb4_tp_smt_idx(cdev->lldi->adapter_type, cxgb4_port_viid(ndev)); - tp->rcv_wnd = select_rcv_wnd(csk); RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk), - WSCALE_OK(tp), + sock_net(newsk)-> + ipv4.sysctl_tcp_window_scaling, tp->window_clamp); neigh_release(n); inet_inherit_port(&tcp_hashinfo, lsk, newsk); @@ -1130,6 +1135,7 @@ static void chtls_pass_accept_request(struct sock *sk, struct cpl_t5_pass_accept_rpl *rpl; struct cpl_pass_accept_req *req; struct listen_ctx *listen_ctx; + struct vlan_ethhdr *vlan_eh; struct request_sock *oreq; struct sk_buff *reply_skb; struct 
chtls_sock *csk; @@ -1142,6 +1148,10 @@ static void chtls_pass_accept_request(struct sock *sk, unsigned int stid; unsigned int len; unsigned int tid; + bool th_ecn, ect; + __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ + u16 eth_hdr_len; + bool ecn_ok; req = cplhdr(skb) + RSS_HDR; tid = GET_TID(req); @@ -1180,24 +1190,40 @@ static void chtls_pass_accept_request(struct sock *sk, oreq->mss = 0; oreq->ts_recent = 0; - eh = (struct ethhdr *)(req + 1); - iph = (struct iphdr *)(eh + 1); + eth_hdr_len = T6_ETH_HDR_LEN_G(ntohl(req->hdr_len)); + if (eth_hdr_len == ETH_HLEN) { + eh = (struct ethhdr *)(req + 1); + iph = (struct iphdr *)(eh + 1); + network_hdr = (void *)(eh + 1); + } else { + vlan_eh = (struct vlan_ethhdr *)(req + 1); + iph = (struct iphdr *)(vlan_eh + 1); + network_hdr = (void *)(vlan_eh + 1); + } if (iph->version != 0x4) goto free_oreq; - network_hdr = (void *)(eh + 1); tcph = (struct tcphdr *)(iph + 1); + skb_set_network_header(skb, (void *)iph - (void *)req); tcp_rsk(oreq)->tfo_listener = false; tcp_rsk(oreq)->rcv_isn = ntohl(tcph->seq); chtls_set_req_port(oreq, tcph->source, tcph->dest); - inet_rsk(oreq)->ecn_ok = 0; chtls_set_req_addr(oreq, iph->daddr, iph->saddr); - if (req->tcpopt.wsf <= 14) { + ip_dsfield = ipv4_get_dsfield(iph); + if (req->tcpopt.wsf <= 14 && + sock_net(sk)->ipv4.sysctl_tcp_window_scaling) { inet_rsk(oreq)->wscale_ok = 1; inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf; } inet_rsk(oreq)->ir_iif = sk->sk_bound_dev_if; + th_ecn = tcph->ece && tcph->cwr; + if (th_ecn) { + ect = !INET_ECN_is_not_ect(ip_dsfield); + ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn; + if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk)) + inet_rsk(oreq)->ecn_ok = 1; + } newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev); if (!newsk) -- cgit v1.2.3-59-g8ed1b From 9c27369f4a1393452c17e8708c1b0beb8ac59501 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 11 Dec 2018 08:16:14 -0600 Subject: r8152: Add support for MAC address pass through on RTL8153-BND All previous docks and dongles that have supported this feature use the RTL8153-AD chip. RTL8153-BND is a new chip that will be used in upcoming Dell type-C docks. It should be added to the whitelist of devices to activate MAC address pass through. Per confirming with Realtek all devices containing RTL8153-BND should activate MAC pass through and there won't use pass through bit on efuse like in RTL8153-AD. Signed-off-by: Mario Limonciello Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f1b5201cc320..60dd1ec1665f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -129,6 +129,7 @@ #define USB_UPS_CTRL 0xd800 #define USB_POWER_CUT 0xd80a #define USB_MISC_0 0xd81a +#define USB_MISC_1 0xd81f #define USB_AFE_CTRL2 0xd824 #define USB_UPS_CFG 0xd842 #define USB_UPS_FLAGS 0xd848 @@ -555,6 +556,7 @@ enum spd_duplex { /* MAC PASSTHRU */ #define AD_MASK 0xfee0 +#define BND_MASK 0x0004 #define EFUSE 0xcfdb #define PASS_THRU_MASK 0x1 @@ -1150,7 +1152,7 @@ out1: return ret; } -/* Devices containing RTL8153-AD can support a persistent +/* Devices containing proper chips can support a persistent * host system provided MAC address. 
* Examples of this are Dell TB15 and Dell WD15 docks */ @@ -1165,13 +1167,23 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa) /* test for -AD variant of RTL8153 */ ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); - if ((ocp_data & AD_MASK) != 0x1000) - return -ENODEV; - - /* test for MAC address pass-through bit */ - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, EFUSE); - if ((ocp_data & PASS_THRU_MASK) != 1) - return -ENODEV; + if ((ocp_data & AD_MASK) == 0x1000) { + /* test for MAC address pass-through bit */ + ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, EFUSE); + if ((ocp_data & PASS_THRU_MASK) != 1) { + netif_dbg(tp, probe, tp->netdev, + "No efuse for RTL8153-AD MAC pass through\n"); + return -ENODEV; + } + } else { + /* test for RTL8153-BND */ + ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_1); + if ((ocp_data & BND_MASK) == 0) { + netif_dbg(tp, probe, tp->netdev, + "Invalid variant for MAC pass through\n"); + return -ENODEV; + } + } /* returns _AUXMAC_#AABBCCDDEEFF# */ status = acpi_evaluate_object(NULL, "\\_SB.AMAC", NULL, &buffer); @@ -1217,9 +1229,8 @@ static int set_ethernet_addr(struct r8152 *tp) if (tp->version == RTL_VER_01) { ret = pla_ocp_read(tp, PLA_IDR, 8, sa.sa_data); } else { - /* if this is not an RTL8153-AD, no eFuse mac pass thru set, - * or system doesn't provide valid _SB.AMAC this will be - * be expected to non-zero + /* if device doesn't support MAC pass through this will + * be expected to be non-zero */ ret = vendor_mac_passthru_addr_read(tp, &sa); if (ret < 0) -- cgit v1.2.3-59-g8ed1b From 8236b08cf50f85bbfaf48910a0b3ee68318b7c4b Mon Sep 17 00:00:00 2001 From: Lepton Wu Date: Tue, 11 Dec 2018 11:12:55 -0800 Subject: VSOCK: bind to random port for VMADDR_PORT_ANY The old code always starts from fixed port for VMADDR_PORT_ANY. Sometimes when VMM crashed, there is still orphaned vsock which is waiting for close timer, then it could cause connection time out for new started VM if they are trying to connect to same port with same guest cid since the new packets could hit that orphaned vsock. We could also fix this by doing more in vhost_vsock_reset_orphans, but any way, it should be better to start from a random local port instead of a fixed one. Signed-off-by: Lepton Wu Reviewed-by: Jorgen Hansen Signed-off-by: David S. 
Miller --- net/vmw_vsock/af_vsock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index ab27a2872935..43a1dec08825 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -107,6 +107,7 @@ #include #include #include +#include #include #include #include @@ -504,9 +505,13 @@ out: static int __vsock_bind_stream(struct vsock_sock *vsk, struct sockaddr_vm *addr) { - static u32 port = LAST_RESERVED_PORT + 1; + static u32 port = 0; struct sockaddr_vm new_addr; + if (!port) + port = LAST_RESERVED_PORT + 1 + + prandom_u32_max(U32_MAX - LAST_RESERVED_PORT); + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); if (addr->svm_port == VMADDR_PORT_ANY) { -- cgit v1.2.3-59-g8ed1b From 51367e423c6501a26e67d91a655d2bc892303462 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Dec 2018 21:45:07 +0100 Subject: w90p910_ether: remove incorrect __init annotation The get_mac_address() function is normally inline, but when it is not, we get a warning that this configuration is broken: WARNING: vmlinux.o(.text+0x4aff00): Section mismatch in reference from the function w90p910_ether_setup() to the function .init.text:get_mac_address() The function w90p910_ether_setup() references the function __init get_mac_address(). This is often because w90p910_ether_setup lacks a __init Remove the __init to make it always do the right thing. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/nuvoton/w90p910_ether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c index 052b3d2c07a1..c662c6f5bee3 100644 --- a/drivers/net/ethernet/nuvoton/w90p910_ether.c +++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c @@ -912,7 +912,7 @@ static const struct net_device_ops w90p910_ether_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -static void __init get_mac_address(struct net_device *dev) +static void get_mac_address(struct net_device *dev) { struct w90p910_ether *ether = netdev_priv(dev); struct platform_device *pdev; -- cgit v1.2.3-59-g8ed1b From 69d2c86766da2ded2b70281f1bf242cb0d58a778 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 11 Dec 2018 14:10:08 -0600 Subject: ip6mr: Fix potential Spectre v1 vulnerability vr.mifi is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: net/ipv6/ip6mr.c:1845 ip6mr_ioctl() warn: potential spectre issue 'mrt->vif_table' [r] (local cap) net/ipv6/ip6mr.c:1919 ip6mr_compat_ioctl() warn: potential spectre issue 'mrt->vif_table' [r] (local cap) Fix this by sanitizing vr.mifi before using it to index mrt->vif_table' Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- net/ipv6/ip6mr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e2ea691e42c6..377a2ee5d9ad 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -52,6 +52,8 @@ #include #include +#include + struct ip6mr_rule { struct fib_rule common; }; @@ -1841,6 +1843,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; if (vr.mifi >= mrt->maxvif) return -EINVAL; + vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif_table[vr.mifi]; if (VIF_EXISTS(mrt, vr.mifi)) { @@ -1915,6 +1918,7 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) return -EFAULT; if (vr.mifi >= mrt->maxvif) return -EINVAL; + vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif_table[vr.mifi]; if (VIF_EXISTS(mrt, vr.mifi)) { -- cgit v1.2.3-59-g8ed1b From 65cab850f0eeaa9180bd2e10a231964f33743edf Mon Sep 17 00:00:00 2001 From: Dave Taht Date: Tue, 11 Dec 2018 15:30:34 -0800 Subject: net: Allow class-e address assignment via ifconfig ioctl While most distributions long ago switched to the iproute2 suite of utilities, which allow class-e (240.0.0.0/4) address assignment, distributions relying on busybox, toybox and other forms of ifconfig cannot assign class-e addresses without this kernel patch. While CIDR has been obsolete for 2 decades, and a survey of all the open source code in the world shows the IN_whatever macros are also obsolete... rather than obsolete CIDR from this ioctl entirely, this patch merely enables class-e assignment, sanely. Signed-off-by: Dave Taht Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 10 +++++++--- net/ipv4/devinet.c | 5 +++-- net/ipv4/ipconfig.c | 2 ++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 48e8a225b985..f6052e70bf40 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -266,10 +266,14 @@ struct sockaddr_in { #define IN_CLASSD(a) ((((long int) (a)) & 0xf0000000) == 0xe0000000) #define IN_MULTICAST(a) IN_CLASSD(a) -#define IN_MULTICAST_NET 0xF0000000 +#define IN_MULTICAST_NET 0xe0000000 -#define IN_EXPERIMENTAL(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000) -#define IN_BADCLASS(a) IN_EXPERIMENTAL((a)) +#define IN_BADCLASS(a) ((((long int) (a) ) == 0xffffffff) +#define IN_EXPERIMENTAL(a) IN_BADCLASS((a)) + +#define IN_CLASSE(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000) +#define IN_CLASSE_NET 0xffffffff +#define IN_CLASSE_NSHIFT 0 /* Address to accept any incoming messages. */ #define INADDR_ANY ((unsigned long int) 0x00000000) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a34602ae27de..608a6f4223fb 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -952,17 +952,18 @@ static int inet_abc_len(__be32 addr) { int rc = -1; /* Something else, probably a multicast. 
*/ - if (ipv4_is_zeronet(addr)) + if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) rc = 0; else { __u32 haddr = ntohl(addr); - if (IN_CLASSA(haddr)) rc = 8; else if (IN_CLASSB(haddr)) rc = 16; else if (IN_CLASSC(haddr)) rc = 24; + else if (IN_CLASSE(haddr)) + rc = 32; } return rc; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 88212615bf4c..2393e5c106bf 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -429,6 +429,8 @@ static int __init ic_defaults(void) ic_netmask = htonl(IN_CLASSB_NET); else if (IN_CLASSC(ntohl(ic_myaddr))) ic_netmask = htonl(IN_CLASSC_NET); + else if (IN_CLASSE(ntohl(ic_myaddr))) + ic_netmask = htonl(IN_CLASSE_NET); else { pr_err("IP-Config: Unable to guess netmask for address %pI4\n", &ic_myaddr); -- cgit v1.2.3-59-g8ed1b From 143ece654f9f5b37bedea252a990be37e48ae3a5 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Dec 2018 21:43:51 -0800 Subject: tipc: check tsk->group in tipc_wait_for_cond() tipc_wait_for_cond() drops socket lock before going to sleep, but tsk->group could be freed right after that release_sock(). So we have to re-check and reload tsk->group after it wakes up. After this patch, tipc_wait_for_cond() returns -ERESTARTSYS when tsk->group is NULL, instead of continuing with the assumption of a non-NULL tsk->group. (It looks like 'dsts' should be re-checked and reloaded too, but it is a different bug.) Similar for tipc_send_group_unicast() and tipc_send_group_anycast(). Reported-by: syzbot+10a9db47c3a0e13eb31c@syzkaller.appspotmail.com Fixes: b7d42635517f ("tipc: introduce flow control for group broadcast messages") Fixes: ee106d7f942d ("tipc: introduce group anycast messaging") Fixes: 27bd9ec027f3 ("tipc: introduce group unicast messaging") Cc: Ying Xue Cc: Jon Maloy Signed-off-by: Cong Wang Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e1396fb87779..656940692a44 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -880,7 +880,6 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_group *grp = tsk->group; struct net *net = sock_net(sk); struct tipc_member *mb = NULL; u32 node, port; @@ -894,7 +893,9 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, /* Block or return if destination link or member is congested */ rc = tipc_wait_for_cond(sock, &timeout, !tipc_dest_find(&tsk->cong_links, node, 0) && - !tipc_group_cong(grp, node, port, blks, &mb)); + tsk->group && + !tipc_group_cong(tsk->group, node, port, blks, + &mb)); if (unlikely(rc)) return rc; @@ -924,7 +925,6 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, struct tipc_sock *tsk = tipc_sk(sk); struct list_head *cong_links = &tsk->cong_links; int blks = tsk_blocks(GROUP_H_SIZE + dlen); - struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct tipc_member *first = NULL; struct tipc_member *mbr = NULL; @@ -941,9 +941,10 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, type = msg_nametype(hdr); inst = dest->addr.name.name.instance; scope = msg_lookup_scope(hdr); - exclude = tipc_group_exclude(grp); while (++lookups < 4) { + exclude = tipc_group_exclude(tsk->group); + first = NULL; /* Look for a non-congested destination member, if any */ @@ -952,7 +953,8 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, &dstcnt, exclude, false)) return -EHOSTUNREACH; tipc_dest_pop(&dsts, &node, &port); - cong = tipc_group_cong(grp, node, port, blks, &mbr); + cong = tipc_group_cong(tsk->group, node, port, blks, + &mbr); if (!cong) break; if (mbr == first) @@ -971,7 +973,8 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, /* Block or return if destination link or member is congested */ rc = tipc_wait_for_cond(sock, &timeout, !tipc_dest_find(cong_links, node, 0) && - !tipc_group_cong(grp, node, port, + tsk->group && + !tipc_group_cong(tsk->group, node, port, blks, &mbr)); if (unlikely(rc)) return rc; @@ -1006,8 +1009,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_group *grp = tsk->group; - struct tipc_nlist *dsts = tipc_group_dests(grp); + struct tipc_nlist *dsts = tipc_group_dests(tsk->group); struct tipc_mc_method *method = &tsk->mc_method; bool ack = method->mandatory && method->rcast; int blks = tsk_blocks(MCAST_H_SIZE + dlen); @@ -1020,8 +1022,9 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, return -EHOSTUNREACH; /* Block or return if any destination link or member is congested */ - rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt && - !tipc_group_bc_cong(grp, blks)); + rc = tipc_wait_for_cond(sock, &timeout, + !tsk->cong_link_cnt && tsk->group && + !tipc_group_bc_cong(tsk->group, blks)); if (unlikely(rc)) return rc; @@ -1036,7 +1039,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, msg_set_hdr_sz(hdr, GROUP_H_SIZE); msg_set_destport(hdr, 0); msg_set_destnode(hdr, 0); - 
msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp)); + msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(tsk->group)); /* Avoid getting stuck with repeated forced replicasts */ msg_set_grp_bc_ack_req(hdr, ack); -- cgit v1.2.3-59-g8ed1b From 110a1cc28bc383adb4885eff27e18c61ddebffb4 Mon Sep 17 00:00:00 2001 From: Jörgen Storvist Date: Wed, 12 Dec 2018 22:45:34 +0100 Subject: qmi_wwan: Added support for Fibocom NL668 series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added support for Fibocom NL668 series QMI interface. Using QMI_QUIRK_SET_DTR required for Qualcomm MDM9x07 chipsets. Signed-off-by: Jörgen Storvist Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 72a55b6b4211..dada68f279bc 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1117,6 +1117,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1435, 0xd181, 4)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 5)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd191, 4)}, /* Wistron NeWeb D19Q1 */ + {QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)}, /* Fibocom NL668 series */ {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ {QMI_FIXED_INTF(0x16d8, 0x6007, 0)}, /* CMOTech CHE-628S */ {QMI_FIXED_INTF(0x16d8, 0x6008, 0)}, /* CMOTech CMU-301 */ -- cgit v1.2.3-59-g8ed1b From 1986af16e8ed355822600c24b3d2f0be46b573df Mon Sep 17 00:00:00 2001 From: Jörgen Storvist Date: Thu, 13 Dec 2018 17:00:35 +0100 Subject: qmi_wwan: Added support for Telit LN940 series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added support for the Telit LN940 series cellular modules QMI interface. QMI_QUIRK_SET_DTR quirk requied for Qualcomm MDM9x40 chipset. Signed-off-by: Jörgen Storvist Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index dada68f279bc..c8872dd5ff5e 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1230,6 +1230,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */ {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */ {QMI_FIXED_INTF(0x1c9e, 0x9803, 4)}, /* Telewell TW-3G HSPA+ */ {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ -- cgit v1.2.3-59-g8ed1b From ade446403bfb79d3528d56071a84b15351a139ad Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 13 Dec 2018 17:23:32 +0100 Subject: net: ipv4: do not handle duplicate fragments as overlapping Since commit 7969e5c40dfd ("ip: discard IPv4 datagrams with overlapping segments.") IPv4 reassembly code drops the whole queue whenever an overlapping fragment is received. However, the test is written in a way which detects duplicate fragments as overlapping so that in environments with many duplicate packets, fragmented packets may be undeliverable. Add an extra test and for (potentially) duplicate fragment, only drop the new fragment rather than the whole queue. Only starting offset and length are checked, not the contents of the fragments as that would be too expensive. 
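As a hedged illustration of that offset/length test (frag_is_potential_dup() is illustrative, not the kernel code), a fragment counts as a duplicate candidate only when its interval lies entirely inside the interval already covered by stored fragments:

#include <linux/types.h>

static bool frag_is_potential_dup(unsigned int offset, unsigned int end,
                                  unsigned int run_start, unsigned int run_end)
{
        /* no new bytes: the span is already covered by existing data */
        return offset >= run_start && end <= run_end;
}

Only the interval is compared; checking the payload bytes as well would add a memcmp() per fragment, the cost the text calls too expensive.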
For similar reason, linear list ("run") of a rbtree node is not iterated, we only check if the new fragment is a subset of the interval covered by existing consecutive fragments. v2: instead of an exact check iterating through linear list of an rbtree node, only check if the new fragment is subset of the "run" (suggested by Eric Dumazet) Fixes: 7969e5c40dfd ("ip: discard IPv4 datagrams with overlapping segments.") Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index aa0b22697998..867be8f7f1fa 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -346,10 +346,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct rb_node **rbn, *parent; struct sk_buff *skb1, *prev_tail; + int ihl, end, skb1_run_end; struct net_device *dev; unsigned int fragsize; int flags, offset; - int ihl, end; int err = -ENOENT; u8 ecn; @@ -419,7 +419,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * overlapping fragment, the entire datagram (and any constituent * fragments) MUST be silently discarded. * - * We do the same here for IPv4 (and increment an snmp counter). + * We do the same here for IPv4 (and increment an snmp counter) but + * we do not want to drop the whole queue in response to a duplicate + * fragment. */ err = -EINVAL; @@ -444,13 +446,17 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) do { parent = *rbn; skb1 = rb_to_skb(parent); + skb1_run_end = skb1->ip_defrag_offset + + FRAG_CB(skb1)->frag_run_len; if (end <= skb1->ip_defrag_offset) rbn = &parent->rb_left; - else if (offset >= skb1->ip_defrag_offset + - FRAG_CB(skb1)->frag_run_len) + else if (offset >= skb1_run_end) rbn = &parent->rb_right; - else /* Found an overlap with skb1. */ - goto overlap; + else if (offset >= skb1->ip_defrag_offset && + end <= skb1_run_end) + goto err; /* No new data, potential duplicate */ + else + goto overlap; /* Found an overlap */ } while (*rbn); /* Here we have parent properly set, and rbn pointing to * one of its NULL left/right children. Insert skb. -- cgit v1.2.3-59-g8ed1b From 15c6d8e565943a904172de69cc13c53f724fa16c Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Thu, 13 Dec 2018 15:00:11 -0500 Subject: mod_devicetable.h: correct kerneldoc typo, "PHYSID2" -> "MII_PHYSID2" Signed-off-by: Robert P. J. Day Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 01797cb4587e..a0dcc9b6a723 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -565,7 +565,7 @@ struct platform_device_id { /** * struct mdio_device_id - identifies PHY devices on an MDIO/MII bus * @phy_id: The result of - * (mdio_read(&MII_PHYSID1) << 16 | mdio_read(&PHYSID2)) & @phy_id_mask + * (mdio_read(&MII_PHYSID1) << 16 | mdio_read(&MII_PHYSID2)) & @phy_id_mask * for this PHY type * @phy_id_mask: Defines the significant bits of @phy_id. A value of 0 * is used to terminate an array of struct mdio_device_id. 
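A hedged sketch of the composition the corrected kerneldoc describes (phy_id_matches() is illustrative, not an in-tree helper, and error handling is omitted): PHYSID1 supplies the upper 16 bits, PHYSID2 the lower 16, and both sides are masked before the comparison against a driver's id table entry.

#include <linux/phy.h>
#include <linux/mii.h>
#include <linux/mod_devicetable.h>

static bool phy_id_matches(struct mii_bus *bus, int addr,
                           const struct mdio_device_id *entry)
{
        u32 id = ((u32)mdiobus_read(bus, addr, MII_PHYSID1) << 16) |
                  (u32)mdiobus_read(bus, addr, MII_PHYSID2);

        return (id & entry->phy_id_mask) == (entry->phy_id & entry->phy_id_mask);
}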
-- cgit v1.2.3-59-g8ed1b From 8203e2d844d34af247a151d8ebd68553a6e91785 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Dec 2018 06:46:49 -0800 Subject: net: clear skb->tstamp in forwarding paths Sergey reported that forwarding was no longer working if the fq packet scheduler was used. This is caused by the recent switch to the EDT model, since incoming packets might have been timestamped by __net_timestamp(). __net_timestamp() uses ktime_get_real(), while fq expects packets using the CLOCK_MONOTONIC base. The fix is to clear skb->tstamp in forwarding paths. Fixes: 80b14dee2bea ("net: Add a new socket option for a future transmit time.") Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC") Signed-off-by: Eric Dumazet Reported-by: Sergey Matyukevich Tested-by: Sergey Matyukevich Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 1 + net/ipv6/ip6_output.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 32662e9e5d21..d5984d31ab93 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -72,6 +72,7 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s if (unlikely(opt->optlen)) ip_forward_options(skb); + skb->tstamp = 0; return dst_output(net, sk, skb); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fcd3c66ded16..4591ca4bdbe8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -378,6 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk, __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); + skb->tstamp = 0; return dst_output(net, sk, skb); } -- cgit v1.2.3-59-g8ed1b From 4e1d4be681b2c26fd874adbf584bf034573ac45d Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:20 +0800 Subject: net: hns: Incorrect offset address used for some registers. According to the hip06 Datasheet: 1. The offset of INGRESS_SW_VLAN_TAG_DISC should be 0x1A00+4*all_chn_num 2. The offset of INGRESS_IN_DATA_STP_DISC should be 0x1A50+4*all_chn_num Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index e557a4ef5996..dc9215a565c9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2166,9 +2166,9 @@ void hns_dsaf_update_stats(struct dsaf_device *dsaf_dev, u32 node_num) DSAF_INODE_LOCAL_ADDR_FALSE_NUM_0_REG + 0x80 * (u64)node_num); hw_stats->vlan_drop += dsaf_read_dev(dsaf_dev, - DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + 0x80 * (u64)node_num); + DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + 4 * (u64)node_num); hw_stats->stp_drop += dsaf_read_dev(dsaf_dev, - DSAF_INODE_IN_DATA_STP_DISC_0_REG + 0x80 * (u64)node_num); + DSAF_INODE_IN_DATA_STP_DISC_0_REG + 4 * (u64)node_num); /* pfc pause frame statistics stored in dsaf inode*/ if ((node_num < DSAF_SERVICE_NW_NUM) && !is_ver1) { -- cgit v1.2.3-59-g8ed1b From 308c6cafde0147616da45e3a928adae55c428deb Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:21 +0800 Subject: net: hns: All ports can not work when insmod hns ko after rmmod. There are two test cases: 1.
Remove the 4 modules (hns_enet_drv/hns_dsaf/hnae/hns_mdio) and install them again; the "ifconfig down/ifconfig up" command pair must be used to bring the port back to work. This patch calls the phy_stop function when initializing the phy to fix this bug. 2. Remove the 2 modules (hns_enet_drv/hns_dsaf) and install them again; all ports can no longer be used, because the phy device registration failed (the phy devices already exist). The phy devices are registered when hns_dsaf is installed, so this patch removes them when hns_dsaf is removed. The two cases are sometimes related; fixing the second case also requires fixing the first case, so fix them together. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 15 +++++++++++++++ drivers/net/ethernet/hisilicon/hns/hns_enet.c | 3 +++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 3613e400e816..a97228c93831 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -778,6 +778,17 @@ static int hns_mac_register_phy(struct hns_mac_cb *mac_cb) return rc; } +static void hns_mac_remove_phydev(struct hns_mac_cb *mac_cb) +{ + if (!to_acpi_device_node(mac_cb->fw_port) || !mac_cb->phy_dev) + return; + + phy_device_remove(mac_cb->phy_dev); + phy_device_free(mac_cb->phy_dev); + + mac_cb->phy_dev = NULL; +} + #define MAC_MEDIA_TYPE_MAX_LEN 16 static const struct { @@ -1117,7 +1128,11 @@ void hns_mac_uninit(struct dsaf_device *dsaf_dev) int max_port_num = hns_mac_get_max_port_num(dsaf_dev); for (i = 0; i < max_port_num; i++) { + if (!dsaf_dev->mac_cb[i]) + continue; + dsaf_dev->misc_op->cpld_reset_led(dsaf_dev->mac_cb[i]); + hns_mac_remove_phydev(dsaf_dev->mac_cb[i]); dsaf_dev->mac_cb[i] = NULL; } } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 28e907831b0e..c205a0e4b64b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1186,6 +1186,9 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (h->phy_if == PHY_INTERFACE_MODE_XGMII) phy_dev->autoneg = false; + if (h->phy_if == PHY_INTERFACE_MODE_SGMII) + phy_stop(phy_dev); + return 0; } -- cgit v1.2.3-59-g8ed1b From 4ad26f117b6ea0f5d5f1592127bafb5ec65904d3 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:22 +0800 Subject: net: hns: Some registers use wrong address according to the datasheet. According to the hip06 datasheet: 1. Six registers use the wrong address: RCB_COM_SF_CFG_INTMASK_RING RCB_COM_SF_CFG_RING_STS RCB_COM_SF_CFG_RING RCB_COM_SF_CFG_INTMASK_BD RCB_COM_SF_CFG_BD_RINT_STS DSAF_INODE_VC1_IN_PKT_NUM_0_REG 2. The offset of DSAF_INODE_VC1_IN_PKT_NUM_0_REG should be 0x103C + 0x80 * all_chn_num 3. The offset used to show the value of DSAF_INODE_IN_DATA_STP_DISC_0_REG is wrong, so the value of DSAF_INODE_SW_VLAN_TAG_DISC_0_REG will be overwritten These registers are only used in "ethtool -d", so this did not cause the ndev to malfunction. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S.
Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 242 ++++++++++----------- drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 12 +- 2 files changed, 127 insertions(+), 127 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index dc9215a565c9..97d62decba19 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2285,237 +2285,237 @@ void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data) DSAF_INODE_BD_ORDER_STATUS_0_REG + j * 4); p[223 + i] = dsaf_read_dev(ddev, DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + j * 4); - p[224 + i] = dsaf_read_dev(ddev, + p[226 + i] = dsaf_read_dev(ddev, DSAF_INODE_IN_DATA_STP_DISC_0_REG + j * 4); } - p[227] = dsaf_read_dev(ddev, DSAF_INODE_GE_FC_EN_0_REG + port * 4); + p[229] = dsaf_read_dev(ddev, DSAF_INODE_GE_FC_EN_0_REG + port * 4); for (i = 0; i < DSAF_INODE_NUM / DSAF_COMM_CHN; i++) { j = i * DSAF_COMM_CHN + port; - p[228 + i] = dsaf_read_dev(ddev, + p[230 + i] = dsaf_read_dev(ddev, DSAF_INODE_VC0_IN_PKT_NUM_0_REG + j * 4); } - p[231] = dsaf_read_dev(ddev, - DSAF_INODE_VC1_IN_PKT_NUM_0_REG + port * 4); + p[233] = dsaf_read_dev(ddev, + DSAF_INODE_VC1_IN_PKT_NUM_0_REG + port * 0x80); /* dsaf inode registers */ for (i = 0; i < HNS_DSAF_SBM_NUM(ddev) / DSAF_COMM_CHN; i++) { j = i * DSAF_COMM_CHN + port; - p[232 + i] = dsaf_read_dev(ddev, + p[234 + i] = dsaf_read_dev(ddev, DSAF_SBM_CFG_REG_0_REG + j * 0x80); - p[235 + i] = dsaf_read_dev(ddev, + p[237 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CFG_0_XGE_REG_0_REG + j * 0x80); - p[238 + i] = dsaf_read_dev(ddev, + p[240 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CFG_1_REG_0_REG + j * 0x80); - p[241 + i] = dsaf_read_dev(ddev, + p[243 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CFG_2_XGE_REG_0_REG + j * 0x80); - p[244 + i] = dsaf_read_dev(ddev, + p[246 + i] = dsaf_read_dev(ddev, DSAF_SBM_FREE_CNT_0_0_REG + j * 0x80); - p[245 + i] = dsaf_read_dev(ddev, + p[249 + i] = dsaf_read_dev(ddev, DSAF_SBM_FREE_CNT_1_0_REG + j * 0x80); - p[248 + i] = dsaf_read_dev(ddev, + p[252 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CNT_0_0_REG + j * 0x80); - p[251 + i] = dsaf_read_dev(ddev, + p[255 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CNT_1_0_REG + j * 0x80); - p[254 + i] = dsaf_read_dev(ddev, + p[258 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CNT_2_0_REG + j * 0x80); - p[257 + i] = dsaf_read_dev(ddev, + p[261 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CNT_3_0_REG + j * 0x80); - p[260 + i] = dsaf_read_dev(ddev, + p[264 + i] = dsaf_read_dev(ddev, DSAF_SBM_INER_ST_0_REG + j * 0x80); - p[263 + i] = dsaf_read_dev(ddev, + p[267 + i] = dsaf_read_dev(ddev, DSAF_SBM_MIB_REQ_FAILED_TC_0_REG + j * 0x80); - p[266 + i] = dsaf_read_dev(ddev, + p[270 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_CNT_0_REG + j * 0x80); - p[269 + i] = dsaf_read_dev(ddev, + p[273 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_DROP_CNT_0_REG + j * 0x80); - p[272 + i] = dsaf_read_dev(ddev, + p[276 + i] = dsaf_read_dev(ddev, DSAF_SBM_INF_OUTPORT_CNT_0_REG + j * 0x80); - p[275 + i] = dsaf_read_dev(ddev, + p[279 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC0_CNT_0_REG + j * 0x80); - p[278 + i] = dsaf_read_dev(ddev, + p[282 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC1_CNT_0_REG + j * 0x80); - p[281 + i] = dsaf_read_dev(ddev, + p[285 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC2_CNT_0_REG + j * 0x80); - p[284 + i] = dsaf_read_dev(ddev, + p[288 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC3_CNT_0_REG + j * 0x80); - p[287 + 
i] = dsaf_read_dev(ddev, + p[291 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC4_CNT_0_REG + j * 0x80); - p[290 + i] = dsaf_read_dev(ddev, + p[294 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC5_CNT_0_REG + j * 0x80); - p[293 + i] = dsaf_read_dev(ddev, + p[297 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC6_CNT_0_REG + j * 0x80); - p[296 + i] = dsaf_read_dev(ddev, + p[300 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_INPORT_TC7_CNT_0_REG + j * 0x80); - p[299 + i] = dsaf_read_dev(ddev, + p[303 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_REQ_CNT_0_REG + j * 0x80); - p[302 + i] = dsaf_read_dev(ddev, + p[306 + i] = dsaf_read_dev(ddev, DSAF_SBM_LNK_RELS_CNT_0_REG + j * 0x80); - p[305 + i] = dsaf_read_dev(ddev, + p[309 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CFG_3_REG_0_REG + j * 0x80); - p[308 + i] = dsaf_read_dev(ddev, + p[312 + i] = dsaf_read_dev(ddev, DSAF_SBM_BP_CFG_4_REG_0_REG + j * 0x80); } /* dsaf onode registers */ for (i = 0; i < DSAF_XOD_NUM; i++) { - p[311 + i] = dsaf_read_dev(ddev, + p[315 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_TSA_TC0_TC3_CFG_0_REG + i * 0x90); - p[319 + i] = dsaf_read_dev(ddev, + p[323 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_TSA_TC4_TC7_CFG_0_REG + i * 0x90); - p[327 + i] = dsaf_read_dev(ddev, + p[331 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_BW_TC0_TC3_CFG_0_REG + i * 0x90); - p[335 + i] = dsaf_read_dev(ddev, + p[339 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_BW_TC4_TC7_CFG_0_REG + i * 0x90); - p[343 + i] = dsaf_read_dev(ddev, + p[347 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_BW_OFFSET_CFG_0_REG + i * 0x90); - p[351 + i] = dsaf_read_dev(ddev, + p[355 + i] = dsaf_read_dev(ddev, DSAF_XOD_ETS_TOKEN_CFG_0_REG + i * 0x90); } - p[359] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_0_0_REG + port * 0x90); - p[360] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_1_0_REG + port * 0x90); - p[361] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_2_0_REG + port * 0x90); + p[363] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_0_0_REG + port * 0x90); + p[364] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_1_0_REG + port * 0x90); + p[365] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_2_0_REG + port * 0x90); for (i = 0; i < DSAF_XOD_BIG_NUM / DSAF_COMM_CHN; i++) { j = i * DSAF_COMM_CHN + port; - p[362 + i] = dsaf_read_dev(ddev, + p[366 + i] = dsaf_read_dev(ddev, DSAF_XOD_GNT_L_0_REG + j * 0x90); - p[365 + i] = dsaf_read_dev(ddev, + p[369 + i] = dsaf_read_dev(ddev, DSAF_XOD_GNT_H_0_REG + j * 0x90); - p[368 + i] = dsaf_read_dev(ddev, + p[372 + i] = dsaf_read_dev(ddev, DSAF_XOD_CONNECT_STATE_0_REG + j * 0x90); - p[371 + i] = dsaf_read_dev(ddev, + p[375 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVPKT_CNT_0_REG + j * 0x90); - p[374 + i] = dsaf_read_dev(ddev, + p[378 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVTC0_CNT_0_REG + j * 0x90); - p[377 + i] = dsaf_read_dev(ddev, + p[381 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVTC1_CNT_0_REG + j * 0x90); - p[380 + i] = dsaf_read_dev(ddev, + p[384 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVTC2_CNT_0_REG + j * 0x90); - p[383 + i] = dsaf_read_dev(ddev, + p[387 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVTC3_CNT_0_REG + j * 0x90); - p[386 + i] = dsaf_read_dev(ddev, + p[390 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVVC0_CNT_0_REG + j * 0x90); - p[389 + i] = dsaf_read_dev(ddev, + p[393 + i] = dsaf_read_dev(ddev, DSAF_XOD_RCVVC1_CNT_0_REG + j * 0x90); } - p[392] = dsaf_read_dev(ddev, + p[396] = dsaf_read_dev(ddev, DSAF_XOD_XGE_RCVIN0_CNT_0_REG + port * 0x90); - p[393] = dsaf_read_dev(ddev, + p[397] = dsaf_read_dev(ddev, DSAF_XOD_XGE_RCVIN1_CNT_0_REG + port * 0x90); - p[394] = dsaf_read_dev(ddev, + p[398] = dsaf_read_dev(ddev, 
DSAF_XOD_XGE_RCVIN2_CNT_0_REG + port * 0x90); - p[395] = dsaf_read_dev(ddev, + p[399] = dsaf_read_dev(ddev, DSAF_XOD_XGE_RCVIN3_CNT_0_REG + port * 0x90); - p[396] = dsaf_read_dev(ddev, + p[400] = dsaf_read_dev(ddev, DSAF_XOD_XGE_RCVIN4_CNT_0_REG + port * 0x90); - p[397] = dsaf_read_dev(ddev, + p[401] = dsaf_read_dev(ddev, DSAF_XOD_XGE_RCVIN5_CNT_0_REG + port * 0x90); - p[398] = dsaf_read_dev(ddev, + p[402] = dsaf_read_dev(ddev, DSAF_XOD_XGE_RCVIN6_CNT_0_REG + port * 0x90); - p[399] = dsaf_read_dev(ddev, + p[403] = dsaf_read_dev(ddev, DSAF_XOD_XGE_RCVIN7_CNT_0_REG + port * 0x90); - p[400] = dsaf_read_dev(ddev, + p[404] = dsaf_read_dev(ddev, DSAF_XOD_PPE_RCVIN0_CNT_0_REG + port * 0x90); - p[401] = dsaf_read_dev(ddev, + p[405] = dsaf_read_dev(ddev, DSAF_XOD_PPE_RCVIN1_CNT_0_REG + port * 0x90); - p[402] = dsaf_read_dev(ddev, + p[406] = dsaf_read_dev(ddev, DSAF_XOD_ROCEE_RCVIN0_CNT_0_REG + port * 0x90); - p[403] = dsaf_read_dev(ddev, + p[407] = dsaf_read_dev(ddev, DSAF_XOD_ROCEE_RCVIN1_CNT_0_REG + port * 0x90); - p[404] = dsaf_read_dev(ddev, + p[408] = dsaf_read_dev(ddev, DSAF_XOD_FIFO_STATUS_0_REG + port * 0x90); /* dsaf voq registers */ for (i = 0; i < DSAF_VOQ_NUM / DSAF_COMM_CHN; i++) { j = (i * DSAF_COMM_CHN + port) * 0x90; - p[405 + i] = dsaf_read_dev(ddev, + p[409 + i] = dsaf_read_dev(ddev, DSAF_VOQ_ECC_INVERT_EN_0_REG + j); - p[408 + i] = dsaf_read_dev(ddev, + p[412 + i] = dsaf_read_dev(ddev, DSAF_VOQ_SRAM_PKT_NUM_0_REG + j); - p[411 + i] = dsaf_read_dev(ddev, DSAF_VOQ_IN_PKT_NUM_0_REG + j); - p[414 + i] = dsaf_read_dev(ddev, + p[415 + i] = dsaf_read_dev(ddev, DSAF_VOQ_IN_PKT_NUM_0_REG + j); + p[418 + i] = dsaf_read_dev(ddev, DSAF_VOQ_OUT_PKT_NUM_0_REG + j); - p[417 + i] = dsaf_read_dev(ddev, + p[421 + i] = dsaf_read_dev(ddev, DSAF_VOQ_ECC_ERR_ADDR_0_REG + j); - p[420 + i] = dsaf_read_dev(ddev, DSAF_VOQ_BP_STATUS_0_REG + j); - p[423 + i] = dsaf_read_dev(ddev, DSAF_VOQ_SPUP_IDLE_0_REG + j); - p[426 + i] = dsaf_read_dev(ddev, + p[424 + i] = dsaf_read_dev(ddev, DSAF_VOQ_BP_STATUS_0_REG + j); + p[427 + i] = dsaf_read_dev(ddev, DSAF_VOQ_SPUP_IDLE_0_REG + j); + p[430 + i] = dsaf_read_dev(ddev, DSAF_VOQ_XGE_XOD_REQ_0_0_REG + j); - p[429 + i] = dsaf_read_dev(ddev, + p[433 + i] = dsaf_read_dev(ddev, DSAF_VOQ_XGE_XOD_REQ_1_0_REG + j); - p[432 + i] = dsaf_read_dev(ddev, + p[436 + i] = dsaf_read_dev(ddev, DSAF_VOQ_PPE_XOD_REQ_0_REG + j); - p[435 + i] = dsaf_read_dev(ddev, + p[439 + i] = dsaf_read_dev(ddev, DSAF_VOQ_ROCEE_XOD_REQ_0_REG + j); - p[438 + i] = dsaf_read_dev(ddev, + p[442 + i] = dsaf_read_dev(ddev, DSAF_VOQ_BP_ALL_THRD_0_REG + j); } /* dsaf tbl registers */ - p[441] = dsaf_read_dev(ddev, DSAF_TBL_CTRL_0_REG); - p[442] = dsaf_read_dev(ddev, DSAF_TBL_INT_MSK_0_REG); - p[443] = dsaf_read_dev(ddev, DSAF_TBL_INT_SRC_0_REG); - p[444] = dsaf_read_dev(ddev, DSAF_TBL_INT_STS_0_REG); - p[445] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_ADDR_0_REG); - p[446] = dsaf_read_dev(ddev, DSAF_TBL_LINE_ADDR_0_REG); - p[447] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_HIGH_0_REG); - p[448] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_LOW_0_REG); - p[449] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_4_0_REG); - p[450] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_3_0_REG); - p[451] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_2_0_REG); - p[452] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_1_0_REG); - p[453] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_0_0_REG); - p[454] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_UCAST_CFG_0_REG); - p[455] = dsaf_read_dev(ddev, DSAF_TBL_LIN_CFG_0_REG); - p[456] = dsaf_read_dev(ddev, 
DSAF_TBL_TCAM_RDATA_HIGH_0_REG); - p[457] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RDATA_LOW_0_REG); - p[458] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA4_0_REG); - p[459] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA3_0_REG); - p[460] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA2_0_REG); - p[461] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA1_0_REG); - p[462] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA0_0_REG); - p[463] = dsaf_read_dev(ddev, DSAF_TBL_LIN_RDATA_0_REG); + p[445] = dsaf_read_dev(ddev, DSAF_TBL_CTRL_0_REG); + p[446] = dsaf_read_dev(ddev, DSAF_TBL_INT_MSK_0_REG); + p[447] = dsaf_read_dev(ddev, DSAF_TBL_INT_SRC_0_REG); + p[448] = dsaf_read_dev(ddev, DSAF_TBL_INT_STS_0_REG); + p[449] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_ADDR_0_REG); + p[450] = dsaf_read_dev(ddev, DSAF_TBL_LINE_ADDR_0_REG); + p[451] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_HIGH_0_REG); + p[452] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_LOW_0_REG); + p[453] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_4_0_REG); + p[454] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_3_0_REG); + p[455] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_2_0_REG); + p[456] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_1_0_REG); + p[457] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_MCAST_CFG_0_0_REG); + p[458] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_UCAST_CFG_0_REG); + p[459] = dsaf_read_dev(ddev, DSAF_TBL_LIN_CFG_0_REG); + p[460] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RDATA_HIGH_0_REG); + p[461] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RDATA_LOW_0_REG); + p[462] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA4_0_REG); + p[463] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA3_0_REG); + p[464] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA2_0_REG); + p[465] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA1_0_REG); + p[466] = dsaf_read_dev(ddev, DSAF_TBL_TCAM_RAM_RDATA0_0_REG); + p[467] = dsaf_read_dev(ddev, DSAF_TBL_LIN_RDATA_0_REG); for (i = 0; i < DSAF_SW_PORT_NUM; i++) { j = i * 0x8; - p[464 + 2 * i] = dsaf_read_dev(ddev, + p[468 + 2 * i] = dsaf_read_dev(ddev, DSAF_TBL_DA0_MIS_INFO1_0_REG + j); - p[465 + 2 * i] = dsaf_read_dev(ddev, + p[469 + 2 * i] = dsaf_read_dev(ddev, DSAF_TBL_DA0_MIS_INFO0_0_REG + j); } - p[480] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO2_0_REG); - p[481] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO1_0_REG); - p[482] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO0_0_REG); - p[483] = dsaf_read_dev(ddev, DSAF_TBL_PUL_0_REG); - p[484] = dsaf_read_dev(ddev, DSAF_TBL_OLD_RSLT_0_REG); - p[485] = dsaf_read_dev(ddev, DSAF_TBL_OLD_SCAN_VAL_0_REG); - p[486] = dsaf_read_dev(ddev, DSAF_TBL_DFX_CTRL_0_REG); - p[487] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_0_REG); - p[488] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_2_0_REG); - p[489] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_I_0_REG); - p[490] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_O_0_REG); - p[491] = dsaf_read_dev(ddev, DSAF_TBL_UCAST_BCAST_MIS_INFO_0_0_REG); + p[484] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO2_0_REG); + p[485] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO1_0_REG); + p[486] = dsaf_read_dev(ddev, DSAF_TBL_SA_MIS_INFO0_0_REG); + p[487] = dsaf_read_dev(ddev, DSAF_TBL_PUL_0_REG); + p[488] = dsaf_read_dev(ddev, DSAF_TBL_OLD_RSLT_0_REG); + p[489] = dsaf_read_dev(ddev, DSAF_TBL_OLD_SCAN_VAL_0_REG); + p[490] = dsaf_read_dev(ddev, DSAF_TBL_DFX_CTRL_0_REG); + p[491] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_0_REG); + p[492] = dsaf_read_dev(ddev, DSAF_TBL_DFX_STAT_2_0_REG); + p[493] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_I_0_REG); + p[494] = dsaf_read_dev(ddev, DSAF_TBL_LKUP_NUM_O_0_REG); + p[495] = dsaf_read_dev(ddev, 
DSAF_TBL_UCAST_BCAST_MIS_INFO_0_0_REG); /* dsaf other registers */ - p[492] = dsaf_read_dev(ddev, DSAF_INODE_FIFO_WL_0_REG + port * 0x4); - p[493] = dsaf_read_dev(ddev, DSAF_ONODE_FIFO_WL_0_REG + port * 0x4); - p[494] = dsaf_read_dev(ddev, DSAF_XGE_GE_WORK_MODE_0_REG + port * 0x4); - p[495] = dsaf_read_dev(ddev, + p[496] = dsaf_read_dev(ddev, DSAF_INODE_FIFO_WL_0_REG + port * 0x4); + p[497] = dsaf_read_dev(ddev, DSAF_ONODE_FIFO_WL_0_REG + port * 0x4); + p[498] = dsaf_read_dev(ddev, DSAF_XGE_GE_WORK_MODE_0_REG + port * 0x4); + p[499] = dsaf_read_dev(ddev, DSAF_XGE_APP_RX_LINK_UP_0_REG + port * 0x4); - p[496] = dsaf_read_dev(ddev, DSAF_NETPORT_CTRL_SIG_0_REG + port * 0x4); - p[497] = dsaf_read_dev(ddev, DSAF_XGE_CTRL_SIG_CFG_0_REG + port * 0x4); + p[500] = dsaf_read_dev(ddev, DSAF_NETPORT_CTRL_SIG_0_REG + port * 0x4); + p[501] = dsaf_read_dev(ddev, DSAF_XGE_CTRL_SIG_CFG_0_REG + port * 0x4); if (!is_ver1) - p[498] = dsaf_read_dev(ddev, DSAF_PAUSE_CFG_REG + port * 0x4); + p[502] = dsaf_read_dev(ddev, DSAF_PAUSE_CFG_REG + port * 0x4); /* mark end of dsaf regs */ - for (i = 499; i < 504; i++) + for (i = 503; i < 504; i++) p[i] = 0xdddddddd; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 74d935d82cbc..f171d11b9b44 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -176,7 +176,7 @@ #define DSAF_INODE_IN_DATA_STP_DISC_0_REG 0x1A50 #define DSAF_INODE_GE_FC_EN_0_REG 0x1B00 #define DSAF_INODE_VC0_IN_PKT_NUM_0_REG 0x1B50 -#define DSAF_INODE_VC1_IN_PKT_NUM_0_REG 0x1C00 +#define DSAF_INODE_VC1_IN_PKT_NUM_0_REG 0x103C #define DSAF_INODE_IN_PRIO_PAUSE_BASE_REG 0x1C00 #define DSAF_INODE_IN_PRIO_PAUSE_BASE_OFFSET 0x100 #define DSAF_INODE_IN_PRIO_PAUSE_OFFSET 0x50 @@ -404,11 +404,11 @@ #define RCB_ECC_ERR_ADDR4_REG 0x460 #define RCB_ECC_ERR_ADDR5_REG 0x464 -#define RCB_COM_SF_CFG_INTMASK_RING 0x480 -#define RCB_COM_SF_CFG_RING_STS 0x484 -#define RCB_COM_SF_CFG_RING 0x488 -#define RCB_COM_SF_CFG_INTMASK_BD 0x48C -#define RCB_COM_SF_CFG_BD_RINT_STS 0x470 +#define RCB_COM_SF_CFG_INTMASK_RING 0x470 +#define RCB_COM_SF_CFG_RING_STS 0x474 +#define RCB_COM_SF_CFG_RING 0x478 +#define RCB_COM_SF_CFG_INTMASK_BD 0x47C +#define RCB_COM_SF_CFG_BD_RINT_STS 0x480 #define RCB_COM_RCB_RD_BD_BUSY 0x490 #define RCB_COM_RCB_FBD_CRT_EN 0x494 #define RCB_COM_AXI_WR_ERR_INTMASK 0x498 -- cgit v1.2.3-59-g8ed1b From 5778b13b64eca5549d242686f2f91a2c80c8fa40 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:23 +0800 Subject: net: hns: Fixed bug that netdev was opened twice After resetting the dsaf to try to repair a chip error such as an ecc error, the net device will be opened if the net interface is up. But if at the same time a user sets the net device up with the ifconfig command, the net device will be opened twice consecutively. The napi_enable function is called when opening the device, and a kernel panic will occur if it is called twice consecutively, as follows: static inline void napi_enable(struct napi_struct *n) { BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); smp_mb__before_clear_bit(); clear_bit(NAPI_STATE_SCHED, &n->state); } [37255.571996] Kernel panic - not syncing: BUG!
[37255.595234] Call trace: [37255.597694] [] dump_backtrace+0x0/0x1a0 [37255.603114] [] show_stack+0x20/0x28 [37255.608187] [] dump_stack+0x98/0xb8 [37255.613258] [] panic+0x10c/0x26c [37255.618070] [] hns_nic_net_up+0x30c/0x4e0 [37255.623664] [] hns_nic_net_open+0x94/0x12c [37255.629346] [] __dev_open+0xf4/0x168 [37255.634504] [] __dev_change_flags+0x98/0x15c [37255.640359] [] dev_change_flags+0x2c/0x68 [37255.769580] [] devinet_ioctl+0x650/0x704 [37255.775086] [] inet_ioctl+0x98/0xb4 [37255.780159] [] sock_do_ioctl+0x44/0x84 [37255.785490] [] sock_ioctl+0x248/0x30c [37255.790737] [] do_vfs_ioctl+0x480/0x618 [37255.796156] [] SyS_ioctl+0x90/0xa4 [37255.801139] SMP: stopping secondary CPUs [37255.805079] kbox: catch panic event. [37255.809586] collected_len = 128928, LOG_BUF_LEN_LOCAL = 131072 [37255.816103] flush cache 0xffff80003f000000 size 0x800000 [37255.822192] flush cache 0xffff80003f000000 size 0x800000 [37255.828289] flush cache 0xffff80003f000000 size 0x800000 [37255.834378] kbox: no notify die func register. no need to notify [37255.840413] ---[ end Kernel panic - not syncing: BUG! This patch fixes the bug by checking the NIC_STATE_DOWN flag. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index c205a0e4b64b..c9454c4784cb 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1333,6 +1333,9 @@ static int hns_nic_net_up(struct net_device *ndev) int i, j; int ret; + if (!test_bit(NIC_STATE_DOWN, &priv->state)) + return 0; + ret = hns_nic_init_irq(priv); if (ret != 0) { netdev_err(ndev, "hns init irq failed! ret=%d\n", ret); -- cgit v1.2.3-59-g8ed1b From 31f6b61d810654fb3ef43f4d8afda0f44b142fad Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:24 +0800 Subject: net: hns: Clean rx fbd when ae stopped. If there are packets in the hardware when changing the speed or duplex, the hardware may hang up. This patch adds code to wait for the rx fbd to be cleaned up when the ae is stopped. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index b52029e26d15..ad1779fc410e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -379,6 +379,9 @@ static void hns_ae_stop(struct hnae_handle *handle) hns_ae_ring_enable_all(handle, 0); + /* clean rx fbd. */ + hns_rcb_wait_fbd_clean(handle->qs, handle->q_num, RCB_INT_FLAG_RX); + (void)hns_mac_vm_config_bc_en(mac_cb, 0, false); } -- cgit v1.2.3-59-g8ed1b From c82bd077e1ba3dd586569c733dc6d3dd4b0e43cd Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:25 +0800 Subject: net: hns: Free irq when exit from abnormal branch 1. In "hns_nic_init_irq", if requesting an irq fails at index i, the function returns directly without releasing the irq resources that were already requested. 2. In "hns_nic_net_up", after "hns_nic_init_irq", if an exceptional branch occurs, the irqs that were already requested are not released. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S.
Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index c9454c4784cb..03d959c7a39f 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1284,6 +1284,22 @@ static int hns_nic_init_affinity_mask(int q_num, int ring_idx, return cpu; } +static void hns_nic_free_irq(int q_num, struct hns_nic_priv *priv) +{ + int i; + + for (i = 0; i < q_num * 2; i++) { + if (priv->ring_data[i].ring->irq_init_flag == RCB_IRQ_INITED) { + irq_set_affinity_hint(priv->ring_data[i].ring->irq, + NULL); + free_irq(priv->ring_data[i].ring->irq, + &priv->ring_data[i]); + priv->ring_data[i].ring->irq_init_flag = + RCB_IRQ_NOT_INITED; + } + } +} + static int hns_nic_init_irq(struct hns_nic_priv *priv) { struct hnae_handle *h = priv->ae_handle; @@ -1309,7 +1325,7 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv) if (ret) { netdev_err(priv->netdev, "request irq(%d) fail\n", rd->ring->irq); - return ret; + goto out_free_irq; } disable_irq(rd->ring->irq); @@ -1324,6 +1340,10 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv) } return 0; + +out_free_irq: + hns_nic_free_irq(h->q_num, priv); + return ret; } static int hns_nic_net_up(struct net_device *ndev) @@ -1371,6 +1391,7 @@ out_has_some_queues: for (j = i - 1; j >= 0; j--) hns_nic_ring_close(ndev, j); + hns_nic_free_irq(h->q_num, priv); set_bit(NIC_STATE_DOWN, &priv->state); return ret; -- cgit v1.2.3-59-g8ed1b From a57275d35576fdd89d8c771eedf1e7cf97e0dfa6 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:26 +0800 Subject: net: hns: Avoid net reset caused by pause frames storm A large number of MAC pause frames on the net can cause a tx timeout of the net device, and the net device is then reset to try to recover. That is not useful and will cause some other problems. So instead, double ndev->watchdog_timeo each time the device watchdog fires, until watchdog_timeo reaches 40s, and only then try resetting to recover. Also, dfx information such as hardware registers is collected on tx timeout, and some count registers are cleared when read. So this task needs to be moved before updating the net state, which also reads the count registers. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S.
Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 03d959c7a39f..8a53c82c72b4 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1509,11 +1509,19 @@ static int hns_nic_net_stop(struct net_device *ndev) } static void hns_tx_timeout_reset(struct hns_nic_priv *priv); +#define HNS_TX_TIMEO_LIMIT (40 * HZ) static void hns_nic_net_timeout(struct net_device *ndev) { struct hns_nic_priv *priv = netdev_priv(ndev); - hns_tx_timeout_reset(priv); + if (ndev->watchdog_timeo < HNS_TX_TIMEO_LIMIT) { + ndev->watchdog_timeo *= 2; + netdev_info(ndev, "watchdog_timo changed to %d.\n", + ndev->watchdog_timeo); + } else { + ndev->watchdog_timeo = HNS_NIC_TX_TIMEOUT; + hns_tx_timeout_reset(priv); + } } static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, @@ -2076,11 +2084,11 @@ static void hns_nic_service_task(struct work_struct *work) = container_of(work, struct hns_nic_priv, service_task); struct hnae_handle *h = priv->ae_handle; + hns_nic_reset_subtask(priv); hns_nic_update_link_status(priv->netdev); h->dev->ops->update_led_status(h); hns_nic_update_stats(priv->netdev); - hns_nic_reset_subtask(priv); hns_nic_service_event_complete(priv); } -- cgit v1.2.3-59-g8ed1b From 7e74a19ca522aec7c2be201a7ae1d1d57ded409b Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:27 +0800 Subject: net: hns: Fix ntuple-filters status error. The ntuple-filters feature is forced on by the chip, but it shows "ntuple-filters: off [fixed]" when using ethtool. This patch corrects it to "ntuple-filters: on [fixed]". Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 8a53c82c72b4..6242249c9f4c 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -2374,7 +2374,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev) ndev->min_mtu = MAC_MIN_MTU; switch (priv->enet_ver) { case AE_VERSION_2: - ndev->features |= NETIF_F_TSO | NETIF_F_TSO6; + ndev->features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_NTUPLE; ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6; -- cgit v1.2.3-59-g8ed1b From 726ae5c9e5f0c18eca8ea5296b526242c3e89822 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:28 +0800 Subject: net: hns: Add mac pcs config when enable|disable mac In some cases, enabling|disabling the mac and adjusting the link may make it hard to establish a link (or result in an abnormal link) between the mac and the phy. This patch adds code for the rx PCS to avoid this bug: disable the rx PCS when the driver disables the gmac, and enable the rx PCS when the driver enables the mac. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S.
Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c | 14 ++++++++++---- drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index aaf72c055711..1790cdafd9b8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -67,11 +67,14 @@ static void hns_gmac_enable(void *mac_drv, enum mac_commom_mode mode) struct mac_driver *drv = (struct mac_driver *)mac_drv; /*enable GE rX/tX */ - if ((mode == MAC_COMM_MODE_TX) || (mode == MAC_COMM_MODE_RX_AND_TX)) + if (mode == MAC_COMM_MODE_TX || mode == MAC_COMM_MODE_RX_AND_TX) dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_TX_EN_B, 1); - if ((mode == MAC_COMM_MODE_RX) || (mode == MAC_COMM_MODE_RX_AND_TX)) + if (mode == MAC_COMM_MODE_RX || mode == MAC_COMM_MODE_RX_AND_TX) { + /* enable rx pcs */ + dsaf_set_dev_bit(drv, GMAC_PCS_RX_EN_REG, 0, 0); dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_RX_EN_B, 1); + } } static void hns_gmac_disable(void *mac_drv, enum mac_commom_mode mode) @@ -79,11 +82,14 @@ static void hns_gmac_disable(void *mac_drv, enum mac_commom_mode mode) struct mac_driver *drv = (struct mac_driver *)mac_drv; /*disable GE rX/tX */ - if ((mode == MAC_COMM_MODE_TX) || (mode == MAC_COMM_MODE_RX_AND_TX)) + if (mode == MAC_COMM_MODE_TX || mode == MAC_COMM_MODE_RX_AND_TX) dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_TX_EN_B, 0); - if ((mode == MAC_COMM_MODE_RX) || (mode == MAC_COMM_MODE_RX_AND_TX)) + if (mode == MAC_COMM_MODE_RX || mode == MAC_COMM_MODE_RX_AND_TX) { + /* disable rx pcs */ + dsaf_set_dev_bit(drv, GMAC_PCS_RX_EN_REG, 0, 1); dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_RX_EN_B, 0); + } } /* hns_gmac_get_en - get port enable diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index f171d11b9b44..b9733b0b8482 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -534,6 +534,7 @@ #define GMAC_LD_LINK_COUNTER_REG 0x01D0UL #define GMAC_LOOP_REG 0x01DCUL #define GMAC_RECV_CONTROL_REG 0x01E0UL +#define GMAC_PCS_RX_EN_REG 0x01E4UL #define GMAC_VLAN_CODE_REG 0x01E8UL #define GMAC_RX_OVERRUN_CNT_REG 0x01ECUL #define GMAC_RX_LENGTHFIELD_ERR_CNT_REG 0x01F4UL -- cgit v1.2.3-59-g8ed1b From 6adafc356e20189193b38ee6b9af7743078bf6b4 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 15 Dec 2018 11:53:29 +0800 Subject: net: hns: Fix ping failed when use net bridge and send multicast Create a net bridge and add eth and vnet to the bridge; the vnet is used by a virtual machine. When pinging the virtual machine from the outside host while the virtual machine is sending multicast at the same time, the ping packets will be lost. The multicast packets are sent to the eth, the eth sends them to the bridge too, and the bridge learns the mac of the eth. When the outside host pings the virtual machine, the packet matches the promisc entry of the eth, which is not expected, and the bridge sends it to the eth instead of the vnet, so the ping is lost. So this patch changes the promisc tcam entry position to the END of the 512 tcam entries, which indicates a lower priority, and separates the single promisc entry into two (mc & uc) to avoid packets matching the wrong tcam entry. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S.
Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 257 +++++++++++++++++---- 1 file changed, 216 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 97d62decba19..3b9e74be5fbd 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -934,6 +934,62 @@ static void hns_dsaf_tcam_mc_cfg( spin_unlock_bh(&dsaf_dev->tcam_lock); } +/** + * hns_dsaf_tcam_uc_cfg_vague - INT + * @dsaf_dev: dsa fabric device struct pointer + * @address, + * @ptbl_tcam_data, + */ +static void hns_dsaf_tcam_uc_cfg_vague(struct dsaf_device *dsaf_dev, + u32 address, + struct dsaf_tbl_tcam_data *tcam_data, + struct dsaf_tbl_tcam_data *tcam_mask, + struct dsaf_tbl_tcam_ucast_cfg *tcam_uc) +{ + spin_lock_bh(&dsaf_dev->tcam_lock); + hns_dsaf_tbl_tcam_addr_cfg(dsaf_dev, address); + hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, tcam_data); + hns_dsaf_tbl_tcam_ucast_cfg(dsaf_dev, tcam_uc); + hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask); + hns_dsaf_tbl_tcam_data_ucast_pul(dsaf_dev); + + /*Restore Match Data*/ + tcam_mask->tbl_tcam_data_high = 0xffffffff; + tcam_mask->tbl_tcam_data_low = 0xffffffff; + hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask); + + spin_unlock_bh(&dsaf_dev->tcam_lock); +} + +/** + * hns_dsaf_tcam_mc_cfg_vague - INT + * @dsaf_dev: dsa fabric device struct pointer + * @address, + * @ptbl_tcam_data, + * @ptbl_tcam_mask + * @ptbl_tcam_mcast + */ +static void hns_dsaf_tcam_mc_cfg_vague(struct dsaf_device *dsaf_dev, + u32 address, + struct dsaf_tbl_tcam_data *tcam_data, + struct dsaf_tbl_tcam_data *tcam_mask, + struct dsaf_tbl_tcam_mcast_cfg *tcam_mc) +{ + spin_lock_bh(&dsaf_dev->tcam_lock); + hns_dsaf_tbl_tcam_addr_cfg(dsaf_dev, address); + hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, tcam_data); + hns_dsaf_tbl_tcam_mcast_cfg(dsaf_dev, tcam_mc); + hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask); + hns_dsaf_tbl_tcam_data_mcast_pul(dsaf_dev); + + /*Restore Match Data*/ + tcam_mask->tbl_tcam_data_high = 0xffffffff; + tcam_mask->tbl_tcam_data_low = 0xffffffff; + hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask); + + spin_unlock_bh(&dsaf_dev->tcam_lock); +} + /** * hns_dsaf_tcam_mc_invld - INT * @dsaf_id: dsa fabric id @@ -1492,6 +1548,27 @@ static u16 hns_dsaf_find_empty_mac_entry(struct dsaf_device *dsaf_dev) return DSAF_INVALID_ENTRY_IDX; } +/** + * hns_dsaf_find_empty_mac_entry_reverse + * search dsa fabric soft empty-entry from the end + * @dsaf_dev: dsa fabric device struct pointer + */ +static u16 hns_dsaf_find_empty_mac_entry_reverse(struct dsaf_device *dsaf_dev) +{ + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_drv_soft_mac_tbl *soft_mac_entry; + int i; + + soft_mac_entry = priv->soft_mac_tbl + (DSAF_TCAM_SUM - 1); + for (i = (DSAF_TCAM_SUM - 1); i > 0; i--) { + /* search all entry from end to start.*/ + if (soft_mac_entry->index == DSAF_INVALID_ENTRY_IDX) + return i; + soft_mac_entry--; + } + return DSAF_INVALID_ENTRY_IDX; +} + /** * hns_dsaf_set_mac_key - set mac key * @dsaf_dev: dsa fabric device struct pointer @@ -2673,58 +2750,156 @@ int hns_dsaf_get_regs_count(void) return DSAF_DUMP_REGS_NUM; } -/* Reserve the last TCAM entry for promisc support */ -#define dsaf_promisc_tcam_entry(port) \ - (DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM + (port)) -void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, - u32 port, bool enable) +static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port) { + 
struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 1, 0, 0, 0x80}; + struct dsaf_tbl_tcam_data tbl_tcam_data_mc = {0x01000000, port}; + struct dsaf_tbl_tcam_data tbl_tcam_mask_uc = {0x01000000, 0xf}; + struct dsaf_tbl_tcam_mcast_cfg tbl_tcam_mcast = {0, 0, {0} }; struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); - struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; - u16 entry_index; - struct dsaf_drv_tbl_tcam_key tbl_tcam_data, tbl_tcam_mask; - struct dsaf_tbl_tcam_mcast_cfg mac_data = {0}; + struct dsaf_tbl_tcam_data tbl_tcam_data_uc = {0, port}; + struct dsaf_drv_mac_single_dest_entry mask_entry; + struct dsaf_drv_tbl_tcam_key temp_key, mask_key; + struct dsaf_drv_soft_mac_tbl *soft_mac_entry; + u16 entry_index = DSAF_INVALID_ENTRY_IDX; + struct dsaf_drv_tbl_tcam_key mac_key; + struct hns_mac_cb *mac_cb; + u8 addr[ETH_ALEN] = {0}; + u8 port_num; + u16 mskid; - if ((AE_IS_VER1(dsaf_dev->dsaf_ver)) || HNS_DSAF_IS_DEBUG(dsaf_dev)) + /* promisc use vague table match with vlanid = 0 & macaddr = 0 */ + hns_dsaf_set_mac_key(dsaf_dev, &mac_key, 0x00, port, addr); + entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); + if (entry_index != DSAF_INVALID_ENTRY_IDX) return; - /* find the tcam entry index for promisc */ - entry_index = dsaf_promisc_tcam_entry(port); - - memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data)); - memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask)); - - /* config key mask */ - if (enable) { - dsaf_set_field(tbl_tcam_data.low.bits.port_vlan, - DSAF_TBL_TCAM_KEY_PORT_M, - DSAF_TBL_TCAM_KEY_PORT_S, port); - dsaf_set_field(tbl_tcam_mask.low.bits.port_vlan, - DSAF_TBL_TCAM_KEY_PORT_M, - DSAF_TBL_TCAM_KEY_PORT_S, 0xf); - - /* SUB_QID */ - dsaf_set_bit(mac_data.tbl_mcast_port_msk[0], - DSAF_SERVICE_NW_NUM, true); - mac_data.tbl_mcast_item_vld = true; /* item_vld bit */ + /* put promisc tcam entry in the end. */ + /* 1. set promisc unicast vague tcam entry. */ + entry_index = hns_dsaf_find_empty_mac_entry_reverse(dsaf_dev); + if (entry_index == DSAF_INVALID_ENTRY_IDX) { + dev_err(dsaf_dev->dev, + "enable uc promisc failed (port:%#x)\n", + port); + return; + } + + mac_cb = dsaf_dev->mac_cb[port]; + (void)hns_mac_get_inner_port_num(mac_cb, 0, &port_num); + tbl_tcam_ucast.tbl_ucast_out_port = port_num; + + /* config uc vague table */ + hns_dsaf_tcam_uc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_uc, + &tbl_tcam_mask_uc, &tbl_tcam_ucast); + + /* update software entry */ + soft_mac_entry = priv->soft_mac_tbl; + soft_mac_entry += entry_index; + soft_mac_entry->index = entry_index; + soft_mac_entry->tcam_key.high.val = mac_key.high.val; + soft_mac_entry->tcam_key.low.val = mac_key.low.val; + /* step back to the START for mc. */ + soft_mac_entry = priv->soft_mac_tbl; + + /* 2. set promisc multicast vague tcam entry. 
*/ + entry_index = hns_dsaf_find_empty_mac_entry_reverse(dsaf_dev); + if (entry_index == DSAF_INVALID_ENTRY_IDX) { + dev_err(dsaf_dev->dev, + "enable mc promisc failed (port:%#x)\n", + port); + return; + } + + memset(&mask_entry, 0x0, sizeof(mask_entry)); + memset(&mask_key, 0x0, sizeof(mask_key)); + memset(&temp_key, 0x0, sizeof(temp_key)); + mask_entry.addr[0] = 0x01; + hns_dsaf_set_mac_key(dsaf_dev, &mask_key, mask_entry.in_vlan_id, + port, mask_entry.addr); + tbl_tcam_mcast.tbl_mcast_item_vld = 1; + tbl_tcam_mcast.tbl_mcast_old_en = 0; + + if (port < DSAF_SERVICE_NW_NUM) { + mskid = port; + } else if (port >= DSAF_BASE_INNER_PORT_NUM) { + mskid = port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM; } else { - mac_data.tbl_mcast_item_vld = false; /* item_vld bit */ + dev_err(dsaf_dev->dev, "%s,pnum(%d)error,key(%#x:%#x)\n", + dsaf_dev->ae_dev.name, port, + mask_key.high.val, mask_key.low.val); + return; } - dev_dbg(dsaf_dev->dev, - "set_promisc_entry, %s Mac key(%#x:%#x) entry_index%d\n", - dsaf_dev->ae_dev.name, tbl_tcam_data.high.val, - tbl_tcam_data.low.val, entry_index); + dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32], + mskid % 32, 1); + memcpy(&temp_key, &mask_key, sizeof(mask_key)); + hns_dsaf_tcam_mc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_mc, + (struct dsaf_tbl_tcam_data *)(&mask_key), + &tbl_tcam_mcast); - /* config promisc entry with mask */ - hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)&tbl_tcam_data, - (struct dsaf_tbl_tcam_data *)&tbl_tcam_mask, - &mac_data); + /* update software entry */ + soft_mac_entry += entry_index; + soft_mac_entry->index = entry_index; + soft_mac_entry->tcam_key.high.val = temp_key.high.val; + soft_mac_entry->tcam_key.low.val = temp_key.low.val; +} - /* config software entry */ +static void set_promisc_tcam_disable(struct dsaf_device *dsaf_dev, u32 port) +{ + struct dsaf_tbl_tcam_data tbl_tcam_data_mc = {0x01000000, port}; + struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 0, 0, 0, 0}; + struct dsaf_tbl_tcam_mcast_cfg tbl_tcam_mcast = {0, 0, {0} }; + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_tbl_tcam_data tbl_tcam_data_uc = {0, 0}; + struct dsaf_tbl_tcam_data tbl_tcam_mask = {0, 0}; + struct dsaf_drv_soft_mac_tbl *soft_mac_entry; + u16 entry_index = DSAF_INVALID_ENTRY_IDX; + struct dsaf_drv_tbl_tcam_key mac_key; + u8 addr[ETH_ALEN] = {0}; + + /* 1. delete uc vague tcam entry. */ + /* promisc use vague table match with vlanid = 0 & macaddr = 0 */ + hns_dsaf_set_mac_key(dsaf_dev, &mac_key, 0x00, port, addr); + entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); + + if (entry_index == DSAF_INVALID_ENTRY_IDX) + return; + + /* config uc vague table */ + hns_dsaf_tcam_uc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_uc, + &tbl_tcam_mask, &tbl_tcam_ucast); + /* update soft management table. */ + soft_mac_entry = priv->soft_mac_tbl; soft_mac_entry += entry_index; - soft_mac_entry->index = enable ? entry_index : DSAF_INVALID_ENTRY_IDX; + soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX; + /* step back to the START for mc. */ + soft_mac_entry = priv->soft_mac_tbl; + + /* 2. delete mc vague tcam entry. 
*/ + addr[0] = 0x01; + memset(&mac_key, 0x0, sizeof(mac_key)); + hns_dsaf_set_mac_key(dsaf_dev, &mac_key, 0x00, port, addr); + entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); + + if (entry_index == DSAF_INVALID_ENTRY_IDX) + return; + + /* config mc vague table */ + hns_dsaf_tcam_mc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_mc, + &tbl_tcam_mask, &tbl_tcam_mcast); + /* update soft management table. */ + soft_mac_entry += entry_index; + soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX; +} + +/* Reserve the last TCAM entry for promisc support */ void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, u32 port, bool enable) { + if (enable) + set_promisc_tcam_enable(dsaf_dev, port); + else + set_promisc_tcam_disable(dsaf_dev, port); } int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port) -- cgit v1.2.3-59-g8ed1b From e735fd55b94bb48363737db3b1d57627c1a16b47 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Tue, 11 Dec 2018 13:56:49 +0100 Subject: net: mvneta: fix operation for 64K PAGE_SIZE Recent changes in the mvneta driver reworked allocation and handling of the ingress buffers to use entire pages. Apart from that, in the SW BM scenario the HW must be informed via PRXDQS about the biggest possible incoming buffer that can be propagated by RX descriptors. The BufferSize field was filled according to the MTU-dependent pkt_size value. A later change to PAGE_SIZE broke RX operation when using 64K pages, as the field is simply too small. This patch conditionally limits the value passed to the BufferSize of the PRXDQS register, depending on the PAGE_SIZE used. On this occasion, remove the now unused frag_size field of the mvneta_port structure. Fixes: 562e2f467e71 ("net: mvneta: Improve the buffer allocation method for SWBM") Signed-off-by: Marcin Wojtas Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e5397c8197b9..61b23497f836 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -408,7 +408,6 @@ struct mvneta_port { struct mvneta_pcpu_stats __percpu *stats; int pkt_size; - unsigned int frag_size; void __iomem *base; struct mvneta_rx_queue *rxqs; struct mvneta_tx_queue *txqs; @@ -2905,7 +2904,9 @@ static void mvneta_rxq_hw_init(struct mvneta_port *pp, if (!pp->bm_priv) { /* Set Offset */ mvneta_rxq_offset_set(pp, rxq, 0); - mvneta_rxq_buf_size_set(pp, rxq, pp->frag_size); + mvneta_rxq_buf_size_set(pp, rxq, PAGE_SIZE < SZ_64K ? + PAGE_SIZE : + MVNETA_RX_BUF_SIZE(pp->pkt_size)); mvneta_rxq_bm_disable(pp, rxq); mvneta_rxq_fill(pp, rxq, rxq->size); } else { @@ -3760,7 +3761,6 @@ static int mvneta_open(struct net_device *dev) int ret; pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu); - pp->frag_size = PAGE_SIZE; ret = mvneta_setup_rxqs(pp); if (ret) -- cgit v1.2.3-59-g8ed1b From eca1e56ceedd9cc185eb18baf307d3ff2e4af376 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 14 Dec 2018 18:30:22 +0200 Subject: iwlwifi: mvm: don't send GEO_TX_POWER_LIMIT to old firmwares Old firmware versions don't support this command. Sending it to any firmware before -41.ucode will crash the firmware.
This fixes https://bugzilla.kernel.org/show_bug.cgi?id=201975 Fixes: 66e839030fd6 ("iwlwifi: fix wrong WGDS_WIFI_DATA_SIZE") CC: #4.19+ Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 2ba890445c35..1689bead1b4f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -881,6 +881,15 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) int ret, i, j; u16 cmd_wide_id = WIDE_ID(PHY_OPS_GROUP, GEO_TX_POWER_LIMIT); + /* + * This command is not supported on earlier firmware versions. + * Unfortunately, we don't have a TLV API flag to rely on, so + * rely on the major version which is in the first byte of + * ucode_ver. + */ + if (IWL_UCODE_SERIAL(mvm->fw->ucode_ver) < 41) + return 0; + ret = iwl_mvm_sar_get_wgds_table(mvm); if (ret < 0) { IWL_DEBUG_RADIO(mvm, -- cgit v1.2.3-59-g8ed1b From bcb671c2fa0e238eea087fdcb7eba72376ba3697 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Dec 2018 00:04:40 +0900 Subject: bpf: promote bpf_perf_event.h to mandatory UAPI header Since commit c895f6f703ad ("bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type"), all architectures (except um) are required to have bpf_perf_event.h in uapi/asm. Add it to mandatory-y so "make headers_install" can check it. Signed-off-by: Masahiro Yamada Signed-off-by: Daniel Borkmann --- include/uapi/asm-generic/Kbuild.asm | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/asm-generic/Kbuild.asm b/include/uapi/asm-generic/Kbuild.asm index 21381449d98a..355c4ac2c0b0 100644 --- a/include/uapi/asm-generic/Kbuild.asm +++ b/include/uapi/asm-generic/Kbuild.asm @@ -3,6 +3,7 @@ # mandatory-y += auxvec.h mandatory-y += bitsperlong.h +mandatory-y += bpf_perf_event.h mandatory-y += byteorder.h mandatory-y += errno.h mandatory-y += fcntl.h -- cgit v1.2.3-59-g8ed1b From 255fe81a6ad420258b6dac8c509c99cd1c233390 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 17 Dec 2018 16:29:01 -0500 Subject: MAINTAINERS: change my email address Make my Gmail address the primary one from now on. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8119141a926f..2fbe569fb76c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8938,7 +8938,7 @@ F: arch/mips/boot/dts/img/pistachio_marduk.dts MARVELL 88E6XXX ETHERNET SWITCH FABRIC DRIVER M: Andrew Lunn -M: Vivien Didelot +M: Vivien Didelot L: netdev@vger.kernel.org S: Maintained F: drivers/net/dsa/mv88e6xxx/ @@ -10416,7 +10416,7 @@ F: drivers/net/wireless/ NETWORKING [DSA] M: Andrew Lunn -M: Vivien Didelot +M: Vivien Didelot M: Florian Fainelli S: Maintained F: Documentation/devicetree/bindings/net/dsa/ -- cgit v1.2.3-59-g8ed1b From fbfb2321e950918b430e7225546296b2dcadf725 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 17 Dec 2018 12:23:59 -0500 Subject: ipv6: add missing tx timestamping on IPPROTO_RAW Raw sockets support tx timestamping, but one case is missing. IPPROTO_RAW takes a separate packet construction path. raw_send_hdrinc has an explicit call to sock_tx_timestamp, but rawv6_send_hdrinc does not. Add it. 
Fixes: 11878b40ed5c ("net-timestamp: SOCK_RAW and PING timestamping") Signed-off-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv6/raw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 5e0efd3954e9..c8562432fcc3 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -658,6 +658,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->ip_summed = CHECKSUM_NONE; + sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); + if (flags & MSG_CONFIRM) skb_set_dst_pending_confirm(skb, 1); -- cgit v1.2.3-59-g8ed1b From 8f932f762e7928d250e21006b00ff9b7718b0a64 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 17 Dec 2018 12:24:00 -0500 Subject: net: add missing SOF_TIMESTAMPING_OPT_ID support SOF_TIMESTAMPING_OPT_ID is supported on TCP, UDP and RAW sockets. But it was missing on RAW with IPPROTO_IP, PF_PACKET and CAN. Add skb_setup_tx_timestamp that configures both tx_flags and tskey for these paths that do not need corking or use bytestream keys. Fixes: 09c2d251b707 ("net-timestamp: add key to disambiguate concurrent datagrams") Signed-off-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/sock.h | 25 +++++++++++++++++++++---- net/can/raw.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv6/raw.c | 2 +- net/packet/af_packet.c | 6 +++--- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index f665d74ae509..0e3a09380655 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2340,22 +2340,39 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); /** - * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped + * _sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @sk: socket sending this packet * @tsflags: timestamping flags to use * @tx_flags: completed with instructions for time stamping + * @tskey: filled in with next sk_tskey (not for TCP, which uses seqno) * * Note: callers should take care of initial ``*tx_flags`` value (usually 0) */ -static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags, - __u8 *tx_flags) +static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, + __u8 *tx_flags, __u32 *tskey) { - if (unlikely(tsflags)) + if (unlikely(tsflags)) { __sock_tx_timestamp(tsflags, tx_flags); + if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && + tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) + *tskey = sk->sk_tskey++; + } if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; } +static inline void sock_tx_timestamp(struct sock *sk, __u16 tsflags, + __u8 *tx_flags) +{ + _sock_tx_timestamp(sk, tsflags, tx_flags, NULL); +} + +static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) +{ + _sock_tx_timestamp(skb->sk, tsflags, &skb_shinfo(skb)->tx_flags, + &skb_shinfo(skb)->tskey); +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from diff --git a/net/can/raw.c b/net/can/raw.c index 3aab7664933f..c70207537488 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -771,7 +771,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (err < 0) goto free_skb; - sock_tx_timestamp(sk, sk->sk_tsflags, &skb_shinfo(skb)->tx_flags); + skb_setup_tx_timestamp(skb, sk->sk_tsflags); skb->dev = dev; skb->sk = sk; diff --git 
a/net/ipv4/raw.c b/net/ipv4/raw.c index 8ca3eb06ba04..169a652b3dd1 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -391,7 +391,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->ip_summed = CHECKSUM_NONE; - sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); + skb_setup_tx_timestamp(skb, sockc->tsflags); if (flags & MSG_CONFIRM) skb_set_dst_pending_confirm(skb, 1); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c8562432fcc3..fc2b5e845fdf 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -658,7 +658,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->ip_summed = CHECKSUM_NONE; - sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); + skb_setup_tx_timestamp(skb, sockc->tsflags); if (flags & MSG_CONFIRM) skb_set_dst_pending_confirm(skb, 1); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a74650e98f42..6655793765b2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1965,7 +1965,7 @@ retry: skb->mark = sk->sk_mark; skb->tstamp = sockc.transmit_time; - sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); + skb_setup_tx_timestamp(skb, sockc.tsflags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -2460,7 +2460,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; skb->tstamp = sockc->transmit_time; - sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); + skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); skb_reserve(skb, hlen); @@ -2898,7 +2898,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } - sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); + skb_setup_tx_timestamp(skb, sockc.tsflags); if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { -- cgit v1.2.3-59-g8ed1b From a5f3932646a74e690ed532a1f421f75c3b8cf1ec Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 17 Dec 2018 16:05:21 -0500 Subject: net: dsa: mv88e6xxx: set ethtool regs version Currently the ethtool_regs version is set to 0 for all DSA drivers. Use this field to store the chip ID to simplify the pretty dump of any interfaces registered by the "dsa" driver. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index e05d4eddc935..24fb6a685039 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1124,7 +1124,7 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, u16 *p = _p; int i; - regs->version = 0; + regs->version = chip->info->prod_num; memset(p, 0xff, 32 * sizeof(u16)); -- cgit v1.2.3-59-g8ed1b From a50e5fb8db83c5b57392204c21ea6c5c4ccefde6 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sat, 15 Dec 2018 11:03:10 +0200 Subject: mac80211: fix a kernel panic when TXing after TXQ teardown Recently TXQ teardown was moved earlier in ieee80211_unregister_hw(), to avoid a use-after-free of the netdev data. However, interfaces aren't fully removed at the point, and cfg80211_shutdown_all_interfaces can for example, TX a deauth frame. 
Move the TXQ teardown to the point between cfg80211_shutdown_all_interfaces and the free of netdev queues, so we can be sure they are torn down before netdev is freed, but after there is no ongoing TX. Fixes: 77cfaf52eca5 ("mac80211: Run TXQ teardown code before de-registering interfaces") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 +++ net/mac80211/main.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5f3c81e705c7..3a0171a65db3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -7,6 +7,7 @@ * Copyright 2008, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (c) 2016 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1951,6 +1952,8 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) WARN(local->open_count, "%s: open count remains %d\n", wiphy_name(local->hw.wiphy), local->open_count); + ieee80211_txq_teardown_flows(local); + mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 83e71e6b2ebe..7b8320d4a8e4 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1262,7 +1262,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) rtnl_unlock(); ieee80211_led_exit(local); ieee80211_wep_free(local); - ieee80211_txq_teardown_flows(local); fail_flows: destroy_workqueue(local->workqueue); fail_workqueue: @@ -1288,7 +1287,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&local->ifa6_notifier); #endif - ieee80211_txq_teardown_flows(local); rtnl_lock(); -- cgit v1.2.3-59-g8ed1b From cf0b70e71b32137ccf9c1f3dd9fb30cbf89b4322 Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Tue, 18 Dec 2018 15:59:20 +0000 Subject: mlxsw: core: Increase timeout during firmware flash process During the firmware flash process, some of the EMADs get timed out, which causes the driver to send them again with a limit of 5 retries. There are some situations in which 5 retries is not enough and the EMAD access fails. If the failed EMAD was related to the flashing process, the driver fails the flashing. The reason for these timeouts during firmware flashing is cache misses in the CPU running the firmware. In case the CPU needs to fetch instructions from the flash when a firmware is flashed, it needs to wait for the flashing to complete. Since flashing takes time, it is possible for pending EMADs to timeout. Fix by increasing EMADs' timeout while flashing firmware. Fixes: ce6ef68f433f ("mlxsw: spectrum: Implement the ethtool flash_device callback") Signed-off-by: Shalom Toledo Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 19 ++++++++++++++++++- drivers/net/ethernet/mellanox/mlxsw/core.h | 3 +++ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 7 ++++++- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 30f751e69698..f7154f358f27 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -81,6 +81,7 @@ struct mlxsw_core { struct mlxsw_core_port *ports; unsigned int max_ports; bool reload_fail; + bool fw_flash_in_progress; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ }; @@ -428,12 +429,16 @@ struct mlxsw_reg_trans { struct rcu_head rcu; }; -#define MLXSW_EMAD_TIMEOUT_MS 200 +#define MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS 3000 +#define MLXSW_EMAD_TIMEOUT_MS 200 static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) { unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS); + if (trans->core->fw_flash_in_progress) + timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS); + queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout); } @@ -1854,6 +1859,18 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_kvd_sizes_get); +void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core) +{ + mlxsw_core->fw_flash_in_progress = true; +} +EXPORT_SYMBOL(mlxsw_core_fw_flash_start); + +void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core) +{ + mlxsw_core->fw_flash_in_progress = false; +} +EXPORT_SYMBOL(mlxsw_core_fw_flash_end); + static int __init mlxsw_core_module_init(void) { int err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index c35be477856f..c4e4971764e5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -292,6 +292,9 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, u64 *p_single_size, u64 *p_double_size, u64 *p_linear_size); +void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core); +void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core); + bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core, enum mlxsw_res_id res_id); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9bec940330a4..75a2f1495455 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -309,8 +309,13 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, }, .mlxsw_sp = mlxsw_sp }; + int err; + + mlxsw_core_fw_flash_start(mlxsw_sp->core); + err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware); + mlxsw_core_fw_flash_end(mlxsw_sp->core); - return mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware); + return err; } static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) -- cgit v1.2.3-59-g8ed1b From 5d5043917a28438df163c4e6e56e77ef6d981089 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Dec 2018 15:59:22 +0000 Subject: mlxsw: spectrum: Add trap for decapsulated ARP packets After a packet was decapsulated it is classified to the relevant FID based on its VNI and undergoes L2 forwarding. Unlike regular (non-encapsulated) ARP packets, Spectrum does not trap decapsulated ARP packets during L2 forwarding and instead can only trap such packets in the underlay router during decapsulation. 
Add this missing packet trap, which is required for VXLAN routing when the MAC of the target host is not known. Fixes: b02597d513a9 ("mlxsw: spectrum: Add NVE packet traps") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 + drivers/net/ethernet/mellanox/mlxsw/trap.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 75a2f1495455..f84b9c02fcc5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3526,6 +3526,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), /* NVE traps */ MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false), + MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, ARP, false), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 6f18f4d3322a..451216dd7f6b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -60,6 +60,7 @@ enum { MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91, MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92, MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1, + MLXSW_TRAP_ID_NVE_DECAP_ARP = 0xB8, MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD, MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, -- cgit v1.2.3-59-g8ed1b From 5edb7e8bd52cd79634d1d7fbc97c5960a21b2873 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Dec 2018 15:59:23 +0000 Subject: mlxsw: spectrum_nve: Fix memory leak upon driver reload The pointer was NULLed before freeing the memory, resulting in a memory leak. Trace from kmemleak: unreferenced object 0xffff88820ae36528 (size 512): comm "devlink", pid 5374, jiffies 4295354033 (age 10829.296s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000a43f5195>] kmem_cache_alloc_trace+0x1be/0x330 [<00000000312f8140>] mlxsw_sp_nve_init+0xcb/0x1ae0 [<0000000009201d22>] mlxsw_sp_init+0x1382/0x2690 [<000000007227d877>] mlxsw_sp1_init+0x1b5/0x260 [<000000004a16feec>] __mlxsw_core_bus_device_register+0x776/0x1360 [<0000000070ab954c>] mlxsw_devlink_core_bus_device_reload+0x129/0x220 [<00000000432313d5>] devlink_nl_cmd_reload+0x119/0x1e0 [<000000003821a06b>] genl_family_rcv_msg+0x813/0x1150 [<00000000d54d04c0>] genl_rcv_msg+0xd1/0x180 [<0000000040543d12>] netlink_rcv_skb+0x152/0x3c0 [<00000000efc4eae8>] genl_rcv+0x2d/0x40 [<00000000ea645603>] netlink_unicast+0x52f/0x740 [<00000000641fca1a>] netlink_sendmsg+0x9c7/0xf50 [<00000000fed4a4b8>] sock_sendmsg+0xbe/0x120 [<00000000d85795a9>] __sys_sendto+0x397/0x620 [<00000000c5f84622>] __x64_sys_sendto+0xe6/0x1a0 Fixes: 6e6030bd5412 ("mlxsw: spectrum_nve: Implement common NVE core") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c index 5c13674439f1..b5b54b41349a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -977,6 +977,6 @@ void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp) { WARN_ON(mlxsw_sp->nve->num_nve_tunnels); rhashtable_destroy(&mlxsw_sp->nve->mc_list_ht); - mlxsw_sp->nve = NULL; kfree(mlxsw_sp->nve); + mlxsw_sp->nve = NULL; } -- cgit v1.2.3-59-g8ed1b From a915b982d8f5e4295f64b8dd37ce753874867e88 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Tue, 18 Dec 2018 00:34:06 -0800 Subject: VSOCK: Send reset control packet when socket is partially bound If a server side socket is bound to an address, but not in the listening state yet, incoming connection requests should receive a reset control packet in response. However, the function used to send the reset silently drops the reset packet if the sending socket isn't bound to a remote address (as is the case for a bound socket not yet in the listening state). This change fixes this by using the src of the incoming packet as destination for the reset packet in this case. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Reviewed-by: Adit Ranadive Reviewed-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 67 +++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 17 deletions(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index cb332adb84cd..c361ce782412 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -263,6 +263,31 @@ vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src, false); } +static int +vmci_transport_alloc_send_control_pkt(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + struct vmci_transport_packet *pkt; + int err; + + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + err = __vmci_transport_send_control_pkt(pkt, src, dst, type, size, + mode, wait, proto, handle, + true); + kfree(pkt); + + return err; +} + static int vmci_transport_send_control_pkt(struct sock *sk, enum vmci_transport_packet_type type, @@ -272,9 +297,7 @@ vmci_transport_send_control_pkt(struct sock *sk, u16 proto, struct vmci_handle handle) { - struct vmci_transport_packet *pkt; struct vsock_sock *vsk; - int err; vsk = vsock_sk(sk); @@ -284,17 +307,10 @@ vmci_transport_send_control_pkt(struct sock *sk, if (!vsock_addr_bound(&vsk->remote_addr)) return -EINVAL; - pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) - return -ENOMEM; - - err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr, - &vsk->remote_addr, type, size, - mode, wait, proto, handle, - true); - kfree(pkt); - - return err; + return vmci_transport_alloc_send_control_pkt(&vsk->local_addr, + &vsk->remote_addr, + type, size, mode, + wait, proto, handle); } static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, @@ -312,12 +328,29 @@ static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, static int vmci_transport_send_reset(struct sock *sk, struct vmci_transport_packet *pkt) { + struct sockaddr_vm *dst_ptr; + struct sockaddr_vm dst; + struct vsock_sock *vsk; + 
if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) return 0; - return vmci_transport_send_control_pkt(sk, - VMCI_TRANSPORT_PACKET_TYPE_RST, - 0, 0, NULL, VSOCK_PROTO_INVALID, - VMCI_INVALID_HANDLE); + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + if (vsock_addr_bound(&vsk->remote_addr)) { + dst_ptr = &vsk->remote_addr; + } else { + vsock_addr_init(&dst, pkt->dg.src.context, + pkt->src_port); + dst_ptr = &dst; + } + return vmci_transport_alloc_send_control_pkt(&vsk->local_addr, dst_ptr, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); } static int vmci_transport_send_negotiate(struct sock *sk, size_t size) -- cgit v1.2.3-59-g8ed1b From 006791772084383de779ef29f2e06f3a6e111e7d Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 11 Dec 2018 17:32:28 +0100 Subject: net: mvpp2: 10G modes aren't supported on all ports The mvpp2_phylink_validate() function sets all modes that are supported by a given PPv2 port. A recent change made all ports to advertise they support 10G modes in certain cases. This is not true, as only the port #0 can do so. This patch fixes it. Fixes: 01b3fd5ac97c ("net: mvpp2: fix detection of 10G SFP modules") Cc: Baruch Siach Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 125ea99418df..88aa488054a8 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4405,12 +4405,14 @@ static void mvpp2_phylink_validate(struct net_device *dev, case PHY_INTERFACE_MODE_10GKR: case PHY_INTERFACE_MODE_XAUI: case PHY_INTERFACE_MODE_NA: - phylink_set(mask, 10000baseCR_Full); - phylink_set(mask, 10000baseSR_Full); - phylink_set(mask, 10000baseLR_Full); - phylink_set(mask, 10000baseLRM_Full); - phylink_set(mask, 10000baseER_Full); - phylink_set(mask, 10000baseKR_Full); + if (port->gop_id == 0) { + phylink_set(mask, 10000baseCR_Full); + phylink_set(mask, 10000baseSR_Full); + phylink_set(mask, 10000baseLR_Full); + phylink_set(mask, 10000baseLRM_Full); + phylink_set(mask, 10000baseER_Full); + phylink_set(mask, 10000baseKR_Full); + } /* Fall-through */ case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: -- cgit v1.2.3-59-g8ed1b From f07d4276892d97671e880190ff195a288b2d8d92 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Dec 2018 10:05:13 +0300 Subject: qed: Fix an error code qed_ll2_start_xmit() We accidentally deleted the code to set "rc = -ENOMEM;" and this patch adds it back. Fixes: d2201a21598a ("qed: No need for LL2 frags indication") Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index aa633381aa47..c6f4bab67a5f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -2496,6 +2496,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, if (unlikely(dma_mapping_error(&cdev->pdev->dev, mapping))) { DP_NOTICE(cdev, "Unable to map frag - dropping packet\n"); + rc = -ENOMEM; goto err; } -- cgit v1.2.3-59-g8ed1b From 3c6306d44082ef007a258ae1b86ea58e6974ee3f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 16 Dec 2018 23:25:12 -0800 Subject: tipc: check group dests after tipc_wait_for_cond() Similar to commit 143ece654f9f ("tipc: check tsk->group in tipc_wait_for_cond()") we have to reload grp->dests too after we re-take the sock lock. This means we need to move the dsts check after tipc_wait_for_cond() too. Fixes: 75da2163dbb6 ("tipc: introduce communication groups") Reported-and-tested-by: syzbot+99f20222fc5018d2b97a@syzkaller.appspotmail.com Cc: Ying Xue Cc: Jon Maloy Signed-off-by: Cong Wang Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 656940692a44..8f34db2a9785 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1009,7 +1009,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_nlist *dsts = tipc_group_dests(tsk->group); + struct tipc_nlist *dsts; struct tipc_mc_method *method = &tsk->mc_method; bool ack = method->mandatory && method->rcast; int blks = tsk_blocks(MCAST_H_SIZE + dlen); @@ -1018,9 +1018,6 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, struct sk_buff_head pkts; int rc = -EHOSTUNREACH; - if (!dsts->local && !dsts->remote) - return -EHOSTUNREACH; - /* Block or return if any destination link or member is congested */ rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt && tsk->group && @@ -1028,6 +1025,10 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, if (unlikely(rc)) return rc; + dsts = tipc_group_dests(tsk->group); + if (!dsts->local && !dsts->remote) + return -EHOSTUNREACH; + /* Complete message header */ if (dest) { msg_set_type(hdr, TIPC_GRP_MCAST_MSG); -- cgit v1.2.3-59-g8ed1b From b26322d2ac6c1c1087af73856531bb836f6963ca Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Dec 2018 11:06:06 +0300 Subject: net: stmmac: Fix an error code in probe() The function should return an error if create_singlethread_workqueue() fails. Fixes: 34877a15f787 ("net: stmmac: Rework and fix TX Timeout code") Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5551fead8f66..c4a35e932f05 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4250,6 +4250,7 @@ int stmmac_dvr_probe(struct device *device, priv->wq = create_singlethread_workqueue("stmmac_wq"); if (!priv->wq) { dev_err(priv->device, "failed to create workqueue\n"); + ret = -ENOMEM; goto error_wq; } -- cgit v1.2.3-59-g8ed1b From 4298388574dae6168fa8940b3edc7ba965e8a7ab Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 17 Dec 2018 10:02:42 +0000 Subject: net: macb: restart tx after tx used bit read On some platforms (currently detected only on SAMA5D4) TX might stuck even the pachets are still present in DMA memories and TX start was issued for them. This happens due to race condition between MACB driver updating next TX buffer descriptor to be used and IP reading the same descriptor. In such a case, the "TX USED BIT READ" interrupt is asserted. GEM/MACB user guide specifies that if a "TX USED BIT READ" interrupt is asserted TX must be restarted. Restart TX if used bit is read and packets are present in software TX queue. Packets are removed from software TX queue if TX was successful for them (see macb_tx_interrupt()). Signed-off-by: Claudiu Beznea Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 1d86b4d5645a..dfaefe91b96a 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -61,7 +61,8 @@ #define MACB_TX_ERR_FLAGS (MACB_BIT(ISR_TUND) \ | MACB_BIT(ISR_RLE) \ | MACB_BIT(TXERR)) -#define MACB_TX_INT_FLAGS (MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP)) +#define MACB_TX_INT_FLAGS (MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP) \ + | MACB_BIT(TXUBR)) /* Max length of transmit frame must be a multiple of 8 bytes */ #define MACB_TX_LEN_ALIGN 8 @@ -1312,6 +1313,21 @@ static void macb_hresp_error_task(unsigned long data) netif_tx_start_all_queues(dev); } +static void macb_tx_restart(struct macb_queue *queue) +{ + unsigned int head = queue->tx_head; + unsigned int tail = queue->tx_tail; + struct macb *bp = queue->bp; + + if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) + queue_writel(queue, ISR, MACB_BIT(TXUBR)); + + if (head == tail) + return; + + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); +} + static irqreturn_t macb_interrupt(int irq, void *dev_id) { struct macb_queue *queue = dev_id; @@ -1369,6 +1385,9 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) if (status & MACB_BIT(TCOMP)) macb_tx_interrupt(queue); + if (status & MACB_BIT(TXUBR)) + macb_tx_restart(queue); + /* Link change detection isn't possible with RMII, so we'll * add that if/when we get our hands on a full-blown MII PHY. 
*/ -- cgit v1.2.3-59-g8ed1b From e2c4cf7f98a519eb4d95532bfa06bcaf3562fed5 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 17 Dec 2018 11:26:38 +0100 Subject: net: Use __kernel_clockid_t in uapi net_stamp.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Herton reports the following error when building a userspace program that includes net_stamp.h: In file included from foo.c:2: /usr/include/linux/net_tstamp.h:158:2: error: unknown type name ‘clockid_t’ clockid_t clockid; /* reference clockid */ ^~~~~~~~~ Fix it by using __kernel_clockid_t in place of clockid_t. Fixes: 80b14dee2bea ("net: Add a new socket option for a future transmit time.") Cc: Timothy Redaelli Reported-by: Herton R. Krzesinski Signed-off-by: Davide Caratti Tested-by: Paolo Abeni Signed-off-by: David S. Miller --- include/uapi/linux/net_tstamp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 97ff3c17ec4d..e5b39721c6e4 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -155,8 +155,8 @@ enum txtime_flags { }; struct sock_txtime { - clockid_t clockid; /* reference clockid */ - __u32 flags; /* as defined by enum txtime_flags */ + __kernel_clockid_t clockid;/* reference clockid */ + __u32 flags; /* as defined by enum txtime_flags */ }; #endif /* _NET_TIMESTAMPING_H */ -- cgit v1.2.3-59-g8ed1b From e100a897bf9b19089e57f236f2398c9e0538900e Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 17 Dec 2018 15:05:39 +0200 Subject: net: macb: fix random memory corruption on RX with 64-bit DMA 64-bit DMA addresses are split in upper and lower halves that are written in separate fields on GEM. For RX, bit 0 of the address is used as the ownership bit (RX_USED). When the RX_USED bit is unset the controller is allowed to write data to the buffer. The driver does not guarantee that the controller already sees the upper half when the RX_USED bit is cleared, possibly resulting in the controller writing an incoming frame to an address with an incorrect upper half and therefore possibly corrupting unrelated system memory. Fix that by adding the necessary DMA memory barrier between the writes. This corruption was observed on a ZynqMP based system. Fixes: fff8019a08b6 ("net: macb: Add 64 bit addressing support for GEM") Signed-off-by: Anssi Hannula Acked-by: Harini Katakam Tested-by: Claudiu Beznea Cc: Nicolas Ferre Cc: Michal Simek Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index dfaefe91b96a..c2f03c60f84b 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -681,6 +681,11 @@ static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_ if (bp->hw_dma_cap & HW_DMA_CAP_64B) { desc_64 = macb_64b_desc(bp, desc); desc_64->addrh = upper_32_bits(addr); + /* The low bits of RX address contain the RX_USED bit, clearing + * of which allows packet RX. Make sure the high bits are also + * visible to HW at that point. 
+ */ + dma_wmb(); } #endif desc->addr = lower_32_bits(addr); -- cgit v1.2.3-59-g8ed1b From 8159ecab0db9095902d4c73605fb8787f5c7d653 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 17 Dec 2018 15:05:40 +0200 Subject: net: macb: fix dropped RX frames due to a race Bit RX_USED set to 0 in the address field allows the controller to write data to the receive buffer descriptor. The driver does not ensure the ctrl field is ready (cleared) when the controller sees the RX_USED=0 written by the driver. The ctrl field might only be cleared after the controller has already updated it according to a newly received frame, causing the frame to be discarded in gem_rx() due to unexpected ctrl field contents. A message is logged when the above scenario occurs: macb ff0b0000.ethernet eth0: not whole frame pointed by descriptor Fix the issue by ensuring that when the controller sees RX_USED=0 the ctrl field is already cleared. This issue was observed on a ZynqMP based system. Fixes: 4df95131ea80 ("net/macb: change RX path for GEM") Signed-off-by: Anssi Hannula Tested-by: Claudiu Beznea Cc: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index c2f03c60f84b..f531c1eae4ff 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -934,14 +934,19 @@ static void gem_rx_refill(struct macb_queue *queue) if (entry == bp->rx_ring_size - 1) paddr |= MACB_BIT(RX_WRAP); - macb_set_addr(bp, desc, paddr); desc->ctrl = 0; + /* Setting addr clears RX_USED and allows reception, + * make sure ctrl is cleared first to avoid a race. + */ + dma_wmb(); + macb_set_addr(bp, desc, paddr); /* properly align Ethernet header */ skb_reserve(skb, NET_IP_ALIGN); } else { - desc->addr &= ~MACB_BIT(RX_USED); desc->ctrl = 0; + dma_wmb(); + desc->addr &= ~MACB_BIT(RX_USED); } } -- cgit v1.2.3-59-g8ed1b From 6e0af298066f3b6d99f58989bb0dca6f764b4c6d Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 17 Dec 2018 15:05:41 +0200 Subject: net: macb: add missing barriers when reading descriptors When reading buffer descriptors on RX or on TX completion, an RX_USED/TX_USED bit is checked first to ensure that the descriptors have been populated, i.e. the ownership has been transferred. However, there are no memory barriers to ensure that the data protected by the RX_USED/TX_USED bit is up-to-date with respect to that bit. Specifically: - TX timestamp descriptors may be loaded before ctrl is loaded for the TX_USED check, which is racy as the descriptors may be updated between the loads, causing old timestamp descriptor data to be used. - RX ctrl may be loaded before addr is loaded for the RX_USED check, which is racy as a new frame may be written between the loads, causing old ctrl descriptor data to be used. This issue exists for both macb_rx() and gem_rx() variants. Fix the races by adding DMA read memory barriers on those paths and reordering the reads in macb_rx(). I have not observed any actual problems in practice caused by these being missing, though. Tested on a ZynqMP based system. Fixes: 89e5785fc8a6 ("[PATCH] Atmel MACB ethernet driver") Signed-off-by: Anssi Hannula Cc: Nicolas Ferre Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cadence/macb_main.c | 13 ++++++++++--- drivers/net/ethernet/cadence/macb_ptp.c | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f531c1eae4ff..4c816e5a841f 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1000,11 +1000,15 @@ static int gem_rx(struct macb_queue *queue, int budget) rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false; addr = macb_get_addr(bp, desc); - ctrl = desc->ctrl; if (!rxused) break; + /* Ensure ctrl is at least as up-to-date as rxused */ + dma_rmb(); + + ctrl = desc->ctrl; + queue->rx_tail++; count++; @@ -1179,11 +1183,14 @@ static int macb_rx(struct macb_queue *queue, int budget) /* Make hw descriptor updates visible to CPU */ rmb(); - ctrl = desc->ctrl; - if (!(desc->addr & MACB_BIT(RX_USED))) break; + /* Ensure ctrl is at least as up-to-date as addr */ + dma_rmb(); + + ctrl = desc->ctrl; + if (ctrl & MACB_BIT(RX_SOF)) { if (first_frag != -1) discard_partial_frame(queue, first_frag, tail); diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index cd5296b84229..a6dc47edc4cf 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -319,6 +319,8 @@ int gem_ptp_txstamp(struct macb_queue *queue, struct sk_buff *skb, desc_ptp = macb_ptp_desc(queue->bp, desc); tx_timestamp = &queue->tx_timestamps[head]; tx_timestamp->skb = skb; + /* ensure ts_1/ts_2 is loaded after ctrl (TX_USED check) */ + dma_rmb(); tx_timestamp->desc_ptp.ts_1 = desc_ptp->ts_1; tx_timestamp->desc_ptp.ts_2 = desc_ptp->ts_2; /* move head */ -- cgit v1.2.3-59-g8ed1b From 6ad0b5a4e03dc5cc0c1ed6be093bdcefdc794a6f Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 18 Dec 2018 13:15:59 +0000 Subject: vxlan: Unmark offloaded bit on replaced FDB entries When rdst of an offloaded FDB entry is replaced, it certainly isn't offloaded anymore. Drivers are notified about such replacements, and can re-mark the entry as offloaded again if they so wish. However until a driver does so explicitly, assume a replaced FDB entry is not offloaded. Note that replaces coming via vxlan_fdb_external_learn_add() are always immediately followed by an explicit offload marking. Fixes: 0efe11733356 ("vxlan: Support marking RDSTs as offloaded") Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 297cdeaef479..c9956c08edf5 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -568,6 +568,7 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f, rd->remote_port = port; rd->remote_vni = vni; rd->remote_ifindex = ifindex; + rd->offloaded = false; return 1; } -- cgit v1.2.3-59-g8ed1b From 6db9246871394b3a136cd52001a0763676563840 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 18 Dec 2018 13:16:00 +0000 Subject: vxlan: Fix error path in __vxlan_dev_create() When a failure occurs in rtnl_configure_link(), the current code calls unregister_netdevice() to roll back the earlier call to register_netdevice(), and jumps to errout, which calls vxlan_fdb_destroy(). However unregister_netdevice() calls transitively ndo_uninit, which is vxlan_uninit(), and that already takes care of deleting the default FDB entry by calling vxlan_fdb_delete_default(). 
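The three macb fixes above all enforce one publish/consume rule for DMA descriptors: make the descriptor contents visible before publishing the ownership bit, and order reads of the contents after the read of that bit. A minimal userspace analogue of the same rule, using C11 release/acquire atomics on ordinary memory instead of dma_wmb()/dma_rmb() on descriptors (illustration only, not kernel code):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    /* toy "descriptor": a payload plus an ownership/ready flag */
    static int payload;
    static atomic_int ready;

    static void *producer(void *arg)
    {
        (void)arg;
        payload = 42;                       /* fill in the payload first */
        /* release: payload is visible before the flag, cf. dma_wmb() */
        atomic_store_explicit(&ready, 1, memory_order_release);
        return NULL;
    }

    static void *consumer(void *arg)
    {
        (void)arg;
        /* acquire: see the flag first, only then read the payload,
         * cf. checking RX_USED/TX_USED and then dma_rmb() before ctrl */
        while (!atomic_load_explicit(&ready, memory_order_acquire))
            ;                               /* spin until published */
        printf("payload = %d\n", payload);  /* guaranteed to print 42 */
        return NULL;
    }

    int main(void)
    {
        pthread_t p, c;

        pthread_create(&c, NULL, consumer, NULL);
        pthread_create(&p, NULL, producer, NULL);
        pthread_join(p, NULL);
        pthread_join(c, NULL);
        return 0;
    }

In the driver the other agent is a DMA engine rather than a second CPU, so the pairing is expressed with dma_wmb() before handing a descriptor to hardware and dma_rmb() after observing the used bit.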
Since the entry added earlier in __vxlan_dev_create() is exactly the default entry, the cleanup code in the errout block always leads to double free and thus a panic. Besides, since vxlan_fdb_delete_default() always destroys the FDB entry with notification enabled, the deletion of the default entry is notified even before the addition was notified. Instead, move the unregister_netdevice() call after the manual destroy, which solves both problems. Fixes: 0241b836732f ("vxlan: fix default fdb entry netlink notify ordering during netdev create") Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c9956c08edf5..82e78d6fd9ae 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3259,6 +3259,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f = NULL; + bool unregister = false; int err; err = vxlan_dev_configure(net, dev, conf, false, extack); @@ -3284,12 +3285,11 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, err = register_netdevice(dev); if (err) goto errout; + unregister = true; err = rtnl_configure_link(dev, NULL); - if (err) { - unregister_netdevice(dev); + if (err) goto errout; - } /* notify default fdb entry */ if (f) @@ -3297,9 +3297,16 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, list_add(&vxlan->next, &vn->vxlan_list); return 0; + errout: + /* unregister_netdevice() destroys the default FDB entry with deletion + * notification. But the addition notification was not sent yet, so + * destroy the entry by hand here. + */ if (f) vxlan_fdb_destroy(vxlan, f, false); + if (unregister) + unregister_netdevice(dev); return err; } -- cgit v1.2.3-59-g8ed1b From ce5e098f7a10b4bf8e948c12fa350320c5c3afad Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 18 Dec 2018 13:16:02 +0000 Subject: vxlan: changelink: Fix handling of default remotes Default remotes are stored as FDB entries with an Ethernet address of 00:00:00:00:00:00. When a request is made to change a remote address of a VXLAN device, vxlan_changelink() first deletes the existing default remote, and then creates a new FDB entry. This works well as long as the list of default remotes matches exactly the configuration of a VXLAN remote address. Thus when the VXLAN device has a remote of X, there should be exactly one default remote FDB entry X. If the VXLAN device has no remote address, there should be no such entry. Besides using "ip link set", it is possible to manipulate the list of default remotes by using the "bridge fdb". It is therefore easy to break the above condition. Under such circumstances, the __vxlan_fdb_delete() call doesn't delete the FDB entry itself, but just one remote. The following vxlan_fdb_create() then creates a new FDB entry, leading to a situation where two entries exist for the address 00:00:00:00:00:00, each with a different subset of default remotes. An even more obvious breakage rooted in the same cause can be observed when a remote address is configured for a VXLAN device that did not have one before. 
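The error-path shape used in the __vxlan_dev_create() fix above is a common C idiom worth spelling out: unwind with goto labels, record hand-off points in a local flag, and let whichever side owns a resource at failure time be the one to release it. A stripped-down sketch with invented names (not the vxlan code), assuming a registry that frees the object it was handed when it is unregistered:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* stand-ins for "register with the core"; the registry takes ownership
     * of obj and frees it on unregister, much like unregister_netdevice()
     * tearing down the default FDB entry via ndo_uninit */
    static void *registry;

    static int fake_register(void *obj)  { registry = obj; return 0; }
    static void fake_unregister(void)    { free(registry); registry = NULL; }

    static int create_thing(const char *name, int fail_late)
    {
        int registered = 0;
        char *obj;
        int err;

        obj = strdup(name);
        if (!obj)
            return -1;

        err = fake_register(obj);       /* ownership moves to the registry */
        if (err)
            goto errout;
        registered = 1;

        if (fail_late) {                /* a later setup step fails */
            err = -1;
            goto errout;
        }
        return 0;

    errout:
        if (registered)
            /* the registry frees obj for us; calling free(obj) here as
             * well would be the double free the fix above removes */
            fake_unregister();
        else
            free(obj);                  /* we still own it */
        return err;
    }

    int main(void)
    {
        printf("create: %d\n", create_thing("vx0", 1));
        return 0;
    }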
In that case vxlan_changelink() doesn't remove any remote, and just creates a new FDB entry for the new address: $ ip link add name vx up type vxlan id 2000 dstport 4789 $ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent $ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent $ ip link set dev vx type vxlan remote 192.0.2.30 $ bridge fdb sh dev vx | grep 00:00:00:00:00:00 00:00:00:00:00:00 dst 192.0.2.30 self permanent <- new entry, 1 rdst 00:00:00:00:00:00 dst 192.0.2.20 self permanent <- orig. entry, 2 rdsts 00:00:00:00:00:00 dst 192.0.2.30 self permanent To fix this, instead of calling vxlan_fdb_create() directly, defer to vxlan_fdb_update(). That has logic to handle the duplicates properly. Additionally, it also handles notifications, so drop that call from changelink as well. Fixes: 0241b836732f ("vxlan: fix default fdb entry netlink notify ordering during netdev create") Signed-off-by: Petr Machata Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 82e78d6fd9ae..0565f8880199 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3542,7 +3542,6 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], struct vxlan_rdst *dst = &vxlan->default_dst; struct vxlan_rdst old_dst; struct vxlan_config conf; - struct vxlan_fdb *f = NULL; int err; err = vxlan_nl2conf(tb, data, @@ -3568,19 +3567,19 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], old_dst.remote_ifindex, 0); if (!vxlan_addr_any(&dst->remote_ip)) { - err = vxlan_fdb_create(vxlan, all_zeros_mac, + err = vxlan_fdb_update(vxlan, all_zeros_mac, &dst->remote_ip, NUD_REACHABLE | NUD_PERMANENT, + NLM_F_APPEND | NLM_F_CREATE, vxlan->cfg.dst_port, dst->remote_vni, dst->remote_vni, dst->remote_ifindex, - NTF_SELF, &f); + NTF_SELF); if (err) { spin_unlock_bh(&vxlan->hash_lock); return err; } - vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH); } spin_unlock_bh(&vxlan->hash_lock); } -- cgit v1.2.3-59-g8ed1b From 55cbe0794294031d9b36984d4529f416e7e0a40a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 18 Dec 2018 13:16:03 +0000 Subject: selftests: net: Add test_vxlan_fdb_changelink.sh Add a test to exercise the fix from the previous patch. Signed-off-by: Petr Machata Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/test_vxlan_fdb_changelink.sh | 29 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100755 tools/testing/selftests/net/test_vxlan_fdb_changelink.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 256d82d5fa87..923570a9708a 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -7,6 +7,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh +TEST_PROGS += test_vxlan_fdb_changelink.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh new file mode 100755 index 000000000000..2d442cdab11e --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Check FDB default-remote handling across "ip link set". + +check_remotes() +{ + local what=$1; shift + local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l) + + echo -ne "expected two remotes after $what\t" + if [[ $N != 2 ]]; then + echo "[FAIL]" + EXIT_STATUS=1 + else + echo "[ OK ]" + fi +} + +ip link add name vx up type vxlan id 2000 dstport 4789 +bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent +bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent +check_remotes "fdb append" + +ip link set dev vx type vxlan remote 192.0.2.30 +check_remotes "link set" + +ip link del dev vx +exit $EXIT_STATUS -- cgit v1.2.3-59-g8ed1b From 0db7d253e9f0ff1a41c602429bea93df221be6ed Mon Sep 17 00:00:00 2001 From: Bryan Whitehead Date: Mon, 17 Dec 2018 16:44:50 -0500 Subject: lan743x: Expand phy search for LAN7431 The LAN7431 uses an external phy, and it can be found anywhere in the phy address space. This patch uses phy address 1 for LAN7430 only. And searches all addresses otherwise. Signed-off-by: Bryan Whitehead Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index e8ca98c070f6..1ba8ea07dc4f 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -2719,8 +2719,9 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) snprintf(adapter->mdiobus->id, MII_BUS_ID_SIZE, "pci-%s", pci_name(adapter->pdev)); - /* set to internal PHY id */ - adapter->mdiobus->phy_mask = ~(u32)BIT(1); + if ((adapter->csr.id_rev & ID_REV_ID_MASK_) == ID_REV_ID_LAN7430_) + /* LAN7430 uses internal phy at address 1 */ + adapter->mdiobus->phy_mask = ~(u32)BIT(1); /* register mdiobus */ ret = mdiobus_register(adapter->mdiobus); -- cgit v1.2.3-59-g8ed1b From 15515aaaa69659c502003926a2067ee76176148a Mon Sep 17 00:00:00 2001 From: Jason Martinsen Date: Tue, 18 Dec 2018 05:38:22 +0000 Subject: lan78xx: Resolve issue with changing MAC address Current state for the lan78xx driver does not allow for changing the MAC address of the interface, without either removing the module (if you compiled it that way) or rebooting the machine. If you attempt to change the MAC address, ifconfig will show the new address, however, the system/interface will not respond to any traffic using that configuration. A few short-term options to work around this are to unload the module and reload it with the new MAC address, change the interface to "promisc", or reboot with the correct configuration to change the MAC. This patch enables the ability to change the MAC address via fairly normal means... ifdown modify entry in /etc/network/interfaces OR a similar method ifup Then test via any network communication, such as ICMP requests to gateway. My only test platform for this patch has been a raspberry pi model 3b+. Signed-off-by: Jason Martinsen ----- Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index be1917be28f2..77d3c85febf1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2320,6 +2320,10 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p) ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + /* Added to support MAC address changes */ + ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); + ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); + return 0; } -- cgit v1.2.3-59-g8ed1b From 8742beb50f2db903d3b6d69ddd81d67ce9914453 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 18 Dec 2018 16:57:04 +0900 Subject: net: phy: Fix the issue that netif always links up after resuming Even though the link is down before entering hibernation, there is an issue that the network interface always links up after resuming from hibernation. If the link is still down before enabling the network interface, and after resuming from hibernation, the phydev->state is forcibly set to PHY_UP in mdio_bus_phy_restore(), and the link becomes up. In suspend sequence, only if the PHY is attached, mdio_bus_phy_suspend() calls phy_stop_machine(), and mdio_bus_phy_resume() calls phy_start_machine(). In resume sequence, it's enough to do the same as mdio_bus_phy_resume() because the state has been preserved. 
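For completeness, the "fairly normal means" restored by the lan78xx change above can be driven from C as well as from ifdown/ifup, via the long-standing SIOCSIFHWADDR ioctl. A hedged sketch (interface name and MAC are made up, needs CAP_NET_ADMIN, and most drivers expect the interface to be down first); this path should end up in the driver's set-MAC handler, lan78xx_set_mac_addr() shown above:

    #include <net/if.h>
    #include <net/if_arp.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        /* locally administered, made-up address */
        const unsigned char mac[6] = { 0x02, 0x12, 0x34, 0x56, 0x78, 0x9a };
        struct ifreq ifr;
        int fd;

        fd = socket(AF_INET, SOCK_DGRAM, 0);   /* any socket works as an ioctl handle */
        if (fd < 0) {
            perror("socket");
            return 1;
        }

        memset(&ifr, 0, sizeof(ifr));
        snprintf(ifr.ifr_name, IFNAMSIZ, "eth0");   /* hypothetical interface */
        ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
        memcpy(ifr.ifr_hwaddr.sa_data, mac, sizeof(mac));

        if (ioctl(fd, SIOCSIFHWADDR, &ifr) < 0)
            perror("SIOCSIFHWADDR");

        close(fd);
        return 0;
    }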
This patch fixes the issue by calling phy_start_machine() in mdio_bus_phy_restore() in the same way as mdio_bus_phy_resume(). Fixes: bc87922ff59d ("phy: Move PHY PM operations into phy_device") Suggested-by: Heiner Kallweit Signed-off-by: Kunihiko Hayashi Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 18e92c19c5ab..26c41ede54a4 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -308,11 +308,8 @@ static int mdio_bus_phy_restore(struct device *dev) if (ret < 0) return ret; - /* The PHY needs to renegotiate. */ - phydev->link = 0; - phydev->state = PHY_UP; - - phy_start_machine(phydev); + if (phydev->attached_dev && phydev->adjust_link) + phy_start_machine(phydev); return 0; } -- cgit v1.2.3-59-g8ed1b From d81c5054a5d1d4999c7cdead7636b6cd4af83d36 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 18 Dec 2018 16:06:19 +0100 Subject: xen/netfront: tolerate frags with no data At least old Xen net backends seem to send frags with no real data sometimes. In case such a fragment happens to occur with the frag limit already reached the frontend will BUG currently even if this situation is easily recoverable. Modify the BUG_ON() condition accordingly. Tested-by: Dietmar Hahn Signed-off-by: Juergen Gross Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index f17f602e6171..5b97cc946d70 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -905,7 +905,7 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) { unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; - BUG_ON(pull_to <= skb_headlen(skb)); + BUG_ON(pull_to < skb_headlen(skb)); __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { -- cgit v1.2.3-59-g8ed1b From f7db2beb4c2c6cc8111f5ab90fc7363ca91107b6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 18 Dec 2018 15:19:47 +0000 Subject: vxge: ensure data0 is initialized in when fetching firmware version information Currently variable data0 is not being initialized so a garbage value is being passed to vxge_hw_vpath_fw_api and this value is being written to the rts_access_steer_data0 register. There are other occurrances where data0 is being initialized to zero (e.g. in function vxge_hw_upgrade_read_version) so I think it makes sense to ensure data0 is initialized likewise to 0. Detected by CoverityScan, CID#140696 ("Uninitialized scalar variable") Fixes: 8424e00dfd52 ("vxge: serialize access to steering control register") Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- drivers/net/ethernet/neterion/vxge/vxge-config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index 4c1fb7e57888..7cde387e5ec6 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -808,7 +808,7 @@ __vxge_hw_vpath_fw_ver_get(struct __vxge_hw_virtualpath *vpath, struct vxge_hw_device_date *fw_date = &hw_info->fw_date; struct vxge_hw_device_version *flash_version = &hw_info->flash_version; struct vxge_hw_device_date *flash_date = &hw_info->flash_date; - u64 data0, data1 = 0, steer_ctrl = 0; + u64 data0 = 0, data1 = 0, steer_ctrl = 0; enum vxge_hw_status status; status = vxge_hw_vpath_fw_api(vpath, -- cgit v1.2.3-59-g8ed1b From 78abe3d0dfad196959b1246003366e2610775ea6 Mon Sep 17 00:00:00 2001 From: Myungho Jung Date: Tue, 18 Dec 2018 09:02:25 -0800 Subject: net/smc: fix TCP fallback socket release clcsock can be released while kernel_accept() references it in TCP listen worker. Also, clcsock needs to wake up before released if TCP fallback is used and the clcsock is blocked by accept. Add a lock to safely release clcsock and call kernel_sock_shutdown() to wake up clcsock from accept in smc_release(). Reported-by: syzbot+0bf2e01269f1274b4b03@syzkaller.appspotmail.com Reported-by: syzbot+e3132895630f957306bc@syzkaller.appspotmail.com Signed-off-by: Myungho Jung Signed-off-by: David S. Miller --- net/smc/af_smc.c | 14 ++++++++++++-- net/smc/smc.h | 4 ++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5fbaf1901571..82cb0e5634bc 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -147,8 +147,14 @@ static int smc_release(struct socket *sock) sk->sk_shutdown |= SHUTDOWN_MASK; } if (smc->clcsock) { + if (smc->use_fallback && sk->sk_state == SMC_LISTEN) { + /* wake up clcsock accept */ + rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); + } + mutex_lock(&smc->clcsock_release_lock); sock_release(smc->clcsock); smc->clcsock = NULL; + mutex_unlock(&smc->clcsock_release_lock); } if (smc->use_fallback) { if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) @@ -205,6 +211,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, spin_lock_init(&smc->conn.send_lock); sk->sk_prot->hash(sk); sk_refcnt_debug_inc(sk); + mutex_init(&smc->clcsock_release_lock); return sk; } @@ -821,7 +828,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) struct socket *new_clcsock = NULL; struct sock *lsk = &lsmc->sk; struct sock *new_sk; - int rc; + int rc = -EINVAL; release_sock(lsk); new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol); @@ -834,7 +841,10 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) } *new_smc = smc_sk(new_sk); - rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); + mutex_lock(&lsmc->clcsock_release_lock); + if (lsmc->clcsock) + rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); + mutex_unlock(&lsmc->clcsock_release_lock); lock_sock(lsk); if (rc < 0) lsk->sk_err = -rc; diff --git a/net/smc/smc.h b/net/smc/smc.h index 08786ace6010..5721416d0605 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -219,6 +219,10 @@ struct smc_sock { /* smc sock container */ * started, waiting for unsent * data to be sent */ + struct mutex clcsock_release_lock; + /* protects clcsock of a listen + * socket + * */ }; static inline struct smc_sock *smc_sk(const struct 
sock *sk) -- cgit v1.2.3-59-g8ed1b From d350a0f431189517b1af0dbbb605c273231a8966 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 15 Dec 2018 11:03:22 +0200 Subject: nl80211: fix memory leak if validate_pae_over_nl80211() fails If validate_pae_over_nl80211() were to fail in nl80211_crypto_settings(), we might leak the 'connkeys' allocation. Fix this. Fixes: 64bf3d4bc2b0 ("nl80211: Add CONTROL_PORT_OVER_NL80211 attribute") Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8d763725498c..2317727d6413 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8930,8 +8930,10 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) { int r = validate_pae_over_nl80211(rdev, info); - if (r < 0) + if (r < 0) { + kzfree(connkeys); return r; + } ibss.control_port_over_nl80211 = true; } -- cgit v1.2.3-59-g8ed1b From 34b1e0e9efe101822e83cc62d22443ed3867ae7a Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sat, 15 Dec 2018 11:03:06 +0200 Subject: mac80211: free skb fraglist before freeing the skb mac80211 uses the frag list to build AMSDU. When freeing the skb, it may not be really freed, since someone is still holding a reference to it. In that case, when TCP skb is being retransmitted, the pointer to the frag list is being reused, while the data in there is no longer valid. Since we will never get frag list from the network stack, as mac80211 doesn't advertise the capability, we can safely free and nullify it before releasing the SKB. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/status.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index a794ca729000..3f0b96e1e02f 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -556,6 +556,11 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, } ieee80211_led_tx(local); + + if (skb_has_frag_list(skb)) { + kfree_skb_list(skb_shinfo(skb)->frag_list); + skb_shinfo(skb)->frag_list = NULL; + } } /* -- cgit v1.2.3-59-g8ed1b From 53884577fbcef33a7d15ad664e664a3dabe35171 Mon Sep 17 00:00:00 2001 From: Rakesh Pillai Date: Fri, 14 Dec 2018 12:17:46 +0200 Subject: ath10k: skip sending quiet mode cmd for WCN3990 HL2.0 firmware does not support setting quiet mode. If the host driver sends the quiet mode setting command to the HL2.0 firmware, it crashes with the below signature. fatal error received: err_qdi.c:456:EX:wlan_process:1:WLAN RT:207a:PC=b001b4f0 The quiet mode command support is exposed by the firmware via thermal throttle wmi service. Enable ath10k thermal support if thermal throttle wmi service bit is set. 10.x firmware versions support this feature by default, but unfortunately do not advertise the support via service flags, hence have to manually set the service flag in ath10k_core_compat_services(). Tested on QCA988X with 10.2.4.70.9-2. Also tested on WCN3990. 
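The mechanism being relied on here is a plain capability bitmap: firmware advertises a service bit, a compat shim force-sets the bit for 10.x firmware that supports the feature without advertising it, and every consumer tests the bit before touching the feature. A minimal sketch of the same pattern in plain C with invented names (not the ath10k code):

    #include <limits.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define BITS_PER_LONG (CHAR_BIT * sizeof(unsigned long))

    enum service { SVC_THERM_THROT, SVC_SPOOF_MAC, SVC_MAX };

    /* one bit per advertised firmware service */
    static unsigned long svc_map[(SVC_MAX + BITS_PER_LONG - 1) / BITS_PER_LONG];

    static void svc_set(enum service s)
    {
        svc_map[s / BITS_PER_LONG] |= 1UL << (s % BITS_PER_LONG);
    }

    static bool svc_test(enum service s)
    {
        return svc_map[s / BITS_PER_LONG] & (1UL << (s % BITS_PER_LONG));
    }

    /* compat shim: old firmware supports the feature but never advertises it */
    static void compat_services(int fw_major)
    {
        if (fw_major == 10)
            svc_set(SVC_THERM_THROT);
    }

    static int thermal_register(void)
    {
        if (!svc_test(SVC_THERM_THROT))
            return 0;   /* quietly skip: the firmware cannot handle it */
        printf("registering thermal throttling\n");
        return 0;
    }

    int main(void)
    {
        compat_services(10);
        return thermal_register();
    }

In the driver the same test gates the debugfs quiet_period file, the hwmon/cooling-device registration and the quiet-mode WMI command itself, so affected firmware never receives the command that crashes it.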
Co-developed-by: Govind Singh Co-developed-by: Kalle Valo Signed-off-by: Rakesh Pillai Signed-off-by: Govind Singh Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 28 ++++++++++++++++++++++++++++ drivers/net/wireless/ath/ath10k/debug.c | 5 +++-- drivers/net/wireless/ath/ath10k/thermal.c | 9 +++++++++ drivers/net/wireless/ath/ath10k/wmi-tlv.h | 3 +++ drivers/net/wireless/ath/ath10k/wmi.h | 1 + 5 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index da607febfd82..d210b0ed59be 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -2418,6 +2418,28 @@ static int ath10k_core_reset_rx_filter(struct ath10k *ar) return 0; } +static int ath10k_core_compat_services(struct ath10k *ar) +{ + struct ath10k_fw_file *fw_file = &ar->normal_mode_fw.fw_file; + + /* all 10.x firmware versions support thermal throttling but don't + * advertise the support via service flags so we have to hardcode + * it here + */ + switch (fw_file->wmi_op_version) { + case ATH10K_FW_WMI_OP_VERSION_10_1: + case ATH10K_FW_WMI_OP_VERSION_10_2: + case ATH10K_FW_WMI_OP_VERSION_10_2_4: + case ATH10K_FW_WMI_OP_VERSION_10_4: + set_bit(WMI_SERVICE_THERM_THROT, ar->wmi.svc_map); + break; + default: + break; + } + + return 0; +} + int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, const struct ath10k_fw_components *fw) { @@ -2617,6 +2639,12 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, goto err_hif_stop; } + status = ath10k_core_compat_services(ar); + if (status) { + ath10k_err(ar, "compat services failed: %d\n", status); + goto err_hif_stop; + } + /* Some firmware revisions do not properly set up hardware rx filter * registers. 
* diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index 15964b374f68..02988fc378a1 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -2578,8 +2578,9 @@ int ath10k_debug_register(struct ath10k *ar) debugfs_create_file("pktlog_filter", 0644, ar->debug.debugfs_phy, ar, &fops_pktlog_filter); - debugfs_create_file("quiet_period", 0644, ar->debug.debugfs_phy, ar, - &fops_quiet_period); + if (test_bit(WMI_SERVICE_THERM_THROT, ar->wmi.svc_map)) + debugfs_create_file("quiet_period", 0644, ar->debug.debugfs_phy, ar, + &fops_quiet_period); debugfs_create_file("tpc_stats", 0400, ar->debug.debugfs_phy, ar, &fops_tpc_stats); diff --git a/drivers/net/wireless/ath/ath10k/thermal.c b/drivers/net/wireless/ath/ath10k/thermal.c index aa8978a8d751..fe35edcd3ec8 100644 --- a/drivers/net/wireless/ath/ath10k/thermal.c +++ b/drivers/net/wireless/ath/ath10k/thermal.c @@ -140,6 +140,9 @@ void ath10k_thermal_set_throttling(struct ath10k *ar) lockdep_assert_held(&ar->conf_mutex); + if (!test_bit(WMI_SERVICE_THERM_THROT, ar->wmi.svc_map)) + return; + if (!ar->wmi.ops->gen_pdev_set_quiet_mode) return; @@ -165,6 +168,9 @@ int ath10k_thermal_register(struct ath10k *ar) struct device *hwmon_dev; int ret; + if (!test_bit(WMI_SERVICE_THERM_THROT, ar->wmi.svc_map)) + return 0; + cdev = thermal_cooling_device_register("ath10k_thermal", ar, &ath10k_thermal_ops); @@ -216,6 +222,9 @@ err_cooling_destroy: void ath10k_thermal_unregister(struct ath10k *ar) { + if (!test_bit(WMI_SERVICE_THERM_THROT, ar->wmi.svc_map)) + return; + sysfs_remove_link(&ar->dev->kobj, "cooling_device"); thermal_cooling_device_unregister(ar->thermal.cdev); } diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.h b/drivers/net/wireless/ath/ath10k/wmi-tlv.h index 92c25f51bf86..c2cb413392ee 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h @@ -1564,6 +1564,9 @@ wmi_tlv_svc_map_ext(const __le32 *in, unsigned long *out, size_t len) SVCMAP(WMI_TLV_SERVICE_SPOOF_MAC_SUPPORT, WMI_SERVICE_SPOOF_MAC_SUPPORT, WMI_TLV_MAX_SERVICE); + SVCMAP(WMI_TLV_SERVICE_THERM_THROT, + WMI_SERVICE_THERM_THROT, + WMI_TLV_MAX_SERVICE); } #undef SVCMAP diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index f7badd079051..c5a343c93013 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -205,6 +205,7 @@ enum wmi_service { WMI_SERVICE_SPOOF_MAC_SUPPORT, WMI_SERVICE_TX_DATA_ACK_RSSI, WMI_SERVICE_VDEV_DIFFERENT_BEACON_INTERVAL_SUPPORT, + WMI_SERVICE_THERM_THROT, /* keep last */ WMI_SERVICE_MAX, -- cgit v1.2.3-59-g8ed1b From ea010070d0a7497253d5a6f919f6dd107450b31a Mon Sep 17 00:00:00 2001 From: shamir rabinovitch Date: Sun, 16 Dec 2018 09:01:08 +0200 Subject: net/rds: fix warn in rds_message_alloc_sgs redundant copy_from_user in rds_sendmsg system call expose rds to issue where rds_rdma_extra_size walk the rds iovec and and calculate the number pf pages (sgs) it need to add to the tail of rds message and later rds_cmsg_rdma_args copy the rds iovec again and re calculate the same number and get different result causing WARN_ON in rds_message_alloc_sgs. fix this by doing the copy_from_user only once per rds_sendmsg system call. When issue occur the below dump is seen: WARNING: CPU: 0 PID: 19789 at net/rds/message.c:316 rds_message_alloc_sgs+0x10c/0x160 net/rds/message.c:316 Kernel panic - not syncing: panic_on_warn set ... 
CPU: 0 PID: 19789 Comm: syz-executor827 Not tainted 4.19.0-next-20181030+ #101 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x244/0x39d lib/dump_stack.c:113 panic+0x2ad/0x55c kernel/panic.c:188 __warn.cold.8+0x20/0x45 kernel/panic.c:540 report_bug+0x254/0x2d0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:178 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:271 do_invalid_op+0x36/0x40 arch/x86/kernel/traps.c:290 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:969 RIP: 0010:rds_message_alloc_sgs+0x10c/0x160 net/rds/message.c:316 Code: c0 74 04 3c 03 7e 6c 44 01 ab 78 01 00 00 e8 2b 9e 35 fa 4c 89 e0 48 83 c4 08 5b 41 5c 41 5d 41 5e 41 5f 5d c3 e8 14 9e 35 fa <0f> 0b 31 ff 44 89 ee e8 18 9f 35 fa 45 85 ed 75 1b e8 fe 9d 35 fa RSP: 0018:ffff8801c51b7460 EFLAGS: 00010293 RAX: ffff8801bc412080 RBX: ffff8801d7bf4040 RCX: ffffffff8749c9e6 RDX: 0000000000000000 RSI: ffffffff8749ca5c RDI: 0000000000000004 RBP: ffff8801c51b7490 R08: ffff8801bc412080 R09: ffffed003b5c5b67 R10: ffffed003b5c5b67 R11: ffff8801dae2db3b R12: 0000000000000000 R13: 000000000007165c R14: 000000000007165c R15: 0000000000000005 rds_cmsg_rdma_args+0x82d/0x1510 net/rds/rdma.c:623 rds_cmsg_send net/rds/send.c:971 [inline] rds_sendmsg+0x19a2/0x3180 net/rds/send.c:1273 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:632 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2117 __sys_sendmsg+0x11d/0x280 net/socket.c:2155 __do_sys_sendmsg net/socket.c:2164 [inline] __se_sys_sendmsg net/socket.c:2162 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2162 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x44a859 Code: e8 dc e6 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 6b cb fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f1d4710ada8 EFLAGS: 00000297 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000006dcc28 RCX: 000000000044a859 RDX: 0000000000000000 RSI: 0000000020001600 RDI: 0000000000000003 RBP: 00000000006dcc20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000297 R12: 00000000006dcc2c R13: 646e732f7665642f R14: 00007f1d4710b9c0 R15: 00000000006dcd2c Kernel Offset: disabled Rebooting in 86400 seconds.. Reported-by: syzbot+26de17458aeda9d305d8@syzkaller.appspotmail.com Acked-by: Santosh Shilimkar Signed-off-by: shamir rabinovitch Signed-off-by: David S. 
Miller --- net/rds/rdma.c | 63 +++++++++++++++++++++++++++++----------------------------- net/rds/rds.h | 20 +++++++++++++++---- net/rds/send.c | 50 +++++++++++++++++++++++++++++++++++++++------- 3 files changed, 91 insertions(+), 42 deletions(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 98237feb607a..e1965d9cbcf8 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -517,9 +517,10 @@ static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs) return tot_pages; } -int rds_rdma_extra_size(struct rds_rdma_args *args) +int rds_rdma_extra_size(struct rds_rdma_args *args, + struct rds_iov_vector *iov) { - struct rds_iovec vec; + struct rds_iovec *vec; struct rds_iovec __user *local_vec; int tot_pages = 0; unsigned int nr_pages; @@ -530,13 +531,23 @@ int rds_rdma_extra_size(struct rds_rdma_args *args) if (args->nr_local == 0) return -EINVAL; + iov->iov = kcalloc(args->nr_local, + sizeof(struct rds_iovec), + GFP_KERNEL); + if (!iov->iov) + return -ENOMEM; + + vec = &iov->iov[0]; + + if (copy_from_user(vec, local_vec, args->nr_local * + sizeof(struct rds_iovec))) + return -EFAULT; + iov->len = args->nr_local; + /* figure out the number of pages in the vector */ - for (i = 0; i < args->nr_local; i++) { - if (copy_from_user(&vec, &local_vec[i], - sizeof(struct rds_iovec))) - return -EFAULT; + for (i = 0; i < args->nr_local; i++, vec++) { - nr_pages = rds_pages_in_vec(&vec); + nr_pages = rds_pages_in_vec(vec); if (nr_pages == 0) return -EINVAL; @@ -558,15 +569,15 @@ int rds_rdma_extra_size(struct rds_rdma_args *args) * Extract all arguments and set up the rdma_op */ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, - struct cmsghdr *cmsg) + struct cmsghdr *cmsg, + struct rds_iov_vector *vec) { struct rds_rdma_args *args; struct rm_rdma_op *op = &rm->rdma; int nr_pages; unsigned int nr_bytes; struct page **pages = NULL; - struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack; - int iov_size; + struct rds_iovec *iovs; unsigned int i, j; int ret = 0; @@ -586,31 +597,23 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, goto out_ret; } - /* Check whether to allocate the iovec area */ - iov_size = args->nr_local * sizeof(struct rds_iovec); - if (args->nr_local > UIO_FASTIOV) { - iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL); - if (!iovs) { - ret = -ENOMEM; - goto out_ret; - } + if (vec->len != args->nr_local) { + ret = -EINVAL; + goto out_ret; } - if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) { - ret = -EFAULT; - goto out; - } + iovs = vec->iov; nr_pages = rds_rdma_pages(iovs, args->nr_local); if (nr_pages < 0) { ret = -EINVAL; - goto out; + goto out_ret; } pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); if (!pages) { ret = -ENOMEM; - goto out; + goto out_ret; } op->op_write = !!(args->flags & RDS_RDMA_READWRITE); @@ -623,7 +626,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, op->op_sg = rds_message_alloc_sgs(rm, nr_pages); if (!op->op_sg) { ret = -ENOMEM; - goto out; + goto out_pages; } if (op->op_notify || op->op_recverr) { @@ -635,7 +638,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, op->op_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL); if (!op->op_notifier) { ret = -ENOMEM; - goto out; + goto out_pages; } op->op_notifier->n_user_token = args->user_token; op->op_notifier->n_status = RDS_RDMA_SUCCESS; @@ -681,7 +684,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, */ ret = 
rds_pin_pages(iov->addr, nr, pages, !op->op_write); if (ret < 0) - goto out; + goto out_pages; else ret = 0; @@ -714,13 +717,11 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, nr_bytes, (unsigned int) args->remote_vec.bytes); ret = -EINVAL; - goto out; + goto out_pages; } op->op_bytes = nr_bytes; -out: - if (iovs != iovstack) - sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size); +out_pages: kfree(pages); out_ret: if (ret) diff --git a/net/rds/rds.h b/net/rds/rds.h index 6bfaf05b63b2..4d2523100093 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -386,6 +386,18 @@ static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q) INIT_LIST_HEAD(&q->zcookie_head); } +struct rds_iov_vector { + struct rds_iovec *iov; + int len; +}; + +struct rds_iov_vector_arr { + struct rds_iov_vector *vec; + int len; + int indx; + int incr; +}; + struct rds_message { refcount_t m_refcount; struct list_head m_sock_item; @@ -904,13 +916,13 @@ int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen); int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen); int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen); void rds_rdma_drop_keys(struct rds_sock *rs); -int rds_rdma_extra_size(struct rds_rdma_args *args); -int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, - struct cmsghdr *cmsg); +int rds_rdma_extra_size(struct rds_rdma_args *args, + struct rds_iov_vector *iov); int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, - struct cmsghdr *cmsg); + struct cmsghdr *cmsg, + struct rds_iov_vector *vec); int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); void rds_rdma_free_op(struct rm_rdma_op *ro); diff --git a/net/rds/send.c b/net/rds/send.c index fe785ee819dd..ec2267cbf85f 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -876,13 +876,15 @@ out: * rds_message is getting to be quite complicated, and we'd like to allocate * it all in one go. This figures out how big it needs to be up front. 
*/ -static int rds_rm_size(struct msghdr *msg, int num_sgs) +static int rds_rm_size(struct msghdr *msg, int num_sgs, + struct rds_iov_vector_arr *vct) { struct cmsghdr *cmsg; int size = 0; int cmsg_groups = 0; int retval; bool zcopy_cookie = false; + struct rds_iov_vector *iov, *tmp_iov; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) @@ -893,8 +895,24 @@ static int rds_rm_size(struct msghdr *msg, int num_sgs) switch (cmsg->cmsg_type) { case RDS_CMSG_RDMA_ARGS: + if (vct->indx >= vct->len) { + vct->len += vct->incr; + tmp_iov = + krealloc(vct->vec, + vct->len * + sizeof(struct rds_iov_vector), + GFP_KERNEL); + if (!tmp_iov) { + vct->len -= vct->incr; + return -ENOMEM; + } + vct->vec = tmp_iov; + } + iov = &vct->vec[vct->indx]; + memset(iov, 0, sizeof(struct rds_iov_vector)); + vct->indx++; cmsg_groups |= 1; - retval = rds_rdma_extra_size(CMSG_DATA(cmsg)); + retval = rds_rdma_extra_size(CMSG_DATA(cmsg), iov); if (retval < 0) return retval; size += retval; @@ -951,10 +969,11 @@ static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm, } static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, - struct msghdr *msg, int *allocated_mr) + struct msghdr *msg, int *allocated_mr, + struct rds_iov_vector_arr *vct) { struct cmsghdr *cmsg; - int ret = 0; + int ret = 0, ind = 0; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) @@ -968,7 +987,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, */ switch (cmsg->cmsg_type) { case RDS_CMSG_RDMA_ARGS: - ret = rds_cmsg_rdma_args(rs, rm, cmsg); + if (ind >= vct->indx) + return -ENOMEM; + ret = rds_cmsg_rdma_args(rs, rm, cmsg, &vct->vec[ind]); + ind++; break; case RDS_CMSG_RDMA_DEST: @@ -1084,6 +1106,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY)); int num_sgs = ceil(payload_len, PAGE_SIZE); int namelen; + struct rds_iov_vector_arr vct = {0}; + int ind; + + /* expect 1 RDMA CMSG per rds_sendmsg. can still grow if more needed. */ + vct.incr = 1; /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ @@ -1220,7 +1247,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX); } /* size of rm including all sgs */ - ret = rds_rm_size(msg, num_sgs); + ret = rds_rm_size(msg, num_sgs, &vct); if (ret < 0) goto out; @@ -1270,7 +1297,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) rm->m_conn_path = cpath; /* Parse any control messages the user may have included. */ - ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); + ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct); if (ret) { /* Trigger connection so that its ready for the next retry */ if (ret == -EAGAIN) @@ -1348,9 +1375,18 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) if (ret) goto out; rds_message_put(rm); + + for (ind = 0; ind < vct.indx; ind++) + kfree(vct.vec[ind].iov); + kfree(vct.vec); + return payload_len; out: + for (ind = 0; ind < vct.indx; ind++) + kfree(vct.vec[ind].iov); + kfree(vct.vec); + /* If the user included a RDMA_MAP cmsg, we allocated a MR on the fly. * If the sendmsg goes through, we keep the MR. 
If it fails with EAGAIN or in any other way, we need to destroy the MR again */ -- cgit v1.2.3-59-g8ed1b From c75ab8a55ac1083c232e4407f52b0cadae6c1e0e Mon Sep 17 00:00:00 2001 From: shamir rabinovitch Date: Sun, 16 Dec 2018 09:01:09 +0200 Subject: net/rds: remove user triggered WARN_ON in rds_sendmsg Per a comment from Leon on the rdma mailing list (https://lkml.org/lkml/2018/10/31/312): Please don't forget to remove user triggered WARN_ON. https://lwn.net/Articles/769365/ "Greg Kroah-Hartman raised the problem of core kernel API code that will use WARN_ON_ONCE() to complain about bad usage; that will not generate the desired result if WARN_ON_ONCE() is configured to crash the machine. He was told that the code should just call pr_warn() instead, and that the called function should return an error in such situations. It was generally agreed that any WARN_ON() or WARN_ON_ONCE() calls that can be triggered from user space need to be fixed." In addition, harden rds_sendmsg to detect an invalid sg count and fail the sendmsg in that case. Suggested-by: Leon Romanovsky Acked-by: Santosh Shilimkar Signed-off-by: shamir rabinovitch Signed-off-by: David S. Miller --- net/rds/message.c | 24 ++++++++++++++++++------ net/rds/rdma.c | 12 ++++-------- net/rds/rds.h | 3 ++- net/rds/send.c | 9 +++++---- 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/net/rds/message.c b/net/rds/message.c index 4b00b1152a5f..f139420ba1f6 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -308,16 +308,27 @@ out: /* * RDS ops use this to grab SG entries from the rm's sg pool. */ -struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents) +struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents, + int *ret) { struct scatterlist *sg_first = (struct scatterlist *) &rm[1]; struct scatterlist *sg_ret; - WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs); - WARN_ON(!nents); + if (WARN_ON(!ret)) return NULL; - if (rm->m_used_sgs + nents > rm->m_total_sgs) + if (nents <= 0) { + pr_warn("rds: alloc sgs failed! nents <= 0\n"); + *ret = -EINVAL; return NULL; + } + + if (rm->m_used_sgs + nents > rm->m_total_sgs) { + pr_warn("rds: alloc sgs failed!
total %d used %d nents %d\n", + rm->m_total_sgs, rm->m_used_sgs, nents); + *ret = -ENOMEM; + return NULL; + } sg_ret = &sg_first[rm->m_used_sgs]; sg_init_table(sg_ret, nents); @@ -332,6 +343,7 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in unsigned int i; int num_sgs = ceil(total_len, PAGE_SIZE); int extra_bytes = num_sgs * sizeof(struct scatterlist); + int ret; rm = rds_message_alloc(extra_bytes, GFP_NOWAIT); if (!rm) @@ -340,10 +352,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); rm->data.op_nents = ceil(total_len, PAGE_SIZE); - rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); + rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret); if (!rm->data.op_sg) { rds_message_put(rm); - return ERR_PTR(-ENOMEM); + return ERR_PTR(ret); } for (i = 0; i < rm->data.op_nents; ++i) { diff --git a/net/rds/rdma.c b/net/rds/rdma.c index e1965d9cbcf8..182ab8430594 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -623,11 +623,9 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, op->op_active = 1; op->op_recverr = rs->rs_recverr; WARN_ON(!nr_pages); - op->op_sg = rds_message_alloc_sgs(rm, nr_pages); - if (!op->op_sg) { - ret = -ENOMEM; + op->op_sg = rds_message_alloc_sgs(rm, nr_pages, &ret); + if (!op->op_sg) goto out_pages; - } if (op->op_notify || op->op_recverr) { /* We allocate an uninitialized notifier here, because @@ -839,11 +837,9 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT); rm->atomic.op_active = 1; rm->atomic.op_recverr = rs->rs_recverr; - rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); - if (!rm->atomic.op_sg) { - ret = -ENOMEM; + rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1, &ret); + if (!rm->atomic.op_sg) goto err; - } /* verify 8 byte-aligned */ if (args->local_addr & 0x7) { diff --git a/net/rds/rds.h b/net/rds/rds.h index 4d2523100093..02ec4a3b2799 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -839,7 +839,8 @@ rds_conn_connecting(struct rds_connection *conn) /* message.c */ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); -struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents); +struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents, + int *ret); int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, bool zcopy); struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); diff --git a/net/rds/send.c b/net/rds/send.c index ec2267cbf85f..b39b30706210 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -886,6 +886,9 @@ static int rds_rm_size(struct msghdr *msg, int num_sgs, bool zcopy_cookie = false; struct rds_iov_vector *iov, *tmp_iov; + if (num_sgs < 0) + return -EINVAL; + for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; @@ -1259,11 +1262,9 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Attach data to the rm */ if (payload_len) { - rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); - if (!rm->data.op_sg) { - ret = -ENOMEM; + rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret); + if (!rm->data.op_sg) goto out; - } ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy); if (ret) goto out; -- cgit v1.2.3-59-g8ed1b From 84404d5fd5858588bbf694b0300dbc6310d02737 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 19 Dec 2018 
13:46:50 -0500 Subject: bnxt_en: Fix ethtool self-test loopback. The current code has 2 problems. It assumes that the RX ring for the loopback packet is combined with the TX ring. This is not true if the ethtool channels are set to non-combined mode. The second problem is that it won't work on 57500 chips without adjusting the logic to get the proper completion ring (cpr) pointer. Fix both issues by locating the proper cpr pointer through the RX ring. Fixes: e44758b78ae8 ("bnxt_en: Use bnxt_cp_ring_info struct pointer as parameter for RX path.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 6cc69a58478a..6b51f4de6017 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2572,6 +2572,7 @@ static int bnxt_poll_loopback(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, static int bnxt_run_loopback(struct bnxt *bp) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[0]; + struct bnxt_rx_ring_info *rxr = &bp->rx_ring[0]; struct bnxt_cp_ring_info *cpr; int pkt_size, i = 0; struct sk_buff *skb; @@ -2579,7 +2580,9 @@ static int bnxt_run_loopback(struct bnxt *bp) u8 *data; int rc; - cpr = &txr->bnapi->cp_ring; + cpr = &rxr->bnapi->cp_ring; + if (bp->flags & BNXT_FLAG_CHIP_P5) + cpr = cpr->cp_ring_arr[BNXT_RX_HDL]; pkt_size = min(bp->dev->mtu + ETH_HLEN, bp->rx_copy_thresh); skb = netdev_alloc_skb(bp->dev, pkt_size); if (!skb) -- cgit v1.2.3-59-g8ed1b From fb24274546310872eeeaf3d1d53799d8414aa0f2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 18 Dec 2018 21:17:44 -0800 Subject: ipv6: explicitly initialize udp6_addr in udp_sock_create6() syzbot reported the use of uninitialized udp6_addr::sin6_scope_id. We can just set ::sin6_scope_id to zero, as tunnels are unlikely to use an IPv6 address that needs a scope id and there is no interface to bind in this context. For net-next, it looks different as we have cfg->bind_ifindex there so we can probably call ipv6_iface_scope_id(). Same for ::sin6_flowinfo, tunnels don't use it. Fixes: 8024e02879dd ("udp: Add udp_sock_create for UDP tunnels to open listener socket") Reported-by: syzbot+c56449ed3652e6720f30@syzkaller.appspotmail.com Cc: Jon Maloy Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/ip6_udp_tunnel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index b283f293ee4a..caad40d6e74d 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -15,7 +15,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { - struct sockaddr_in6 udp6_addr; + struct sockaddr_in6 udp6_addr = {}; int err; struct socket *sock = NULL; @@ -42,6 +42,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, goto error; if (cfg->peer_udp_port) { + memset(&udp6_addr, 0, sizeof(udp6_addr)); udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6, sizeof(udp6_addr.sin6_addr)); -- cgit v1.2.3-59-g8ed1b From bfc698254ba97b3e3e4ebbfae0ffa1f7e2fa0717 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 2 Dec 2018 15:45:53 +0200 Subject: net/mlx5e: RX, Fix wrong early return in receive queue poll When the completion queue of the RQ is empty, do not immediately return. 
If left-over decompressed CQEs (from the previous cycle) were processed, we need to go to the finalization part of the poll function. The bug exists only when CQE compression is turned ON. This solves the following issue: mlx5_core 0000:82:00.1: mlx5_eq_int:544:(pid 0): CQ error on CQN 0xc08, syndrome 0x1 mlx5_core 0000:82:00.1 p4p2: mlx5e_cq_error_event: cqn=0x000c08 event=0x04 Fixes: 4b7dfc992514 ("net/mlx5e: Early-return on empty completion queues") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 624eed345b5d..0b5ef6d4e815 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1190,7 +1190,7 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_xdpsq *xdpsq; + struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; struct mlx5_cqe64 *cqe; int work_done = 0; @@ -1201,10 +1201,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (!cqe) + if (!cqe) { + if (unlikely(work_done)) + goto out; return 0; - - xdpsq = &rq->xdpsq; + } do { if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { @@ -1219,6 +1220,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); +out: if (xdpsq->doorbell) { mlx5e_xmit_xdp_doorbell(xdpsq); xdpsq->doorbell = false; -- cgit v1.2.3-59-g8ed1b From f0337889147c956721696553ffcc97212b0948fe Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Thu, 13 Dec 2018 02:26:46 +0200 Subject: net/mlx5: Typo fix in del_sw_hw_rule The expression was terminated with "," instead of ";", which resulted in set_fte getting a bad value for the modify_enable_mask field. Fixes: bd5251dbf156 ("net/mlx5_core: Introduce flow steering destination of type counter") Signed-off-by: Yuval Avnery Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9d73eb955f75..08233cf44871 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -452,7 +452,7 @@ static void del_sw_hw_rule(struct fs_node *node) if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && --fte->dests_size) { - modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST), + modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); update_fte = true; } out: -- cgit v1.2.3-59-g8ed1b From 4765420439e758bfa4808392d18b0a4cb6f06065 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Sun, 25 Nov 2018 11:46:09 +0200 Subject: net/mlx5e: Remove the false indication of software timestamping support The mlx5 driver falsely advertises support for software timestamping. Fix it by removing the false indication.
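For context, what a driver reports here is what "ethtool -T <dev>" shows; it can also be queried directly via the ETHTOOL_GET_TS_INFO ioctl. Below is a minimal userspace sketch of such a query (illustrative only, not part of the patch; it assumes the standard Linux uapi headers and takes the interface name as argv[1]):

/* Query the timestamping capabilities a NIC advertises. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(int argc, char **argv)
{
	struct ethtool_ts_info info = { .cmd = ETHTOOL_GET_TS_INFO };
	struct ifreq ifr;
	int fd;

	if (argc < 2)
		return 1;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return 1;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, argv[1], IFNAMSIZ - 1);
	ifr.ifr_data = (char *)&info;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("SIOCETHTOOL");
		close(fd);
		return 1;
	}

	/* With this fix, a device without software timestamping no longer
	 * reports the SOF_TIMESTAMPING_*_SOFTWARE bits here.
	 */
	printf("so_timestamping: 0x%x, phc_index: %d\n",
	       info.so_timestamping, info.phc_index);
	close(fd);
	return 0;
}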
Fixes: ef9814deafd0 ("net/mlx5e: Add HW timestamping (TS) support") Signed-off-by: Alaa Hleihel Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 25c1c4f96841..f480763dcd0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1190,11 +1190,6 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info) { struct mlx5_core_dev *mdev = priv->mdev; - int ret; - - ret = ethtool_op_get_ts_info(priv->netdev, info); - if (ret) - return ret; info->phc_index = mlx5_clock_get_ptp_index(mdev); @@ -1202,9 +1197,9 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, info->phc_index == -1) return 0; - info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE | - SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RAW_HARDWARE; + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); -- cgit v1.2.3-59-g8ed1b From e0e587878f538c9e3400219b6c516b8199dc2042 Mon Sep 17 00:00:00 2001 From: Bryan Whitehead Date: Wed, 19 Dec 2018 16:55:15 -0500 Subject: lan743x: Remove MAC Reset from initialization The MAC Reset was noticed to erase important EEPROM settings. It is also unnecessary since a chip-wide reset was done earlier in initialization, and that reset preserves EEPROM settings. Therefore, this patch removes the unnecessary MAC-specific reset. Signed-off-by: Bryan Whitehead Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 1ba8ea07dc4f..20c9377e99cb 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -802,14 +802,8 @@ static int lan743x_mac_init(struct lan743x_adapter *adapter) u32 mac_addr_hi = 0; u32 mac_addr_lo = 0; u32 data; - int ret; netdev = adapter->netdev; - lan743x_csr_write(adapter, MAC_CR, MAC_CR_RST_); - ret = lan743x_csr_wait_for_bit(adapter, MAC_CR, MAC_CR_RST_, - 0, 1000, 20000, 100); - if (ret) - return ret; /* setup auto duplex, and speed detection */ data = lan743x_csr_read(adapter, MAC_CR); -- cgit v1.2.3-59-g8ed1b From 8e1da73acded4751a93d4166458a7e640f37d26c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 19 Dec 2018 23:23:00 +0100 Subject: gro_cell: add napi_disable in gro_cells_destroy Add a napi_disable call in gro_cells_destroy since, starting from commit c42858eaf492 ("gro_cells: remove spinlock protecting receive queues"), gro_cell_poll and gro_cells_destroy can run concurrently on the napi_skbs list, producing a kernel Oops if the tunnel interface is removed while gro_cell_poll is running.
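In other words, the per-CPU NAPI instance has to be disabled before it is deleted, so that an in-flight gro_cell_poll() finishes and cannot be rescheduled before napi_skbs is purged. A condensed sketch of the fixed teardown loop (simplified from the gro_cells_destroy() diff further below, not a drop-in replacement):

for_each_possible_cpu(i) {
	struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);

	/* napi_disable() waits for a running poll to complete and keeps
	 * the NAPI instance from being rescheduled.
	 */
	napi_disable(&cell->napi);
	netif_napi_del(&cell->napi);
	/* only now is it safe to free the queued skbs */
	__skb_queue_purge(&cell->napi_skbs);
}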
The following Oops has been triggered removing a vxlan device while the interface is receiving traffic [ 5628.948853] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 5628.949981] PGD 0 P4D 0 [ 5628.950308] Oops: 0002 [#1] SMP PTI [ 5628.950748] CPU: 0 PID: 9 Comm: ksoftirqd/0 Not tainted 4.20.0-rc6+ #41 [ 5628.952940] RIP: 0010:gro_cell_poll+0x49/0x80 [ 5628.955615] RSP: 0018:ffffc9000004fdd8 EFLAGS: 00010202 [ 5628.956250] RAX: 0000000000000000 RBX: ffffe8ffffc08150 RCX: 0000000000000000 [ 5628.957102] RDX: 0000000000000000 RSI: ffff88802356bf00 RDI: ffffe8ffffc08150 [ 5628.957940] RBP: 0000000000000026 R08: 0000000000000000 R09: 0000000000000000 [ 5628.958803] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000040 [ 5628.959661] R13: ffffe8ffffc08100 R14: 0000000000000000 R15: 0000000000000040 [ 5628.960682] FS: 0000000000000000(0000) GS:ffff88803ea00000(0000) knlGS:0000000000000000 [ 5628.961616] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5628.962359] CR2: 0000000000000008 CR3: 000000000221c000 CR4: 00000000000006b0 [ 5628.963188] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 5628.964034] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 5628.964871] Call Trace: [ 5628.965179] net_rx_action+0xf0/0x380 [ 5628.965637] __do_softirq+0xc7/0x431 [ 5628.966510] run_ksoftirqd+0x24/0x30 [ 5628.966957] smpboot_thread_fn+0xc5/0x160 [ 5628.967436] kthread+0x113/0x130 [ 5628.968283] ret_from_fork+0x3a/0x50 [ 5628.968721] Modules linked in: [ 5628.969099] CR2: 0000000000000008 [ 5628.969510] ---[ end trace 9d9dedc7181661fe ]--- [ 5628.970073] RIP: 0010:gro_cell_poll+0x49/0x80 [ 5628.972965] RSP: 0018:ffffc9000004fdd8 EFLAGS: 00010202 [ 5628.973611] RAX: 0000000000000000 RBX: ffffe8ffffc08150 RCX: 0000000000000000 [ 5628.974504] RDX: 0000000000000000 RSI: ffff88802356bf00 RDI: ffffe8ffffc08150 [ 5628.975462] RBP: 0000000000000026 R08: 0000000000000000 R09: 0000000000000000 [ 5628.976413] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000040 [ 5628.977375] R13: ffffe8ffffc08100 R14: 0000000000000000 R15: 0000000000000040 [ 5628.978296] FS: 0000000000000000(0000) GS:ffff88803ea00000(0000) knlGS:0000000000000000 [ 5628.979327] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5628.980044] CR2: 0000000000000008 CR3: 000000000221c000 CR4: 00000000000006b0 [ 5628.980929] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 5628.981736] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 5628.982409] Kernel panic - not syncing: Fatal exception in interrupt [ 5628.983307] Kernel Offset: disabled Fixes: c42858eaf492 ("gro_cells: remove spinlock protecting receive queues") Signed-off-by: Lorenzo Bianconi Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/gro_cells.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index 4b54e5f107c6..acf45ddbe924 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -84,6 +84,7 @@ void gro_cells_destroy(struct gro_cells *gcells) for_each_possible_cpu(i) { struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); + napi_disable(&cell->napi); netif_napi_del(&cell->napi); __skb_queue_purge(&cell->napi_skbs); } -- cgit v1.2.3-59-g8ed1b From 1875a9ab01dfa96b06cb6649cb1ce56efa86c7cb Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 19 Dec 2018 14:11:15 +0800 Subject: iptunnel: make TUNNEL_FLAGS available in uapi ip l add dev tun type gretap external ip r a 10.0.0.1 encap ip dst 192.168.152.171 id 1000 dev gretap In this gretap key example, the commands set the id but do not set the TUNNEL_KEY flag, so there is no key field in the sent packet. In the lwtunnel situation, some TUNNEL_FLAGS should be settable by userspace. Signed-off-by: wenxu Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 19 ------------------- include/uapi/linux/if_tunnel.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index b0d022ff6ea1..5ce926701bd0 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -144,25 +144,6 @@ struct ip_tunnel { bool ignore_df; }; -#define TUNNEL_CSUM __cpu_to_be16(0x01) -#define TUNNEL_ROUTING __cpu_to_be16(0x02) -#define TUNNEL_KEY __cpu_to_be16(0x04) -#define TUNNEL_SEQ __cpu_to_be16(0x08) -#define TUNNEL_STRICT __cpu_to_be16(0x10) -#define TUNNEL_REC __cpu_to_be16(0x20) -#define TUNNEL_VERSION __cpu_to_be16(0x40) -#define TUNNEL_NO_KEY __cpu_to_be16(0x80) -#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) -#define TUNNEL_OAM __cpu_to_be16(0x0200) -#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) -#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) -#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) -#define TUNNEL_NOCACHE __cpu_to_be16(0x2000) -#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) - -#define TUNNEL_OPTIONS_PRESENT \ - (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT) - struct tnl_ptk_info { __be16 flags; __be16 proto; diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 1b3d148c4560..7d9105533c7b 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -160,4 +160,24 @@ enum { }; #define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1) + +#define TUNNEL_CSUM __cpu_to_be16(0x01) +#define TUNNEL_ROUTING __cpu_to_be16(0x02) +#define TUNNEL_KEY __cpu_to_be16(0x04) +#define TUNNEL_SEQ __cpu_to_be16(0x08) +#define TUNNEL_STRICT __cpu_to_be16(0x10) +#define TUNNEL_REC __cpu_to_be16(0x20) +#define TUNNEL_VERSION __cpu_to_be16(0x40) +#define TUNNEL_NO_KEY __cpu_to_be16(0x80) +#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) +#define TUNNEL_OAM __cpu_to_be16(0x0200) +#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) +#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) +#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) +#define TUNNEL_NOCACHE __cpu_to_be16(0x2000) +#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) + +#define TUNNEL_OPTIONS_PRESENT \ + (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT) + #endif /* _UAPI_IF_TUNNEL_H_ */ -- cgit v1.2.3-59-g8ed1b From c6ec179a0082e2e76e3a72050c2b99d3d0f3da3f Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 19 Dec 2018 17:18:22 +0530 Subject: net/tls: allocate tls context using GFP_ATOMIC create_ctx can be called from atomic context,
hence use GFP_ATOMIC instead of GFP_KERNEL. [ 395.962599] BUG: sleeping function called from invalid context at mm/slab.h:421 [ 395.979896] in_atomic(): 1, irqs_disabled(): 0, pid: 16254, name: openssl [ 395.996564] 2 locks held by openssl/16254: [ 396.010492] #0: 00000000347acb52 (sk_lock-AF_INET){+.+.}, at: do_tcp_setsockopt.isra.44+0x13b/0x9a0 [ 396.029838] #1: 000000006c9552b5 (device_spinlock){+...}, at: tls_init+0x1d/0x280 [ 396.047675] CPU: 5 PID: 16254 Comm: openssl Tainted: G O 4.20.0-rc6+ #25 [ 396.066019] Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.0c 09/25/2017 [ 396.083537] Call Trace: [ 396.096265] dump_stack+0x5e/0x8b [ 396.109876] ___might_sleep+0x216/0x250 [ 396.123940] kmem_cache_alloc_trace+0x1b0/0x240 [ 396.138800] create_ctx+0x1f/0x60 [ 396.152504] tls_init+0xbd/0x280 [ 396.166135] tcp_set_ulp+0x191/0x2d0 [ 396.180035] ? tcp_set_ulp+0x2c/0x2d0 [ 396.193960] do_tcp_setsockopt.isra.44+0x148/0x9a0 [ 396.209013] __sys_setsockopt+0x7c/0xe0 [ 396.223054] __x64_sys_setsockopt+0x20/0x30 [ 396.237378] do_syscall_64+0x4a/0x180 [ 396.251200] entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: df9d4a178022 ("net/tls: sleeping function from invalid context") Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- net/tls/tls_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 1428bd74638c..28887cf628b8 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -538,7 +538,7 @@ static struct tls_context *create_ctx(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tls_context *ctx; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); if (!ctx) return NULL; -- cgit v1.2.3-59-g8ed1b From 599d2570b2da7c2f5419332b42b7999d79c85959 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 19 Dec 2018 18:07:56 +0200 Subject: net/sched: cls_flower: Remove old entries from rhashtable When replacing a rule we add the new rule to the rhashtable but only remove the old one if it is not in skip_sw. This commit fixes this and removes the old rule in either case. Fixes: 35cc3cefc4de ("net/sched: cls_flower: Reject duplicated rules also under skip_sw") Signed-off-by: Roi Dayan Reviewed-by: Vlad Buslov Acked-by: Or Gerlitz Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 71312d7bd8f4..208d940464d7 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1258,10 +1258,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW; if (fold) { - if (!tc_skip_sw(fold->flags)) - rhashtable_remove_fast(&fold->mask->ht, - &fold->ht_node, - fold->mask->filter_ht_params); + rhashtable_remove_fast(&fold->mask->ht, + &fold->ht_node, + fold->mask->filter_ht_params); if (!tc_skip_hw(fold->flags)) fl_hw_destroy_filter(tp, fold, NULL); } -- cgit v1.2.3-59-g8ed1b From 1b451fb2051b464b9758c09a3492104403252e2b Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 19 Dec 2018 18:00:12 +0100 Subject: net: mvpp2: fix the phylink mode validation The mvpp2_phylink_validate() sets all modes that are supported by a given PPv2 port. A mistake caused the 10000baseT_Full mode to be advertised in some cases when a port wasn't configured to perform at 10G. This patch fixes this. Fixes: d97c9f4ab000 ("net: mvpp2: 1000baseX support") Reported-by: Russell King Signed-off-by: Antoine Tenart Signed-off-by: David S.
Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 88aa488054a8..f1dab0b55769 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4406,6 +4406,7 @@ static void mvpp2_phylink_validate(struct net_device *dev, case PHY_INTERFACE_MODE_XAUI: case PHY_INTERFACE_MODE_NA: if (port->gop_id == 0) { + phylink_set(mask, 10000baseT_Full); phylink_set(mask, 10000baseCR_Full); phylink_set(mask, 10000baseSR_Full); phylink_set(mask, 10000baseLR_Full); @@ -4423,7 +4424,6 @@ static void mvpp2_phylink_validate(struct net_device *dev, phylink_set(mask, 10baseT_Full); phylink_set(mask, 100baseT_Half); phylink_set(mask, 100baseT_Full); - phylink_set(mask, 10000baseT_Full); /* Fall-through */ case PHY_INTERFACE_MODE_1000BASEX: case PHY_INTERFACE_MODE_2500BASEX: -- cgit v1.2.3-59-g8ed1b From c0fde870d96e42bbdcc0d9af7ae5e190c767aab8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 19 Dec 2018 16:54:38 -0800 Subject: neighbor: NTF_PROXY is a valid ndm_flag for a dump request When dumping proxy entries the dump request has NTF_PROXY set in ndm_flags. strict mode checking needs to be updated to allow this flag. Fixes: 51183d233b5a ("net/neighbor: Update neigh_dump_info for strict data checking") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/neighbour.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 41954e42a2de..5fa32c064baf 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2494,11 +2494,16 @@ static int neigh_valid_dump_req(const struct nlmsghdr *nlh, ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex || - ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) { + ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request"); return -EINVAL; } + if (ndm->ndm_flags & ~NTF_PROXY) { + NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request"); + return -EINVAL; + } + err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, extack); } else { -- cgit v1.2.3-59-g8ed1b From d84e7bc0595a7e146ad0ddb80b240cea77825245 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 Dec 2018 20:53:18 -0800 Subject: rds: Fix warning. >> net/rds/send.c:1109:42: warning: Using plain integer as NULL pointer Fixes: ea010070d0a7 ("net/rds: fix warn in rds_message_alloc_sgs") Reported-by: kbuild test robot Signed-off-by: David S. Miller --- net/rds/send.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/rds/send.c b/net/rds/send.c index b39b30706210..3d822bad7de9 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1109,9 +1109,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY)); int num_sgs = ceil(payload_len, PAGE_SIZE); int namelen; - struct rds_iov_vector_arr vct = {0}; + struct rds_iov_vector_arr vct; int ind; + memset(&vct, 0, sizeof(vct)); + /* expect 1 RDMA CMSG per rds_sendmsg. can still grow if more needed. */ vct.incr = 1; -- cgit v1.2.3-59-g8ed1b
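A closing note on the sparse report fixed by the last patch above: initializing a structure whose first member is a pointer with "= {0}" makes sparse warn that a plain integer is being used as a NULL pointer, which is why the patch switches to memset(). A standalone illustration (hypothetical struct and function names, not the kernel code):

#include <string.h>

struct iov_vector_arr {
	void *vec;	/* first member is a pointer, as in rds_iov_vector_arr */
	int len;
	int indx;
	int incr;
};

/* Flagged by sparse: the literal 0 ends up initializing the pointer. */
static struct iov_vector_arr init_with_zero(void)
{
	struct iov_vector_arr v = {0};

	return v;
}

/* Equivalent zeroing without the warning, matching the fix above. */
static void init_with_memset(struct iov_vector_arr *v)
{
	memset(v, 0, sizeof(*v));
}

int main(void)
{
	struct iov_vector_arr a = init_with_zero();

	init_with_memset(&a);
	return a.len;
}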