From 0e63208915a8d7590d0a6218dadb2a6a00ac705a Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Mon, 4 Mar 2019 23:26:10 +0100 Subject: tipc: fix RDM/DGRAM connect() regression Fix regression bug introduced in commit 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Only signal -EDESTADDRREQ for RDM/DGRAM if we don't have a cached sockaddr. Fixes: 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Signed-off-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e482b342bfa8..3274ef625dba 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1333,7 +1333,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(!dest)) { dest = &tsk->peer; - if (!syn || dest->family != AF_TIPC) + if (!syn && dest->family != AF_TIPC) return -EDESTADDRREQ; } -- cgit v1.2.3-59-g8ed1b From 4177c5d94264b57f426ef5c45a788808d1a1e536 Mon Sep 17 00:00:00 2001 From: wenxu Date: Tue, 5 Mar 2019 08:29:28 +0800 Subject: net/sched: act_tunnel_key: Fix double free dst_cache dst_cache_destroy will be called in dst_release dst_release-->dst_destroy_rcu-->dst_destroy-->metadata_dst_free -->dst_cache_destroy It should not call dst_cache_destroy before dst_release Fixes: 41411e2fd6b8 ("net/sched: act_tunnel_key: Add dst_cache support") Signed-off-by: wenxu Signed-off-by: David S. Miller --- net/sched/act_tunnel_key.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 3beb4717d3b7..7c6591b991d5 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -201,14 +201,9 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) { if (!p) return; - if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) { -#ifdef CONFIG_DST_CACHE - struct ip_tunnel_info *info = &p->tcft_enc_metadata->u.tun_info; - - dst_cache_destroy(&info->dst_cache); -#endif + if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) dst_release(&p->tcft_enc_metadata->dst); - } + kfree_rcu(p, rcu); } @@ -338,7 +333,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, &metadata->u.tun_info, opts_len, extack); if (ret < 0) - goto release_dst_cache; + goto release_tun_meta; } metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; @@ -354,14 +349,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, &act_tunnel_key_ops, bind, true); if (ret) { NL_SET_ERR_MSG(extack, "Cannot create TC IDR"); - goto release_dst_cache; + goto release_tun_meta; } ret = ACT_P_CREATED; } else if (!ovr) { NL_SET_ERR_MSG(extack, "TC IDR already exists"); ret = -EEXIST; - goto release_dst_cache; + goto release_tun_meta; } t = to_tunnel_key(*a); @@ -371,7 +366,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters"); ret = -ENOMEM; exists = true; - goto release_dst_cache; + goto release_tun_meta; } params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; @@ -388,12 +383,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return ret; -release_dst_cache: -#ifdef CONFIG_DST_CACHE - if (metadata) - dst_cache_destroy(&metadata->u.tun_info.dst_cache); release_tun_meta: -#endif if (metadata) dst_release(&metadata->dst); -- cgit v1.2.3-59-g8ed1b From 22c74764aa2943ecdf9f07c900d8a9c8ba6c9265 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 6 Mar 2019 10:42:53 +0100 Subject: ipv4/route: fail early when inet dev is missing If a non local multicast packet reaches ip_route_input_rcu() while the ingress device IPv4 private data (in_dev) is NULL, we end up doing a NULL pointer dereference in IN_DEV_MFORWARD(). Since the later call to ip_route_input_mc() is going to fail if !in_dev, we can fail early in such scenario and avoid the dangerous code path. v1 -> v2: - clarified the commit message, no code changes Reported-by: Tianhao Zhao Fixes: e58e41596811 ("net: Enable support for VRF with ipv4 multicast") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 738ff0a1a048..8ca3642f0d9b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2149,12 +2149,13 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, int our = 0; int err = -EINVAL; - if (in_dev) - our = ip_check_mc_rcu(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); + if (!in_dev) + return err; + our = ip_check_mc_rcu(in_dev, daddr, saddr, + ip_hdr(skb)->protocol); /* check l3 master if no match yet */ - if ((!in_dev || !our) && netif_is_l3_slave(dev)) { + if (!our && netif_is_l3_slave(dev)) { struct in_device *l3_in_dev; l3_in_dev = __in_dev_get_rcu(skb->dev); -- cgit v1.2.3-59-g8ed1b From f4b3ec4e6aa1a2ca437905a519ae08e8cf6af754 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 6 Mar 2019 10:25:42 +0000 Subject: iptunnel: NULL pointer deref for ip_md_tunnel_xmit Naresh Kamboju noted the following oops during execution of selftest tools/testing/selftests/bpf/test_tunnel.sh on x86_64: [ 274.120445] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 274.128285] #PF error: [INSTR] [ 274.131351] PGD 8000000414a0e067 P4D 8000000414a0e067 PUD 3b6334067 PMD 0 [ 274.138241] Oops: 0010 [#1] SMP PTI [ 274.141734] CPU: 1 PID: 11464 Comm: ping Not tainted 5.0.0-rc4-next-20190129 #1 [ 274.149046] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS 2.0b 07/27/2017 [ 274.156526] RIP: 0010: (null) [ 274.160280] Code: Bad RIP value. [ 274.163509] RSP: 0018:ffffbc9681f83540 EFLAGS: 00010286 [ 274.168726] RAX: 0000000000000000 RBX: ffffdc967fa80a18 RCX: 0000000000000000 [ 274.175851] RDX: ffff9db2ee08b540 RSI: 000000000000000e RDI: ffffdc967fa809a0 [ 274.182974] RBP: ffffbc9681f83580 R08: ffff9db2c4d62690 R09: 000000000000000c [ 274.190098] R10: 0000000000000000 R11: ffff9db2ee08b540 R12: ffff9db31ce7c000 [ 274.197222] R13: 0000000000000001 R14: 000000000000000c R15: ffff9db3179cf400 [ 274.204346] FS: 00007ff4ae7c5740(0000) GS:ffff9db31fa80000(0000) knlGS:0000000000000000 [ 274.212424] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 274.218162] CR2: ffffffffffffffd6 CR3: 00000004574da004 CR4: 00000000003606e0 [ 274.225292] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 274.232416] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 274.239541] Call Trace: [ 274.241988] ? tnl_update_pmtu+0x296/0x3b0 [ 274.246085] ip_md_tunnel_xmit+0x1bc/0x520 [ 274.250176] gre_fb_xmit+0x330/0x390 [ 274.253754] gre_tap_xmit+0x128/0x180 [ 274.257414] dev_hard_start_xmit+0xb7/0x300 [ 274.261598] sch_direct_xmit+0xf6/0x290 [ 274.265430] __qdisc_run+0x15d/0x5e0 [ 274.269007] __dev_queue_xmit+0x2c5/0xc00 [ 274.273011] ? dev_queue_xmit+0x10/0x20 [ 274.276842] ? eth_header+0x2b/0xc0 [ 274.280326] dev_queue_xmit+0x10/0x20 [ 274.283984] ? dev_queue_xmit+0x10/0x20 [ 274.287813] arp_xmit+0x1a/0xf0 [ 274.290952] arp_send_dst.part.19+0x46/0x60 [ 274.295138] arp_solicit+0x177/0x6b0 [ 274.298708] ? mod_timer+0x18e/0x440 [ 274.302281] neigh_probe+0x57/0x70 [ 274.305684] __neigh_event_send+0x197/0x2d0 [ 274.309862] neigh_resolve_output+0x18c/0x210 [ 274.314212] ip_finish_output2+0x257/0x690 [ 274.318304] ip_finish_output+0x219/0x340 [ 274.322314] ? ip_finish_output+0x219/0x340 [ 274.326493] ip_output+0x76/0x240 [ 274.329805] ? ip_fragment.constprop.53+0x80/0x80 [ 274.334510] ip_local_out+0x3f/0x70 [ 274.337992] ip_send_skb+0x19/0x40 [ 274.341391] ip_push_pending_frames+0x33/0x40 [ 274.345740] raw_sendmsg+0xc15/0x11d0 [ 274.349403] ? __might_fault+0x85/0x90 [ 274.353151] ? _copy_from_user+0x6b/0xa0 [ 274.357070] ? rw_copy_check_uvector+0x54/0x130 [ 274.361604] inet_sendmsg+0x42/0x1c0 [ 274.365179] ? inet_sendmsg+0x42/0x1c0 [ 274.368937] sock_sendmsg+0x3e/0x50 [ 274.372460] ___sys_sendmsg+0x26f/0x2d0 [ 274.376293] ? lock_acquire+0x95/0x190 [ 274.380043] ? __handle_mm_fault+0x7ce/0xb70 [ 274.384307] ? lock_acquire+0x95/0x190 [ 274.388053] ? __audit_syscall_entry+0xdd/0x130 [ 274.392586] ? ktime_get_coarse_real_ts64+0x64/0xc0 [ 274.397461] ? __audit_syscall_entry+0xdd/0x130 [ 274.401989] ? trace_hardirqs_on+0x4c/0x100 [ 274.406173] __sys_sendmsg+0x63/0xa0 [ 274.409744] ? __sys_sendmsg+0x63/0xa0 [ 274.413488] __x64_sys_sendmsg+0x1f/0x30 [ 274.417405] do_syscall_64+0x55/0x190 [ 274.421064] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 274.426113] RIP: 0033:0x7ff4ae0e6e87 [ 274.429686] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 ca d9 2b 00 48 63 d2 48 63 ff 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 53 48 89 f3 48 83 ec 10 48 89 7c 24 08 [ 274.448422] RSP: 002b:00007ffcd9b76db8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 274.455978] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 00007ff4ae0e6e87 [ 274.463104] RDX: 0000000000000000 RSI: 00000000006092e0 RDI: 0000000000000003 [ 274.470228] RBP: 0000000000000000 R08: 00007ffcd9bc40a0 R09: 00007ffcd9bc4080 [ 274.477349] R10: 000000000000060a R11: 0000000000000246 R12: 0000000000000003 [ 274.484475] R13: 0000000000000016 R14: 00007ffcd9b77fa0 R15: 00007ffcd9b78da4 [ 274.491602] Modules linked in: cls_bpf sch_ingress iptable_filter ip_tables algif_hash af_alg x86_pkg_temp_thermal fuse [last unloaded: test_bpf] [ 274.504634] CR2: 0000000000000000 [ 274.507976] ---[ end trace 196d18386545eae1 ]--- [ 274.512588] RIP: 0010: (null) [ 274.516334] Code: Bad RIP value. [ 274.519557] RSP: 0018:ffffbc9681f83540 EFLAGS: 00010286 [ 274.524775] RAX: 0000000000000000 RBX: ffffdc967fa80a18 RCX: 0000000000000000 [ 274.531921] RDX: ffff9db2ee08b540 RSI: 000000000000000e RDI: ffffdc967fa809a0 [ 274.539082] RBP: ffffbc9681f83580 R08: ffff9db2c4d62690 R09: 000000000000000c [ 274.546205] R10: 0000000000000000 R11: ffff9db2ee08b540 R12: ffff9db31ce7c000 [ 274.553329] R13: 0000000000000001 R14: 000000000000000c R15: ffff9db3179cf400 [ 274.560456] FS: 00007ff4ae7c5740(0000) GS:ffff9db31fa80000(0000) knlGS:0000000000000000 [ 274.568541] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 274.574277] CR2: ffffffffffffffd6 CR3: 00000004574da004 CR4: 00000000003606e0 [ 274.581403] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 274.588535] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 274.595658] Kernel panic - not syncing: Fatal exception in interrupt [ 274.602046] Kernel Offset: 0x14400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 274.612827] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- [ 274.620387] ------------[ cut here ]------------ I'm also seeing the same failure on x86_64, and it reproduces consistently. >From poking around it looks like the skb's dst entry is being used to calculate the mtu in: mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; ...but because that dst_entry has an "ops" value set to md_dst_ops, the various ops (including mtu) are not set: crash> struct sk_buff._skb_refdst ffff928f87447700 -x _skb_refdst = 0xffffcd6fbf5ea590 crash> struct dst_entry.ops 0xffffcd6fbf5ea590 ops = 0xffffffffa0193800 crash> struct dst_ops.mtu 0xffffffffa0193800 mtu = 0x0 crash> I confirmed that the dst entry also has dst->input set to dst_md_discard, so it looks like it's an entry that's been initialized via __metadata_dst_init alright. I think the fix here is to use skb_valid_dst(skb) - it checks for DST_METADATA also, and with that fix in place, the problem - which was previously 100% reproducible - disappears. The below patch resolves the panic and all bpf tunnel tests pass without incident. Fixes: c8b34e680a09 ("ip_tunnel: Add tnl_update_pmtu in ip_md_tunnel_xmit") Reported-by: Naresh Kamboju Signed-off-by: Alan Maguire Acked-by: Alexei Starovoitov Tested-by: Anders Roxell Reported-by: Nicolas Dichtel Tested-by: Nicolas Dichtel Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 2756fb725bf0..a5d8cad18ead 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -515,9 +515,10 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, mtu = dst_mtu(&rt->dst) - dev->hard_header_len - sizeof(struct iphdr) - tunnel_hlen; else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - skb_dst_update_pmtu(skb, mtu); + if (skb_valid_dst(skb)) + skb_dst_update_pmtu(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && @@ -530,9 +531,11 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, } #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt6; __be32 daddr; + rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) : + NULL; daddr = md ? dst : tunnel->parms.iph.daddr; if (rt6 && mtu < dst_mtu(skb_dst(skb)) && -- cgit v1.2.3-59-g8ed1b From a10674bf2406afc2554f9c7d31b2dc65d6a27fd9 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 6 Mar 2019 14:10:22 +0300 Subject: tcp: detecting the misuse of .sendpage for Slab objects sendpage was not designed for processing of the Slab pages, in some situations it can trigger BUG_ON on receiving side. Signed-off-by: Vasily Averin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ad07dd71063d..dbb08140cdc9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -943,6 +943,10 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, ssize_t copied; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + if (IS_ENABLED(CONFIG_DEBUG_VM) && + WARN_ONCE(PageSlab(page), "page must not be a Slab one")) + return -EINVAL; + /* Wait for a connection to finish. One exception is TCP Fast Open * (passive side) where data is allowed to be sent before a connection * is fully established. -- cgit v1.2.3-59-g8ed1b From ecb3dea400d3beaf611ce76ac7a51d4230492cf2 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 6 Mar 2019 16:22:12 +0200 Subject: net: sched: flower: insert new filter to idr after setting its mask When adding new filter to flower classifier, fl_change() inserts it to handle_idr before initializing filter extensions and assigning it a mask. Normally this ordering doesn't matter because all flower classifier ops callbacks assume rtnl lock protection. However, when filter has an action that doesn't have its kernel module loaded, rtnl lock is released before call to request_module(). During this time the filter can be accessed bu concurrent task before its initialization is completed, which can lead to a crash. Example case of NULL pointer dereference in concurrent dump: Task 1 Task 2 tc_new_tfilter() fl_change() idr_alloc_u32(fnew) fl_set_parms() tcf_exts_validate() tcf_action_init() tcf_action_init_1() rtnl_unlock() request_module() ... rtnl_lock() tc_dump_tfilter() tcf_chain_dump() fl_walk() idr_get_next_ul() tcf_node_dump() tcf_fill_node() fl_dump() mask = &f->mask->key; <- NULL ptr rtnl_lock() Extension initialization and mask assignment don't depend on fnew->handle that is allocated by idr_alloc_u32(). Move idr allocation code after action creation and mask assignment in fl_change() to prevent concurrent access to not fully initialized filter when rtnl lock is released to load action module. Fixes: 01683a146999 ("net: sched: refactor flower walk to iterate over idr") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 27300a3e76c7..c04247b403ed 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1348,46 +1348,46 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - if (!handle) { - handle = 1; - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - INT_MAX, GFP_KERNEL); - } else if (!fold) { - /* user specifies a handle and it doesn't exist */ - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - handle, GFP_KERNEL); - } - if (err) - goto errout; - fnew->handle = handle; - if (tb[TCA_FLOWER_FLAGS]) { fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); if (!tc_flags_valid(fnew->flags)) { err = -EINVAL; - goto errout_idr; + goto errout; } } err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, tp->chain->tmplt_priv, extack); if (err) - goto errout_idr; + goto errout; err = fl_check_assign_mask(head, fnew, fold, mask); if (err) - goto errout_idr; + goto errout; + + if (!handle) { + handle = 1; + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + INT_MAX, GFP_KERNEL); + } else if (!fold) { + /* user specifies a handle and it doesn't exist */ + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + handle, GFP_KERNEL); + } + if (err) + goto errout_mask; + fnew->handle = handle; if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) { err = -EEXIST; - goto errout_mask; + goto errout_idr; } err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node, fnew->mask->filter_ht_params); if (err) - goto errout_mask; + goto errout_idr; if (!tc_skip_hw(fnew->flags)) { err = fl_hw_replace_filter(tp, fnew, extack); @@ -1426,12 +1426,13 @@ errout_mask_ht: rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, fnew->mask->filter_ht_params); -errout_mask: - fl_mask_put(head, fnew->mask, false); - errout_idr: if (!fold) idr_remove(&head->handle_idr, fnew->handle); + +errout_mask: + fl_mask_put(head, fnew->mask, false); + errout: tcf_exts_destroy(&fnew->exts); kfree(fnew); -- cgit v1.2.3-59-g8ed1b From 6caabe7f197d3466d238f70915d65301f1716626 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Wed, 6 Mar 2019 22:45:01 +0800 Subject: net: hsr: fix memory leak in hsr_dev_finalize() If hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER) failed to add port, it directly returns res and forgets to free the node that allocated in hsr_create_self_node(), and forgets to delete the node->mac_list linked in hsr->self_node_db. BUG: memory leak unreferenced object 0xffff8881cfa0c780 (size 64): comm "syz-executor.0", pid 2077, jiffies 4294717969 (age 2415.377s) hex dump (first 32 bytes): e0 c7 a0 cf 81 88 ff ff 00 02 00 00 00 00 ad de ................ 00 e6 49 cd 81 88 ff ff c0 9b 87 d0 81 88 ff ff ..I............. backtrace: [<00000000e2ff5070>] hsr_dev_finalize+0x736/0x960 [hsr] [<000000003ed2e597>] hsr_newlink+0x2b2/0x3e0 [hsr] [<000000003fa8c6b6>] __rtnl_newlink+0xf1f/0x1600 net/core/rtnetlink.c:3182 [<000000001247a7ad>] rtnl_newlink+0x66/0x90 net/core/rtnetlink.c:3240 [<00000000e7d1b61d>] rtnetlink_rcv_msg+0x54e/0xb90 net/core/rtnetlink.c:5130 [<000000005556bd3a>] netlink_rcv_skb+0x129/0x340 net/netlink/af_netlink.c:2477 [<00000000741d5ee6>] netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] [<00000000741d5ee6>] netlink_unicast+0x49a/0x650 net/netlink/af_netlink.c:1336 [<000000009d56f9b7>] netlink_sendmsg+0x88b/0xdf0 net/netlink/af_netlink.c:1917 [<0000000046b35c59>] sock_sendmsg_nosec net/socket.c:621 [inline] [<0000000046b35c59>] sock_sendmsg+0xc3/0x100 net/socket.c:631 [<00000000d208adc9>] __sys_sendto+0x33e/0x560 net/socket.c:1786 [<00000000b582837a>] __do_sys_sendto net/socket.c:1798 [inline] [<00000000b582837a>] __se_sys_sendto net/socket.c:1794 [inline] [<00000000b582837a>] __x64_sys_sendto+0xdd/0x1b0 net/socket.c:1794 [<00000000c866801d>] do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 [<00000000fea382d9>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000e01dacb3>] 0xffffffffffffffff Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Reported-by: Hulk Robot Signed-off-by: Mao Wenan Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 4 +++- net/hsr/hsr_framereg.c | 12 ++++++++++++ net/hsr/hsr_framereg.h | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index b8cd43c9ed5b..c4676bacb8db 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -486,7 +486,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER); if (res) - return res; + goto err_add_port; res = register_netdevice(hsr_dev); if (res) @@ -506,6 +506,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], fail: hsr_for_each_port(hsr, port) hsr_del_port(port); +err_add_port: + hsr_del_node(&hsr->self_node_db); return res; } diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 286ceb41ac0c..9af16cb68f76 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -124,6 +124,18 @@ int hsr_create_self_node(struct list_head *self_node_db, return 0; } +void hsr_del_node(struct list_head *self_node_db) +{ + struct hsr_node *node; + + rcu_read_lock(); + node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list); + rcu_read_unlock(); + if (node) { + list_del_rcu(&node->mac_list); + kfree(node); + } +} /* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA; * seq_out is used to initialize filtering of outgoing duplicate frames diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 370b45998121..531fd3dfcac1 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -16,6 +16,7 @@ struct hsr_node; +void hsr_del_node(struct list_head *self_node_db); struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], u16 seq_out); struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, -- cgit v1.2.3-59-g8ed1b From 6466e715651f9f358e60c5ea4880e4731325827f Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Wed, 6 Mar 2019 13:01:36 -0500 Subject: tcp: do not report TCP_CM_INQ of 0 for closed connections Returning 0 as inq to userspace indicates there is no more data to read, and the application needs to wait for EPOLLIN. For a connection that has received FIN from the remote peer, however, the application must continue reading until getting EOF (return value of 0 from tcp_recvmsg) or an error, if edge-triggered epoll (EPOLLET) is being used. Otherwise, the application will never receive a new EPOLLIN, since there is no epoll edge after the FIN. Return 1 when there is no data left on the queue but the connection has received FIN, so that the applications continue reading. Fixes: b75eba76d3d72 (tcp: send in-queue bytes in cmsg upon read) Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dbb08140cdc9..6baa6dc1b13b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1937,6 +1937,11 @@ static int tcp_inq_hint(struct sock *sk) inq = tp->rcv_nxt - tp->copied_seq; release_sock(sk); } + /* After receiving a FIN, tell the user-space to continue reading + * by returning a non-zero inq. + */ + if (inq == 0 && sock_flag(sk, SOCK_DONE)) + inq = 1; return inq; } -- cgit v1.2.3-59-g8ed1b From 4c3024debf62de4c6ac6d3cb4c0063be21d4f652 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 6 Mar 2019 14:35:15 -0500 Subject: bpf: only test gso type on gso packets BPF can adjust gso only for tcp bytestreams. Fail on other gso types. But only on gso packets. It does not touch this field if !gso_size. Fixes: b90efd225874 ("bpf: only adjust gso_size on bytestream protocols") Signed-off-by: Willem de Bruijn Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- include/linux/skbuff.h | 4 ++-- net/core/filter.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 27beb549ffbe..f32f32407dc4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4232,10 +4232,10 @@ static inline bool skb_is_gso_sctp(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP; } +/* Note: Should be called only if skb_is_gso(skb) is true */ static inline bool skb_is_gso_tcp(const struct sk_buff *skb) { - return skb_is_gso(skb) && - skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6); + return skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6); } static inline void skb_gso_reset(struct sk_buff *skb) diff --git a/net/core/filter.c b/net/core/filter.c index 5ceba98069d4..f274620945ff 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2804,7 +2804,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2845,7 +2845,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); @@ -2970,7 +2970,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2999,7 +2999,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); -- cgit v1.2.3-59-g8ed1b From 915905f8b1d452e70ee6d8637c3f0fb55a39691d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Mar 2019 09:31:26 -0800 Subject: xsk: fix potential crash in xsk_diag_put_umem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes two typos in xsk_diag_put_umem() syzbot reported the following crash : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 7641 Comm: syz-executor946 Not tainted 5.0.0-rc7+ #95 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:xsk_diag_put_umem net/xdp/xsk_diag.c:71 [inline] RIP: 0010:xsk_diag_fill net/xdp/xsk_diag.c:113 [inline] RIP: 0010:xsk_diag_dump+0xdcb/0x13a0 net/xdp/xsk_diag.c:143 Code: 8d be c0 04 00 00 48 89 f8 48 c1 e8 03 42 80 3c 20 00 0f 85 39 04 00 00 49 8b 96 c0 04 00 00 48 8d 7a 14 48 89 f8 48 c1 e8 03 <42> 0f b6 0c 20 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RSP: 0018:ffff888090bcf2d8 EFLAGS: 00010203 RAX: 0000000000000002 RBX: ffff8880a0aacbc0 RCX: ffffffff86ffdc3c RDX: 0000000000000000 RSI: ffffffff86ffdc70 RDI: 0000000000000014 RBP: ffff888090bcf438 R08: ffff88808e04a700 R09: ffffed1011c74174 R10: ffffed1011c74173 R11: ffff88808e3a0b9f R12: dffffc0000000000 R13: ffff888093a6d818 R14: ffff88808e365240 R15: ffff88808e3a0b40 FS: 00000000011ea880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000080 CR3: 000000008fa13000 CR4: 00000000001406e0 Call Trace: netlink_dump+0x55d/0xfb0 net/netlink/af_netlink.c:2252 __netlink_dump_start+0x5b4/0x7e0 net/netlink/af_netlink.c:2360 netlink_dump_start include/linux/netlink.h:226 [inline] xsk_diag_handler_dump+0x1b2/0x250 net/xdp/xsk_diag.c:170 __sock_diag_cmd net/core/sock_diag.c:232 [inline] sock_diag_rcv_msg+0x322/0x410 net/core/sock_diag.c:263 netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485 sock_diag_rcv+0x2b/0x40 net/core/sock_diag.c:274 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:632 sock_write_iter+0x27c/0x3e0 net/socket.c:923 call_write_iter include/linux/fs.h:1863 [inline] do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680 do_iter_write fs/read_write.c:956 [inline] do_iter_write+0x184/0x610 fs/read_write.c:937 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1001 do_writev+0xf6/0x290 fs/read_write.c:1036 __do_sys_writev fs/read_write.c:1109 [inline] __se_sys_writev fs/read_write.c:1106 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1106 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x440139 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffcc966cc18 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440139 RDX: 0000000000000001 RSI: 0000000020000080 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 0000000000000004 R11: 0000000000000246 R12: 00000000004019c0 R13: 0000000000401a50 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: ---[ end trace 460a3c24d0a656c9 ]--- RIP: 0010:xsk_diag_put_umem net/xdp/xsk_diag.c:71 [inline] RIP: 0010:xsk_diag_fill net/xdp/xsk_diag.c:113 [inline] RIP: 0010:xsk_diag_dump+0xdcb/0x13a0 net/xdp/xsk_diag.c:143 Code: 8d be c0 04 00 00 48 89 f8 48 c1 e8 03 42 80 3c 20 00 0f 85 39 04 00 00 49 8b 96 c0 04 00 00 48 8d 7a 14 48 89 f8 48 c1 e8 03 <42> 0f b6 0c 20 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RSP: 0018:ffff888090bcf2d8 EFLAGS: 00010203 RAX: 0000000000000002 RBX: ffff8880a0aacbc0 RCX: ffffffff86ffdc3c RDX: 0000000000000000 RSI: ffffffff86ffdc70 RDI: 0000000000000014 RBP: ffff888090bcf438 R08: ffff88808e04a700 R09: ffffed1011c74174 R10: ffffed1011c74173 R11: ffff88808e3a0b9f R12: dffffc0000000000 R13: ffff888093a6d818 R14: ffff88808e365240 R15: ffff88808e3a0b40 FS: 00000000011ea880(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000001d22000 CR3: 000000008fa13000 CR4: 00000000001406f0 Fixes: a36b38aa2af6 ("xsk: add sock_diag interface for AF_XDP") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Björn Töpel Cc: Daniel Borkmann Cc: Magnus Karlsson Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk_diag.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index 661d007c3b28..d5e06c8e0cbf 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -68,9 +68,9 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du); if (!err && umem->fq) - err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb); + err = xsk_diag_put_ring(umem->fq, XDP_DIAG_UMEM_FILL_RING, nlskb); if (!err && umem->cq) { - err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING, + err = xsk_diag_put_ring(umem->cq, XDP_DIAG_UMEM_COMPLETION_RING, nlskb); } return err; -- cgit v1.2.3-59-g8ed1b From ea0371f7879987cff70e21d808e3e9fea624c051 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Mon, 4 Mar 2019 16:27:08 -0800 Subject: net: fix GSO in bpf_lwt_push_ip_encap GSO needs inner headers and inner protocol set properly to work. skb->inner_mac_header: skb_reset_inner_headers() assigns the current mac header value to inner_mac_header; but it is not set at the point, so we need to call skb_reset_inner_mac_header, otherwise gre_gso_segment fails: it does int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); ... if (unlikely(!pskb_may_pull(skb, tnl_hlen))) ... skb->inner_protocol should also be correctly set. Fixes: ca78801a81e0 ("bpf: handle GSO in bpf_lwt_push_encap") Signed-off-by: Peter Oskolkov Reviewed-by: David Ahern Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- net/core/lwt_bpf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index cf2f8897ca19..126d31ff5ee3 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -625,6 +625,8 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) /* push the encap headers and fix pointers */ skb_reset_inner_headers(skb); + skb_reset_inner_mac_header(skb); /* mac header is not yet set */ + skb_set_inner_protocol(skb, skb->protocol); skb->encapsulation = 1; skb_push(skb, len); if (ingress) -- cgit v1.2.3-59-g8ed1b From e8e3437762ad938880dd48a3c52d702e7cf3c124 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 7 Mar 2019 11:35:43 +0100 Subject: bpf: Stop the psock parser before canceling its work We might have never enabled (started) the psock's parser, in which case it will not get stopped when destroying the psock. This leads to a warning when trying to cancel parser's work from psock's deferred destructor: [ 405.325769] WARNING: CPU: 1 PID: 3216 at net/strparser/strparser.c:526 strp_done+0x3c/0x40 [ 405.326712] Modules linked in: [last unloaded: test_bpf] [ 405.327359] CPU: 1 PID: 3216 Comm: kworker/1:164 Tainted: G W 5.0.0 #42 [ 405.328294] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180531_142017-buildhw-08.phx2.fedoraproject.org-1.fc28 04/01/2014 [ 405.329712] Workqueue: events sk_psock_destroy_deferred [ 405.330254] RIP: 0010:strp_done+0x3c/0x40 [ 405.330706] Code: 28 e8 b8 d5 6b ff 48 8d bb 80 00 00 00 e8 9c d5 6b ff 48 8b 7b 18 48 85 ff 74 0d e8 1e a5 e8 ff 48 c7 43 18 00 00 00 00 5b c3 <0f> 0b eb cf 66 66 66 66 90 55 89 f5 53 48 89 fb 48 83 c7 28 e8 0b [ 405.332862] RSP: 0018:ffffc900026bbe50 EFLAGS: 00010246 [ 405.333482] RAX: ffffffff819323e0 RBX: ffff88812cb83640 RCX: ffff88812cb829e8 [ 405.334228] RDX: 0000000000000001 RSI: ffff88812cb837e8 RDI: ffff88812cb83640 [ 405.335366] RBP: ffff88813fd22680 R08: 0000000000000000 R09: 000073746e657665 [ 405.336472] R10: 8080808080808080 R11: 0000000000000001 R12: ffff88812cb83600 [ 405.337760] R13: 0000000000000000 R14: ffff88811f401780 R15: ffff88812cb837e8 [ 405.338777] FS: 0000000000000000(0000) GS:ffff88813fd00000(0000) knlGS:0000000000000000 [ 405.339903] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 405.340821] CR2: 00007fb11489a6b8 CR3: 000000012d4d6000 CR4: 00000000000406e0 [ 405.341981] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 405.343131] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 405.344415] Call Trace: [ 405.344821] sk_psock_destroy_deferred+0x23/0x1b0 [ 405.345585] process_one_work+0x1ae/0x3e0 [ 405.346110] worker_thread+0x3c/0x3b0 [ 405.346576] ? pwq_unbound_release_workfn+0xd0/0xd0 [ 405.347187] kthread+0x11d/0x140 [ 405.347601] ? __kthread_parkme+0x80/0x80 [ 405.348108] ret_from_fork+0x35/0x40 [ 405.348566] ---[ end trace a4a3af4026a327d4 ]--- Stop psock's parser just before canceling its work. Fixes: 1d79895aef18 ("sk_msg: Always cancel strp work before freeing the psock") Reported-by: kernel test robot Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann --- net/core/skmsg.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index ae6f06e45737..cc94d921476c 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -554,6 +554,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc) struct sk_psock *psock = container_of(gc, struct sk_psock, gc); /* No sk_callback_lock since already detached. */ + strp_stop(&psock->parser.strp); strp_done(&psock->parser.strp); cancel_work_sync(&psock->work); -- cgit v1.2.3-59-g8ed1b From 3499e87ea0413ee5b2cc028f4c8ed4d424bc7f98 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Mar 2019 16:58:35 +0100 Subject: ethtool: reduce stack usage with clang clang inlines the dev_ethtool() more aggressively than gcc does, leading to a larger amount of used stack space: net/core/ethtool.c:2536:24: error: stack frame size of 1216 bytes in function 'dev_ethtool' [-Werror,-Wframe-larger-than=] Marking the sub-functions that require the most stack space as noinline_for_stack gives us reasonable behavior on all compilers. Signed-off-by: Arnd Bergmann Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/core/ethtool.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d4918ffddda8..b1eb32419732 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2319,9 +2319,10 @@ static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) return ret; } -static int ethtool_get_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) +static noinline_for_stack int +ethtool_get_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) { u32 bit; int ret; @@ -2349,9 +2350,10 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev, return 0; } -static int ethtool_set_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) +static noinline_for_stack int +ethtool_set_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) { u32 bit; int i, ret = 0; @@ -2405,7 +2407,7 @@ roll_back: return ret; } -static int ethtool_set_per_queue(struct net_device *dev, +static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev, void __user *useraddr, u32 sub_cmd) { struct ethtool_per_queue_op per_queue_opt; -- cgit v1.2.3-59-g8ed1b From f9d19a7494e5341a7f256823e32788ae560ca22f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Mar 2019 09:57:42 -0700 Subject: net: atm: Use IS_ENABLED in atm_dev_ioctl When building with -Wsometimes-uninitialized, Clang warns: net/atm/resources.c:256:6: warning: variable 'number' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] net/atm/resources.c:212:7: warning: variable 'iobuf_len' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] Clang won't realize that compat is 0 when CONFIG_COMPAT is not set until the constant folding stage, which happens after this semantic analysis. Use IS_ENABLED instead so that the zero is present at the semantic analysis stage, which eliminates this warning. Link: https://github.com/ClangBuiltLinux/linux/issues/386 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- net/atm/resources.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/atm/resources.c b/net/atm/resources.c index bada395ecdb1..3e9f6391319e 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -203,13 +203,9 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) int __user *sioc_len; int __user *iobuf_len; -#ifndef CONFIG_COMPAT - compat = 0; /* Just so the compiler _knows_ */ -#endif - switch (cmd) { case ATM_GETNAMES: - if (compat) { + if (IS_ENABLED(CONFIG_COMPAT) && compat) { #ifdef CONFIG_COMPAT struct compat_atm_iobuf __user *ciobuf = arg; compat_uptr_t cbuf; @@ -253,7 +249,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) break; } - if (compat) { + if (IS_ENABLED(CONFIG_COMPAT) && compat) { #ifdef CONFIG_COMPAT struct compat_atmif_sioc __user *csioc = arg; compat_uptr_t carg; -- cgit v1.2.3-59-g8ed1b From 0805a4b894a8daaf76ad99934563d8ecfc6e7aed Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Mar 2019 11:11:26 -0700 Subject: net: atm: Add another IS_ENABLED(CONFIG_COMPAT) in atm_dev_ioctl I removed compat's universal assignment to 0, which allows this if statement to fall through when compat is passed with a value other than 0. Fixes: f9d19a7494e5 ("net: atm: Use IS_ENABLED in atm_dev_ioctl") Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- net/atm/resources.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/resources.c b/net/atm/resources.c index 3e9f6391319e..889349c6d90d 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -413,7 +413,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) } /* fall through */ default: - if (compat) { + if (IS_ENABLED(CONFIG_COMPAT) && compat) { #ifdef CONFIG_COMPAT if (!dev->ops->compat_ioctl) { error = -EINVAL; -- cgit v1.2.3-59-g8ed1b From 1e027960edfaa6a43f9ca31081729b716598112b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2019 09:36:33 -0800 Subject: net/hsr: fix possible crash in add_timer() syzbot found another add_timer() issue, this time in net/hsr [1] Let's use mod_timer() which is safe. [1] kernel BUG at kernel/time/timer.c:1136! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 15909 Comm: syz-executor.3 Not tainted 5.0.0+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 kobject: 'loop2' (00000000f5629718): kobject_uevent_env RIP: 0010:add_timer kernel/time/timer.c:1136 [inline] RIP: 0010:add_timer+0x654/0xbe0 kernel/time/timer.c:1134 Code: 0f 94 c5 31 ff 44 89 ee e8 09 61 0f 00 45 84 ed 0f 84 77 fd ff ff e8 bb 5f 0f 00 e8 07 10 a0 ff e9 68 fd ff ff e8 ac 5f 0f 00 <0f> 0b e8 a5 5f 0f 00 0f 0b e8 9e 5f 0f 00 4c 89 b5 58 ff ff ff e9 RSP: 0018:ffff8880656eeca0 EFLAGS: 00010246 kobject: 'loop2' (00000000f5629718): fill_kobj_path: path = '/devices/virtual/block/loop2' RAX: 0000000000040000 RBX: 1ffff1100caddd9a RCX: ffffc9000c436000 RDX: 0000000000040000 RSI: ffffffff816056c4 RDI: ffff88806a2f6cc8 RBP: ffff8880656eed58 R08: ffff888067f4a300 R09: ffff888067f4abc8 R10: 0000000000000000 R11: 0000000000000000 R12: ffff88806a2f6cc0 R13: dffffc0000000000 R14: 0000000000000001 R15: ffff8880656eed30 FS: 00007fc2019bf700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000738000 CR3: 0000000067e8e000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: hsr_check_announce net/hsr/hsr_device.c:99 [inline] hsr_check_carrier_and_operstate+0x567/0x6f0 net/hsr/hsr_device.c:120 hsr_netdev_notify+0x297/0xa00 net/hsr/hsr_main.c:51 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739 call_netdevice_notifiers_extack net/core/dev.c:1751 [inline] call_netdevice_notifiers net/core/dev.c:1765 [inline] dev_open net/core/dev.c:1436 [inline] dev_open+0x143/0x160 net/core/dev.c:1424 team_port_add drivers/net/team/team.c:1203 [inline] team_add_slave+0xa07/0x15d0 drivers/net/team/team.c:1933 do_set_master net/core/rtnetlink.c:2358 [inline] do_set_master+0x1d4/0x230 net/core/rtnetlink.c:2332 do_setlink+0x966/0x3510 net/core/rtnetlink.c:2493 rtnl_setlink+0x271/0x3b0 net/core/rtnetlink.c:2747 rtnetlink_rcv_msg+0x465/0xb00 net/core/rtnetlink.c:5192 netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5210 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:632 sock_write_iter+0x27c/0x3e0 net/socket.c:923 call_write_iter include/linux/fs.h:1869 [inline] do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680 do_iter_write fs/read_write.c:956 [inline] do_iter_write+0x184/0x610 fs/read_write.c:937 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1001 do_writev+0xf6/0x290 fs/read_write.c:1036 __do_sys_writev fs/read_write.c:1109 [inline] __se_sys_writev fs/read_write.c:1106 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1106 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457f29 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fc2019bec78 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457f29 RDX: 0000000000000001 RSI: 00000000200000c0 RDI: 0000000000000003 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc2019bf6d4 R13: 00000000004c4a60 R14: 00000000004dd218 R15: 00000000ffffffff Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Arvid Brodin Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c4676bacb8db..a97bf326b231 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -94,9 +94,8 @@ static void hsr_check_announce(struct net_device *hsr_dev, && (old_operstate != IF_OPER_UP)) { /* Went up */ hsr->announce_count = 0; - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); - add_timer(&hsr->announce_timer); + mod_timer(&hsr->announce_timer, + jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP)) @@ -332,6 +331,7 @@ static void hsr_announce(struct timer_list *t) { struct hsr_priv *hsr; struct hsr_port *master; + unsigned long interval; hsr = from_timer(hsr, t, announce_timer); @@ -343,18 +343,16 @@ static void hsr_announce(struct timer_list *t) hsr->protVersion); hsr->announce_count++; - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); + interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); } else { send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK, hsr->protVersion); - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); } if (is_admin_up(master->dev)) - add_timer(&hsr->announce_timer); + mod_timer(&hsr->announce_timer, jiffies + interval); rcu_read_unlock(); } -- cgit v1.2.3-59-g8ed1b From ee60ad219f5c7c4fb2f047f88037770063ef785f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 8 Mar 2019 14:50:54 +0800 Subject: route: set the deleted fnhe fnhe_daddr to 0 in ip_del_fnhe to fix a race The race occurs in __mkroute_output() when 2 threads lookup a dst: CPU A CPU B find_exception() find_exception() [fnhe expires] ip_del_fnhe() [fnhe is deleted] rt_bind_exception() In rt_bind_exception() it will bind a deleted fnhe with the new dst, and this dst will get no chance to be freed. It causes a dev defcnt leak and consecutive dmesg warnings: unregister_netdevice: waiting for ethX to become free. Usage count = 1 Especially thanks Jon to identify the issue. This patch fixes it by setting fnhe_daddr to 0 in ip_del_fnhe() to stop binding the deleted fnhe with a new dst when checking fnhe's fnhe_daddr and daddr in rt_bind_exception(). It works as both ip_del_fnhe() and rt_bind_exception() are protected by fnhe_lock and the fhne is freed by kfree_rcu(). Fixes: deed49df7390 ("route: check and remove route cache when we get route") Signed-off-by: Jon Maxwell Signed-off-by: Xin Long Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8ca3642f0d9b..a5da63e5faa2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1303,6 +1303,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) if (fnhe->fnhe_daddr == daddr) { rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + /* set fnhe_daddr to 0 to ensure it won't bind with + * new dsts in rt_bind_exception(). + */ + fnhe->fnhe_daddr = 0; fnhe_flush_routes(fnhe); kfree_rcu(fnhe, rcu); break; -- cgit v1.2.3-59-g8ed1b From 2e990dfd13974d9eae493006f42ffb48707970ef Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 8 Mar 2019 15:49:16 +0800 Subject: sctp: remove sched init from sctp_stream_init syzbot reported a NULL-ptr deref caused by that sched->init() in sctp_stream_init() set stream->rr_next = NULL. kasan: GPF could be caused by NULL-ptr deref or user memory access RIP: 0010:sctp_sched_rr_dequeue+0xd3/0x170 net/sctp/stream_sched_rr.c:141 Call Trace: sctp_outq_dequeue_data net/sctp/outqueue.c:90 [inline] sctp_outq_flush_data net/sctp/outqueue.c:1079 [inline] sctp_outq_flush+0xba2/0x2790 net/sctp/outqueue.c:1205 All sched info is saved in sout->ext now, in sctp_stream_init() sctp_stream_alloc_out() will not change it, there's no need to call sched->init() again, since sctp_outq_init() has already done it. Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: syzbot+4c9934f20522c0efd657@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 2936ed17bf9e..3b47457862cc 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -230,8 +230,6 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; - sched->init(stream); - in: sctp_stream_interleave_init(stream); if (!incnt) -- cgit v1.2.3-59-g8ed1b From 930c9f9125c85b5134b3e711bc252ecc094708e3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 8 Mar 2019 12:48:39 +0000 Subject: rxrpc: Fix client call connect/disconnect race rxrpc_disconnect_client_call() reads the call's connection ID protocol value (call->cid) as part of that function's variable declarations. This is bad because it's not inside the locked section and so may race with someone granting use of the channel to the call. This manifests as an assertion failure (see below) where the call in the presumed channel (0 because call->cid wasn't set when we read it) doesn't match the call attached to the channel we were actually granted (if 1, 2 or 3). Fix this by moving the read and dependent calculations inside of the channel_lock section. Also, only set the channel number and pointer variables if cid is not zero (ie. unset). This problem can be induced by injecting an occasional error in rxrpc_wait_for_channel() before the call to schedule(). Make two further changes also: (1) Add a trace for wait failure in rxrpc_connect_call(). (2) Drop channel_lock before BUG'ing in the case of the assertion failure. The failure causes a trace akin to the following: rxrpc: Assertion failed - 18446612685268945920(0xffff8880beab8c00) == 18446612685268621312(0xffff8880bea69800) is false ------------[ cut here ]------------ kernel BUG at net/rxrpc/conn_client.c:824! ... RIP: 0010:rxrpc_disconnect_client_call+0x2bf/0x99d ... Call Trace: rxrpc_connect_call+0x902/0x9b3 ? wake_up_q+0x54/0x54 rxrpc_new_client_call+0x3a0/0x751 ? rxrpc_kernel_begin_call+0x141/0x1bc ? afs_alloc_call+0x1b5/0x1b5 rxrpc_kernel_begin_call+0x141/0x1bc afs_make_call+0x20c/0x525 ? afs_alloc_call+0x1b5/0x1b5 ? __lock_is_held+0x40/0x71 ? lockdep_init_map+0xaf/0x193 ? lockdep_init_map+0xaf/0x193 ? __lock_is_held+0x40/0x71 ? yfs_fs_fetch_data+0x33b/0x34a yfs_fs_fetch_data+0x33b/0x34a afs_fetch_data+0xdc/0x3b7 afs_read_dir+0x52d/0x97f afs_dir_iterate+0xa0/0x661 ? iterate_dir+0x63/0x141 iterate_dir+0xa2/0x141 ksys_getdents64+0x9f/0x11b ? filldir+0x111/0x111 ? do_syscall_64+0x3e/0x1a0 __x64_sys_getdents64+0x16/0x19 do_syscall_64+0x7d/0x1a0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 45025bceef17 ("rxrpc: Improve management and caching of client connection objects") Signed-off-by: David Howells Reviewed-by: Marc Dionne Signed-off-by: David S. Miller --- include/trace/events/rxrpc.h | 2 ++ net/rxrpc/conn_client.c | 20 +++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 5b50fe4906d2..7b60fd186cfe 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -76,6 +76,7 @@ enum rxrpc_client_trace { rxrpc_client_chan_disconnect, rxrpc_client_chan_pass, rxrpc_client_chan_unstarted, + rxrpc_client_chan_wait_failed, rxrpc_client_cleanup, rxrpc_client_count, rxrpc_client_discard, @@ -276,6 +277,7 @@ enum rxrpc_tx_point { EM(rxrpc_client_chan_disconnect, "ChDisc") \ EM(rxrpc_client_chan_pass, "ChPass") \ EM(rxrpc_client_chan_unstarted, "ChUnst") \ + EM(rxrpc_client_chan_wait_failed, "ChWtFl") \ EM(rxrpc_client_cleanup, "Clean ") \ EM(rxrpc_client_count, "Count ") \ EM(rxrpc_client_discard, "Discar") \ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index b2adfa825363..f307a05076e1 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -704,6 +704,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, ret = rxrpc_wait_for_channel(call, gfp); if (ret < 0) { + trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed); rxrpc_disconnect_client_call(call); goto out; } @@ -774,16 +775,22 @@ static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) */ void rxrpc_disconnect_client_call(struct rxrpc_call *call) { - unsigned int channel = call->cid & RXRPC_CHANNELMASK; struct rxrpc_connection *conn = call->conn; - struct rxrpc_channel *chan = &conn->channels[channel]; + struct rxrpc_channel *chan = NULL; struct rxrpc_net *rxnet = conn->params.local->rxnet; + unsigned int channel = -1; + u32 cid; + spin_lock(&conn->channel_lock); + + cid = call->cid; + if (cid) { + channel = cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; + } trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); call->conn = NULL; - spin_lock(&conn->channel_lock); - /* Calls that have never actually been assigned a channel can simply be * discarded. If the conn didn't get used either, it will follow * immediately unless someone else grabs it in the meantime. @@ -807,7 +814,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) goto out; } - ASSERTCMP(rcu_access_pointer(chan->call), ==, call); + if (rcu_access_pointer(chan->call) != call) { + spin_unlock(&conn->channel_lock); + BUG(); + } /* If a client call was exposed to the world, we save the result for * retransmission. -- cgit v1.2.3-59-g8ed1b From 89664c623617b1d34447a927ac7871ddf3db29d3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 3 Mar 2019 17:54:53 +0800 Subject: sctp: sctp_sock_migrate() returns error if sctp_bind_addr_dup() fails It should fail to create the new sk if sctp_bind_addr_dup() fails when accepting or peeloff an association. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 533207dbeae9..44f2acb3d433 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -102,9 +102,9 @@ static int sctp_send_asconf(struct sctp_association *asoc, struct sctp_chunk *chunk); static int sctp_do_bind(struct sock *, union sctp_addr *, int); static int sctp_autobind(struct sock *sk); -static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, - struct sctp_association *assoc, - enum sctp_socket_type type); +static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, + struct sctp_association *assoc, + enum sctp_socket_type type); static unsigned long sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; @@ -4891,7 +4891,11 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) /* Populate the fields of the newsk from the oldsk and migrate the * asoc to the newsk. */ - sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP); + error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP); + if (error) { + sk_common_release(newsk); + newsk = NULL; + } out: release_sock(sk); @@ -5639,7 +5643,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) /* Populate the fields of the newsk from the oldsk and migrate the * asoc to the newsk. */ - sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH); + err = sctp_sock_migrate(sk, sock->sk, asoc, + SCTP_SOCKET_UDP_HIGH_BANDWIDTH); + if (err) { + sock_release(sock); + sock = NULL; + } *sockp = sock; @@ -9171,9 +9180,9 @@ static inline void sctp_copy_descendant(struct sock *sk_to, /* Populate the fields of the newsk from the oldsk and migrate the assoc * and its messages to the newsk. */ -static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, - struct sctp_association *assoc, - enum sctp_socket_type type) +static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, + struct sctp_association *assoc, + enum sctp_socket_type type) { struct sctp_sock *oldsp = sctp_sk(oldsk); struct sctp_sock *newsp = sctp_sk(newsk); @@ -9182,6 +9191,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; struct sctp_bind_hashbucket *head; + int err; /* Migrate socket buffer sizes and all the socket level options to the * new socket. @@ -9210,8 +9220,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, /* Copy the bind_addr list from the original endpoint to the new * endpoint so that we can handle restarts properly */ - sctp_bind_addr_dup(&newsp->ep->base.bind_addr, - &oldsp->ep->base.bind_addr, GFP_KERNEL); + err = sctp_bind_addr_dup(&newsp->ep->base.bind_addr, + &oldsp->ep->base.bind_addr, GFP_KERNEL); + if (err) + return err; /* Move any messages in the old socket's receive queue that are for the * peeled off association to the new socket's receive queue. @@ -9296,6 +9308,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, } release_sock(newsk); + + return 0; } -- cgit v1.2.3-59-g8ed1b From 60208f79139af0e2f84747d04a2f3321f174a398 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 3 Mar 2019 17:54:54 +0800 Subject: sctp: move up sctp_auth_init_hmacs() in sctp_endpoint_init() sctp_auth_init_hmacs() is called only when ep->auth_enable is set. It better to move up sctp_auth_init_hmacs() and remove auth_enable check in it and check auth_enable only once in sctp_endpoint_init(). Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/auth.c | 6 ------ net/sctp/endpointola.c | 18 ++++++++++-------- 2 files changed, 10 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 5b537613946f..39d72e58b8e5 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -471,12 +471,6 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) struct crypto_shash *tfm = NULL; __u16 id; - /* If AUTH extension is disabled, we are done */ - if (!ep->auth_enable) { - ep->auth_hmacs = NULL; - return 0; - } - /* If the transforms are already allocated, we are done */ if (ep->auth_hmacs) return 0; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 40c7eb941bc9..0448b68fce74 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -107,6 +107,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, auth_chunks->param_hdr.length = htons(sizeof(struct sctp_paramhdr) + 2); } + + /* Allocate and initialize transorms arrays for supported + * HMACs. + */ + err = sctp_auth_init_hmacs(ep, gfp); + if (err) + goto nomem; } /* Initialize the base structure. */ @@ -150,15 +157,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, INIT_LIST_HEAD(&ep->endpoint_shared_keys); null_key = sctp_auth_shkey_create(0, gfp); if (!null_key) - goto nomem; + goto nomem_shkey; list_add(&null_key->key_list, &ep->endpoint_shared_keys); - /* Allocate and initialize transorms arrays for supported HMACs. */ - err = sctp_auth_init_hmacs(ep, gfp); - if (err) - goto nomem_hmacs; - /* Add the null key to the endpoint shared keys list and * set the hmcas and chunks pointers. */ @@ -169,8 +171,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, return ep; -nomem_hmacs: - sctp_auth_destroy_keys(&ep->endpoint_shared_keys); +nomem_shkey: + sctp_auth_destroy_hmacs(ep->auth_hmacs); nomem: /* Free all allocations */ kfree(auth_hmacs); -- cgit v1.2.3-59-g8ed1b From c6f33e05225696fee3c901b7526b80f31848454e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 3 Mar 2019 17:54:55 +0800 Subject: sctp: call sctp_auth_init_hmacs() in sctp_sock_migrate() New ep's auth_hmacs should be set if old ep's is set, in case that net->sctp.auth_enable has been changed to 0 by users and new ep's auth_hmacs couldn't be set in sctp_endpoint_init(). It can even crash kernel by doing: 1. on server: sysctl -w net.sctp.auth_enable=1, sysctl -w net.sctp.addip_enable=1, sysctl -w net.sctp.addip_noauth_enable=0, listen() on server, sysctl -w net.sctp.auth_enable=0. 2. on client: connect() to server. 3. on server: accept() the asoc, sysctl -w net.sctp.auth_enable=1. 4. on client: send() asconf packet to server. The call trace: [ 245.280251] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 245.286872] RIP: 0010:sctp_auth_calculate_hmac+0xa3/0x140 [sctp] [ 245.304572] Call Trace: [ 245.305091] [ 245.311287] sctp_sf_authenticate+0x110/0x160 [sctp] [ 245.312311] sctp_sf_eat_auth+0xf2/0x230 [sctp] [ 245.313249] sctp_do_sm+0x9a/0x2d0 [sctp] [ 245.321483] sctp_assoc_bh_rcv+0xed/0x1a0 [sctp] [ 245.322495] sctp_rcv+0xa66/0xc70 [sctp] It's because the old ep->auth_hmacs wasn't copied to the new ep while ep->auth_hmacs is used in sctp_auth_calculate_hmac() when processing the incoming auth chunks, and it should have been done when migrating sock. Reported-by: Ying Xu Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 44f2acb3d433..6140471efd4b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9225,6 +9225,16 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, if (err) return err; + /* New ep's auth_hmacs should be set if old ep's is set, in case + * that net->sctp.auth_enable has been changed to 0 by users and + * new ep's auth_hmacs couldn't be set in sctp_endpoint_init(). + */ + if (oldsp->ep->auth_hmacs) { + err = sctp_auth_init_hmacs(newsp->ep, GFP_KERNEL); + if (err) + return err; + } + /* Move any messages in the old socket's receive queue that are for the * peeled off association to the new socket's receive queue. */ -- cgit v1.2.3-59-g8ed1b From f54ba391d88f5a5d032175b4c308c176e34b80b7 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 8 Mar 2019 08:57:26 +0100 Subject: xsk: fix to reject invalid flags in xsk_bind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passing a non-existing flag in the sxdp_flags member of struct sockaddr_xdp was, incorrectly, silently ignored. This patch addresses that behavior, and rejects any non-existing flags. We have examined existing user space code, and to our best knowledge, no one is relying on the current incorrect behavior. AF_XDP is still in its infancy, so from our perspective, the risk of breakage is very low, and addressing this problem now is important. Fixes: 965a99098443 ("xsk: add support for bind for Rx") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 6697084e3fdf..a14e8864e4fa 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -407,6 +407,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) if (sxdp->sxdp_family != AF_XDP) return -EINVAL; + flags = sxdp->sxdp_flags; + if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY)) + return -EINVAL; + mutex_lock(&xs->mutex); if (xs->dev) { err = -EBUSY; @@ -425,7 +429,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } qid = sxdp->sxdp_queue_id; - flags = sxdp->sxdp_flags; if (flags & XDP_SHARED_UMEM) { struct xdp_sock *umem_xs; -- cgit v1.2.3-59-g8ed1b From c57b557b644da624982c36b74f608cdb7b902868 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 8 Mar 2019 08:57:27 +0100 Subject: xsk: fix to reject invalid options in Tx descriptor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passing a non-existing option in the options member of struct xdp_desc was, incorrectly, silently ignored. This patch addresses that behavior, and drops any Tx descriptor with non-existing options. We have examined existing user space code, and to our best knowledge, no one is relying on the current incorrect behavior. AF_XDP is still in its infancy, so from our perspective, the risk of breakage is very low, and addressing this problem now is important. Fixes: 35fcde7f8deb ("xsk: support for Tx") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk_queue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index bcb5cbb40419..610c0bdc0c2b 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -174,8 +174,8 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d) if (!xskq_is_valid_addr(q, d->addr)) return false; - if (((d->addr + d->len) & q->chunk_mask) != - (d->addr & q->chunk_mask)) { + if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) || + d->options) { q->invalid_descs++; return false; } -- cgit v1.2.3-59-g8ed1b From 71b91a506bb05f9aef3acd57af2e835d85721942 Mon Sep 17 00:00:00 2001 From: Bo YU Date: Fri, 8 Mar 2019 01:45:51 -0500 Subject: bpf: fix warning about using plain integer as NULL Sparse warning below: sudo make C=2 CF=-D__CHECK_ENDIAN__ M=net/bpf/ CHECK net/bpf//test_run.c net/bpf//test_run.c:19:77: warning: Using plain integer as NULL pointer ./include/linux/bpf-cgroup.h:295:77: warning: Using plain integer as NULL pointer Fixes: 8bad74f9840f ("bpf: extend cgroup bpf core to allow multiple cgroup storage types") Acked-by: Yonghong Song Signed-off-by: Bo YU Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 2 +- net/bpf/test_run.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 695b2a880d9a..a4c644c1c091 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -292,7 +292,7 @@ static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog, static inline void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map) {} static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( - struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; } + struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; } static inline void bpf_cgroup_storage_free( struct bpf_cgroup_storage *storage) {} static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index da7051d62727..fab142b796ef 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -16,7 +16,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time) { - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 }; + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL }; enum bpf_cgroup_storage_type stype; u64 time_start, time_spent = 0; int ret = 0; -- cgit v1.2.3-59-g8ed1b From 4c404ce23358d5d8fbdeb7a6021a9b33d3c3c167 Mon Sep 17 00:00:00 2001 From: Adalbert Lazăr Date: Wed, 6 Mar 2019 12:13:53 +0200 Subject: vsock/virtio: fix kernel panic from virtio_transport_reset_no_sock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous to commit 22b5c0b63f32 ("vsock/virtio: fix kernel panic after device hot-unplug"), vsock_core_init() was called from virtio_vsock_probe(). Now, virtio_transport_reset_no_sock() can be called before vsock_core_init() has the chance to run. [Wed Feb 27 14:17:09 2019] BUG: unable to handle kernel NULL pointer dereference at 0000000000000110 [Wed Feb 27 14:17:09 2019] #PF error: [normal kernel read fault] [Wed Feb 27 14:17:09 2019] PGD 0 P4D 0 [Wed Feb 27 14:17:09 2019] Oops: 0000 [#1] SMP PTI [Wed Feb 27 14:17:09 2019] CPU: 3 PID: 59 Comm: kworker/3:1 Not tainted 5.0.0-rc7-390-generic-hvi #390 [Wed Feb 27 14:17:09 2019] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [Wed Feb 27 14:17:09 2019] Workqueue: virtio_vsock virtio_transport_rx_work [vmw_vsock_virtio_transport] [Wed Feb 27 14:17:09 2019] RIP: 0010:virtio_transport_reset_no_sock+0x8c/0xc0 [vmw_vsock_virtio_transport_common] [Wed Feb 27 14:17:09 2019] Code: 35 8b 4f 14 48 8b 57 08 31 f6 44 8b 4f 10 44 8b 07 48 8d 7d c8 e8 84 f8 ff ff 48 85 c0 48 89 c3 74 2a e8 f7 31 03 00 48 89 df <48> 8b 80 10 01 00 00 e8 68 fb 69 ed 48 8b 75 f0 65 48 33 34 25 28 [Wed Feb 27 14:17:09 2019] RSP: 0018:ffffb42701ab7d40 EFLAGS: 00010282 [Wed Feb 27 14:17:09 2019] RAX: 0000000000000000 RBX: ffff9d79637ee080 RCX: 0000000000000003 [Wed Feb 27 14:17:09 2019] RDX: 0000000000000001 RSI: 0000000000000002 RDI: ffff9d79637ee080 [Wed Feb 27 14:17:09 2019] RBP: ffffb42701ab7d78 R08: ffff9d796fae70e0 R09: ffff9d796f403500 [Wed Feb 27 14:17:09 2019] R10: ffffb42701ab7d90 R11: 0000000000000000 R12: ffff9d7969d09240 [Wed Feb 27 14:17:09 2019] R13: ffff9d79624e6840 R14: ffff9d7969d09318 R15: ffff9d796d48ff80 [Wed Feb 27 14:17:09 2019] FS: 0000000000000000(0000) GS:ffff9d796fac0000(0000) knlGS:0000000000000000 [Wed Feb 27 14:17:09 2019] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [Wed Feb 27 14:17:09 2019] CR2: 0000000000000110 CR3: 0000000427f22000 CR4: 00000000000006e0 [Wed Feb 27 14:17:09 2019] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [Wed Feb 27 14:17:09 2019] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [Wed Feb 27 14:17:09 2019] Call Trace: [Wed Feb 27 14:17:09 2019] virtio_transport_recv_pkt+0x63/0x820 [vmw_vsock_virtio_transport_common] [Wed Feb 27 14:17:09 2019] ? kfree+0x17e/0x190 [Wed Feb 27 14:17:09 2019] ? detach_buf_split+0x145/0x160 [Wed Feb 27 14:17:09 2019] ? __switch_to_asm+0x40/0x70 [Wed Feb 27 14:17:09 2019] virtio_transport_rx_work+0xa0/0x106 [vmw_vsock_virtio_transport] [Wed Feb 27 14:17:09 2019] NET: Registered protocol family 40 [Wed Feb 27 14:17:09 2019] process_one_work+0x167/0x410 [Wed Feb 27 14:17:09 2019] worker_thread+0x4d/0x460 [Wed Feb 27 14:17:09 2019] kthread+0x105/0x140 [Wed Feb 27 14:17:09 2019] ? rescuer_thread+0x360/0x360 [Wed Feb 27 14:17:09 2019] ? kthread_destroy_worker+0x50/0x50 [Wed Feb 27 14:17:09 2019] ret_from_fork+0x35/0x40 [Wed Feb 27 14:17:09 2019] Modules linked in: vmw_vsock_virtio_transport vmw_vsock_virtio_transport_common input_leds vsock serio_raw i2c_piix4 mac_hid qemu_fw_cfg autofs4 cirrus ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops virtio_net psmouse drm net_failover pata_acpi virtio_blk failover floppy Fixes: 22b5c0b63f32 ("vsock/virtio: fix kernel panic after device hot-unplug") Reported-by: Alexandru Herghelegiu Signed-off-by: Adalbert Lazăr Co-developed-by: Stefan Hajnoczi Reviewed-by: Stefan Hajnoczi Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 3ae3a33da70b..602715fc9a75 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -662,6 +662,8 @@ static int virtio_transport_reset(struct vsock_sock *vsk, */ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) { + const struct virtio_transport *t; + struct virtio_vsock_pkt *reply; struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_RST, .type = le16_to_cpu(pkt->hdr.type), @@ -672,15 +674,21 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) return 0; - pkt = virtio_transport_alloc_pkt(&info, 0, - le64_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.dst_port), - le64_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.src_port)); - if (!pkt) + reply = virtio_transport_alloc_pkt(&info, 0, + le64_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.dst_port), + le64_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.src_port)); + if (!reply) return -ENOMEM; - return virtio_transport_get_ops()->send_pkt(pkt); + t = virtio_transport_get_ops(); + if (!t) { + virtio_transport_free_pkt(reply); + return -ENOTCONN; + } + + return t->send_pkt(reply); } static void virtio_transport_wait_close(struct sock *sk, long timeout) -- cgit v1.2.3-59-g8ed1b From b62989fc4ea27863e7aef00c93a10118324d3ed0 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 6 Mar 2019 17:50:43 +0200 Subject: net: sched: fix potential use-after-free in __tcf_chain_put() When used with unlocked classifier that have filters attached to actions with goto chain, __tcf_chain_put() for last non action reference can race with calls to same function from action cleanup code that releases last action reference. In this case action cleanup handler could free the chain if it executes after all references to chain were released, but before all concurrent users finished using it. Modify __tcf_chain_put() to only access tcf_chain fields when holding block->lock. Remove local variables that were used to cache some tcf_chain fields and are no longer needed because their values can now be obtained directly from chain under block->lock protection. Fixes: 726d061286ce ("net: sched: prevent insertion of new classifiers during chain flush") Signed-off-by: Vlad Buslov Signed-off-by: David S. Miller --- net/sched/cls_api.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 478095d50f95..2c2aac4ac721 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -470,10 +470,9 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, { struct tcf_block *block = chain->block; const struct tcf_proto_ops *tmplt_ops; - bool is_last, free_block = false; + bool free_block = false; unsigned int refcnt; void *tmplt_priv; - u32 chain_index; mutex_lock(&block->lock); if (explicitly_created) { @@ -492,23 +491,21 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, * save these to temporary variables. */ refcnt = --chain->refcnt; - is_last = refcnt - chain->action_refcnt == 0; tmplt_ops = chain->tmplt_ops; tmplt_priv = chain->tmplt_priv; - chain_index = chain->index; - - if (refcnt == 0) - free_block = tcf_chain_detach(chain); - mutex_unlock(&block->lock); /* The last dropped non-action reference will trigger notification. */ - if (is_last && !by_act) { - tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index, + if (refcnt - chain->action_refcnt == 0 && !by_act) { + tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index, block, NULL, 0, 0, false); /* Last reference to chain, no need to lock. */ chain->flushing = false; } + if (refcnt == 0) + free_block = tcf_chain_detach(chain); + mutex_unlock(&block->lock); + if (refcnt == 0) { tc_chain_tmplt_del(tmplt_ops, tmplt_priv); tcf_chain_destroy(chain, free_block); -- cgit v1.2.3-59-g8ed1b From 5355ed6388e23b69a00d48398a68d022135e6486 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2019 10:41:00 -0800 Subject: fou, fou6: avoid uninit-value in gue_err() and gue6_err() My prior commit missed the fact that these functions were using udp_hdr() (aka skb_transport_header()) to get access to GUE header. Since pskb_transport_may_pull() does not exist yet, we have to add transport_offset to our pskb_may_pull() calls. BUG: KMSAN: uninit-value in gue_err+0x514/0xfa0 net/ipv4/fou.c:1032 CPU: 1 PID: 10648 Comm: syz-executor.1 Not tainted 5.0.0+ #11 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:600 __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313 gue_err+0x514/0xfa0 net/ipv4/fou.c:1032 __udp4_lib_err_encap_no_sk net/ipv4/udp.c:571 [inline] __udp4_lib_err_encap net/ipv4/udp.c:626 [inline] __udp4_lib_err+0x12e6/0x1d40 net/ipv4/udp.c:665 udp_err+0x74/0x90 net/ipv4/udp.c:737 icmp_socket_deliver net/ipv4/icmp.c:767 [inline] icmp_unreach+0xb65/0x1070 net/ipv4/icmp.c:884 icmp_rcv+0x11a1/0x1950 net/ipv4/icmp.c:1066 ip_protocol_deliver_rcu+0x584/0xbb0 net/ipv4/ip_input.c:208 ip_local_deliver_finish net/ipv4/ip_input.c:234 [inline] NF_HOOK include/linux/netfilter.h:289 [inline] ip_local_deliver+0x624/0x7b0 net/ipv4/ip_input.c:255 dst_input include/net/dst.h:450 [inline] ip_rcv_finish net/ipv4/ip_input.c:414 [inline] NF_HOOK include/linux/netfilter.h:289 [inline] ip_rcv+0x6bd/0x740 net/ipv4/ip_input.c:524 __netif_receive_skb_one_core net/core/dev.c:4973 [inline] __netif_receive_skb net/core/dev.c:5083 [inline] process_backlog+0x756/0x10e0 net/core/dev.c:5923 napi_poll net/core/dev.c:6346 [inline] net_rx_action+0x78b/0x1a60 net/core/dev.c:6412 __do_softirq+0x53f/0x93a kernel/softirq.c:293 invoke_softirq kernel/softirq.c:375 [inline] irq_exit+0x214/0x250 kernel/softirq.c:416 exiting_irq+0xe/0x10 arch/x86/include/asm/apic.h:536 smp_apic_timer_interrupt+0x48/0x70 arch/x86/kernel/apic/apic.c:1064 apic_timer_interrupt+0x2e/0x40 arch/x86/entry/entry_64.S:814 RIP: 0010:finish_lock_switch+0x2b/0x40 kernel/sched/core.c:2597 Code: 48 89 e5 53 48 89 fb e8 63 e7 95 00 8b b8 88 0c 00 00 48 8b 00 48 85 c0 75 12 48 89 df e8 dd db 95 00 c6 00 00 c6 03 00 fb 5b <5d> c3 e8 4e e6 95 00 eb e7 66 90 66 2e 0f 1f 84 00 00 00 00 00 55 RSP: 0018:ffff888081a0fc80 EFLAGS: 00000296 ORIG_RAX: ffffffffffffff13 RAX: ffff88821fd6bd80 RBX: ffff888027898000 RCX: ccccccccccccd000 RDX: ffff88821fca8d80 RSI: ffff888000000000 RDI: 00000000000004a0 RBP: ffff888081a0fc80 R08: 0000000000000002 R09: ffff888081a0fb08 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001 R13: ffff88811130e388 R14: ffff88811130da00 R15: ffff88812fdb7d80 finish_task_switch+0xfc/0x2d0 kernel/sched/core.c:2698 context_switch kernel/sched/core.c:2851 [inline] __schedule+0x6cc/0x800 kernel/sched/core.c:3491 schedule+0x15b/0x240 kernel/sched/core.c:3535 freezable_schedule include/linux/freezer.h:172 [inline] do_nanosleep+0x2ba/0x980 kernel/time/hrtimer.c:1679 hrtimer_nanosleep kernel/time/hrtimer.c:1733 [inline] __do_sys_nanosleep kernel/time/hrtimer.c:1767 [inline] __se_sys_nanosleep+0x746/0x960 kernel/time/hrtimer.c:1754 __x64_sys_nanosleep+0x3e/0x60 kernel/time/hrtimer.c:1754 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x4855a0 Code: 00 00 48 c7 c0 d4 ff ff ff 64 c7 00 16 00 00 00 31 c0 eb be 66 0f 1f 44 00 00 83 3d b1 11 5d 00 00 75 14 b8 23 00 00 00 0f 05 <48> 3d 01 f0 ff ff 0f 83 04 e2 f8 ff c3 48 83 ec 08 e8 3a 55 fd ff RSP: 002b:0000000000a4fd58 EFLAGS: 00000246 ORIG_RAX: 0000000000000023 RAX: ffffffffffffffda RBX: 0000000000085780 RCX: 00000000004855a0 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000a4fd60 RBP: 00000000000007ec R08: 0000000000000001 R09: 0000000000ceb940 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000008 R13: 0000000000a4fdb0 R14: 0000000000085711 R15: 0000000000a4fdc0 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:205 [inline] kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:159 kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176 kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2773 [inline] __kmalloc_node_track_caller+0xe9e/0xff0 mm/slub.c:4398 __kmalloc_reserve net/core/skbuff.c:140 [inline] __alloc_skb+0x309/0xa20 net/core/skbuff.c:208 alloc_skb include/linux/skbuff.h:1012 [inline] alloc_skb_with_frags+0x186/0xa60 net/core/skbuff.c:5287 sock_alloc_send_pskb+0xafd/0x10a0 net/core/sock.c:2091 sock_alloc_send_skb+0xca/0xe0 net/core/sock.c:2108 __ip_append_data+0x34cd/0x5000 net/ipv4/ip_output.c:998 ip_append_data+0x324/0x480 net/ipv4/ip_output.c:1220 icmp_push_reply+0x23d/0x7e0 net/ipv4/icmp.c:375 __icmp_send+0x2ea3/0x30f0 net/ipv4/icmp.c:737 icmp_send include/net/icmp.h:47 [inline] ipv4_link_failure+0x6d/0x230 net/ipv4/route.c:1190 dst_link_failure include/net/dst.h:427 [inline] arp_error_report+0x106/0x1a0 net/ipv4/arp.c:297 neigh_invalidate+0x359/0x8e0 net/core/neighbour.c:992 neigh_timer_handler+0xdf2/0x1280 net/core/neighbour.c:1078 call_timer_fn+0x285/0x600 kernel/time/timer.c:1325 expire_timers kernel/time/timer.c:1362 [inline] __run_timers+0xdb4/0x11d0 kernel/time/timer.c:1681 run_timer_softirq+0x2e/0x50 kernel/time/timer.c:1694 __do_softirq+0x53f/0x93a kernel/softirq.c:293 Fixes: 26fc181e6cac ("fou, fou6: do not assume linear skbs") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Stefano Brivio Cc: Sabrina Dubroca Acked-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv4/fou.c | 4 ++-- net/ipv6/fou6.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 437070d1ffb1..79e98e21cdd7 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -1024,7 +1024,7 @@ static int gue_err(struct sk_buff *skb, u32 info) int ret; len = sizeof(struct udphdr) + sizeof(struct guehdr); - if (!pskb_may_pull(skb, len)) + if (!pskb_may_pull(skb, transport_offset + len)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; @@ -1059,7 +1059,7 @@ static int gue_err(struct sk_buff *skb, u32 info) optlen = guehdr->hlen << 2; - if (!pskb_may_pull(skb, len + optlen)) + if (!pskb_may_pull(skb, transport_offset + len + optlen)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index 867474abe269..ec4e2ed95f36 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -94,7 +94,7 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int ret; len = sizeof(struct udphdr) + sizeof(struct guehdr); - if (!pskb_may_pull(skb, len)) + if (!pskb_may_pull(skb, transport_offset + len)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; @@ -129,7 +129,7 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, optlen = guehdr->hlen << 2; - if (!pskb_may_pull(skb, len + optlen)) + if (!pskb_may_pull(skb, transport_offset + len + optlen)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; -- cgit v1.2.3-59-g8ed1b From 9d3e1368bb45893a75a5dfb7cd21fdebfa6b47af Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 8 Mar 2019 22:09:47 +0100 Subject: tcp: handle inet_csk_reqsk_queue_add() failures Commit 7716682cc58e ("tcp/dccp: fix another race at listener dismantle") let inet_csk_reqsk_queue_add() fail, and adjusted {tcp,dccp}_check_req() accordingly. However, TFO and syncookies weren't modified, thus leaking allocated resources on error. Contrary to tcp_check_req(), in both syncookies and TFO cases, we need to drop the request socket. Also, since the child socket is created with inet_csk_clone_lock(), we have to unlock it and drop an extra reference (->sk_refcount is initially set to 2 and inet_csk_reqsk_queue_add() drops only one ref). For TFO, we also need to revert the work done by tcp_try_fastopen() (with reqsk_fastopen_remove()). Fixes: 7716682cc58e ("tcp/dccp: fix another race at listener dismantle") Signed-off-by: Guillaume Nault Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 7 ++++++- net/ipv4/tcp_input.c | 8 +++++++- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 606f868d9f3f..e531344611a0 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -216,7 +216,12 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, refcount_set(&req->rsk_refcnt, 1); tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); - inet_csk_reqsk_queue_add(sk, req, child); + if (!inet_csk_reqsk_queue_add(sk, req, child)) { + bh_unlock_sock(child); + sock_put(child); + child = NULL; + reqsk_put(req); + } } else { reqsk_free(req); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4eb0c8ca3c60..5def3c48870e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6498,7 +6498,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, af_ops->send_synack(fastopen_sk, dst, &fl, req, &foc, TCP_SYNACK_FASTOPEN); /* Add the child socket directly into the accept queue */ - inet_csk_reqsk_queue_add(sk, req, fastopen_sk); + if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) { + reqsk_fastopen_remove(fastopen_sk, req, false); + bh_unlock_sock(fastopen_sk); + sock_put(fastopen_sk); + reqsk_put(req); + goto drop; + } sk->sk_data_ready(sk); bh_unlock_sock(fastopen_sk); sock_put(fastopen_sk); -- cgit v1.2.3-59-g8ed1b From 69ffaebb90369ce08657b5aea4896777b9d6e8fc Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 9 Mar 2019 00:29:58 +0000 Subject: rxrpc: Fix client call queueing, waiting for channel rxrpc_get_client_conn() adds a new call to the front of the waiting_calls queue if the connection it's going to use already exists. This is bad as it allows calls to get starved out. Fix this by adding to the tail instead. Also change the other enqueue point in the same function to put it on the front (ie. when we have a new connection). This makes the point that in the case of a new connection the new call goes at the front (though it doesn't actually matter since the queue should be unoccupied). Fixes: 45025bceef17 ("rxrpc: Improve management and caching of client connection objects") Signed-off-by: David Howells Reviewed-by: Marc Dionne Signed-off-by: David S. Miller --- net/rxrpc/conn_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index f307a05076e1..83797b3949e2 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -353,7 +353,7 @@ static int rxrpc_get_client_conn(struct rxrpc_sock *rx, * normally have to take channel_lock but we do this before anyone else * can see the connection. */ - list_add_tail(&call->chan_wait_link, &candidate->waiting_calls); + list_add(&call->chan_wait_link, &candidate->waiting_calls); if (cp->exclusive) { call->conn = candidate; @@ -432,7 +432,7 @@ found_extant_conn: call->conn = conn; call->security_ix = conn->security_ix; call->service_id = conn->service_id; - list_add(&call->chan_wait_link, &conn->waiting_calls); + list_add_tail(&call->chan_wait_link, &conn->waiting_calls); spin_unlock(&conn->channel_lock); _leave(" = 0 [extant %d]", conn->debug_id); return 0; -- cgit v1.2.3-59-g8ed1b From 95d6ebd53c79522bf9502dbc7e89e0d63f94dae4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 10 Mar 2019 09:07:14 -0700 Subject: net/x25: fix use-after-free in x25_device_event() In case of failure x25_connect() does a x25_neigh_put(x25->neighbour) but forgets to clear x25->neighbour pointer, thus triggering use-after-free. Since the socket is visible in x25_list, we need to hold x25_list_lock to protect the operation. syzbot report : BUG: KASAN: use-after-free in x25_kill_by_device net/x25/af_x25.c:217 [inline] BUG: KASAN: use-after-free in x25_device_event+0x296/0x2b0 net/x25/af_x25.c:252 Read of size 8 at addr ffff8880a030edd0 by task syz-executor003/7854 CPU: 0 PID: 7854 Comm: syz-executor003 Not tainted 5.0.0+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135 x25_kill_by_device net/x25/af_x25.c:217 [inline] x25_device_event+0x296/0x2b0 net/x25/af_x25.c:252 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739 call_netdevice_notifiers_extack net/core/dev.c:1751 [inline] call_netdevice_notifiers net/core/dev.c:1765 [inline] __dev_notify_flags+0x1e9/0x2c0 net/core/dev.c:7607 dev_change_flags+0x10d/0x170 net/core/dev.c:7643 dev_ifsioc+0x2b0/0x940 net/core/dev_ioctl.c:237 dev_ioctl+0x1b8/0xc70 net/core/dev_ioctl.c:488 sock_do_ioctl+0x1bd/0x300 net/socket.c:995 sock_ioctl+0x32b/0x610 net/socket.c:1096 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0xd6e/0x1390 fs/ioctl.c:696 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4467c9 Code: e8 0c e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 5b 07 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fdbea222d98 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000006dbc58 RCX: 00000000004467c9 RDX: 0000000020000340 RSI: 0000000000008914 RDI: 0000000000000003 RBP: 00000000006dbc50 R08: 00007fdbea223700 R09: 0000000000000000 R10: 00007fdbea223700 R11: 0000000000000246 R12: 00000000006dbc5c R13: 6000030030626669 R14: 0000000000000000 R15: 0000000030626669 Allocated by task 7843: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_kmalloc mm/kasan/common.c:495 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:468 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:509 kmem_cache_alloc_trace+0x151/0x760 mm/slab.c:3615 kmalloc include/linux/slab.h:545 [inline] x25_link_device_up+0x46/0x3f0 net/x25/x25_link.c:249 x25_device_event+0x116/0x2b0 net/x25/af_x25.c:242 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1739 call_netdevice_notifiers_extack net/core/dev.c:1751 [inline] call_netdevice_notifiers net/core/dev.c:1765 [inline] __dev_notify_flags+0x121/0x2c0 net/core/dev.c:7605 dev_change_flags+0x10d/0x170 net/core/dev.c:7643 dev_ifsioc+0x2b0/0x940 net/core/dev_ioctl.c:237 dev_ioctl+0x1b8/0xc70 net/core/dev_ioctl.c:488 sock_do_ioctl+0x1bd/0x300 net/socket.c:995 sock_ioctl+0x32b/0x610 net/socket.c:1096 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0xd6e/0x1390 fs/ioctl.c:696 ksys_ioctl+0xab/0xd0 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:718 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 7865: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:457 kasan_slab_free+0xe/0x10 mm/kasan/common.c:465 __cache_free mm/slab.c:3494 [inline] kfree+0xcf/0x230 mm/slab.c:3811 x25_neigh_put include/net/x25.h:253 [inline] x25_connect+0x8d8/0xde0 net/x25/af_x25.c:824 __sys_connect+0x266/0x330 net/socket.c:1685 __do_sys_connect net/socket.c:1696 [inline] __se_sys_connect net/socket.c:1693 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1693 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8880a030edc0 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 16 bytes inside of 256-byte region [ffff8880a030edc0, ffff8880a030eec0) The buggy address belongs to the page: page:ffffea000280c380 count:1 mapcount:0 mapping:ffff88812c3f07c0 index:0x0 flags: 0x1fffc0000000200(slab) raw: 01fffc0000000200 ffffea0002806788 ffffea00027f0188 ffff88812c3f07c0 raw: 0000000000000000 ffff8880a030e000 000000010000000c 0000000000000000 page dumped because: kasan: bad access detected Signed-off-by: Eric Dumazet Reported-by: syzbot+04babcefcd396fabec37@syzkaller.appspotmail.com Cc: andrew hendry Signed-off-by: David S. Miller --- net/x25/af_x25.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index eff31348e20b..27171ac6fe3b 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -820,8 +820,12 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTED; rc = 0; out_put_neigh: - if (rc) + if (rc) { + read_lock_bh(&x25_list_lock); x25_neigh_put(x25->neighbour); + x25->neighbour = NULL; + read_unlock_bh(&x25_list_lock); + } out_put_route: x25_route_put(rt); out: -- cgit v1.2.3-59-g8ed1b From 2a5ff07a0eb945f291e361aa6f6becca8340ba46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 10 Mar 2019 10:39:37 -0700 Subject: gro_cells: make sure device is up in gro_cells_receive() We keep receiving syzbot reports [1] that show that tunnels do not play the rcu/IFF_UP rules properly. At device dismantle phase, gro_cells_destroy() will be called only after a full rcu grace period is observed after IFF_UP has been cleared. This means that IFF_UP needs to be tested before queueing packets into netif_rx() or gro_cells. This patch implements the test in gro_cells_receive() because too many callers do not seem to bother enough. [1] BUG: unable to handle kernel paging request at fffff4ca0b9ffffe PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.0.0+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net RIP: 0010:__skb_unlink include/linux/skbuff.h:1929 [inline] RIP: 0010:__skb_dequeue include/linux/skbuff.h:1945 [inline] RIP: 0010:__skb_queue_purge include/linux/skbuff.h:2656 [inline] RIP: 0010:gro_cells_destroy net/core/gro_cells.c:89 [inline] RIP: 0010:gro_cells_destroy+0x19d/0x360 net/core/gro_cells.c:78 Code: 03 42 80 3c 20 00 0f 85 53 01 00 00 48 8d 7a 08 49 8b 47 08 49 c7 07 00 00 00 00 48 89 f9 49 c7 47 08 00 00 00 00 48 c1 e9 03 <42> 80 3c 21 00 0f 85 10 01 00 00 48 89 c1 48 89 42 08 48 c1 e9 03 RSP: 0018:ffff8880aa3f79a8 EFLAGS: 00010a02 RAX: 00ffffffffffffe8 RBX: ffffe8ffffc64b70 RCX: 1ffff8ca0b9ffffe RDX: ffffc6505cffffe8 RSI: ffffffff858410ca RDI: ffffc6505cfffff0 RBP: ffff8880aa3f7a08 R08: ffff8880aa3e8580 R09: fffffbfff1263645 R10: fffffbfff1263644 R11: ffffffff8931b223 R12: dffffc0000000000 R13: 0000000000000000 R14: ffffe8ffffc64b80 R15: ffffe8ffffc64b75 kobject: 'loop2' (000000004bd7d84a): kobject_uevent_env FS: 0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffff4ca0b9ffffe CR3: 0000000094941000 CR4: 00000000001406f0 Call Trace: kobject: 'loop2' (000000004bd7d84a): fill_kobj_path: path = '/devices/virtual/block/loop2' ip_tunnel_dev_free+0x19/0x60 net/ipv4/ip_tunnel.c:1010 netdev_run_todo+0x51c/0x7d0 net/core/dev.c:8970 rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:116 ip_tunnel_delete_nets+0x423/0x5f0 net/ipv4/ip_tunnel.c:1124 vti_exit_batch_net+0x23/0x30 net/ipv4/ip_vti.c:495 ops_exit_list.isra.0+0x105/0x160 net/core/net_namespace.c:156 cleanup_net+0x3fb/0x960 net/core/net_namespace.c:551 process_one_work+0x98e/0x1790 kernel/workqueue.c:2173 worker_thread+0x98/0xe40 kernel/workqueue.c:2319 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Modules linked in: CR2: fffff4ca0b9ffffe [ end trace 513fc9c1338d1cb3 ] RIP: 0010:__skb_unlink include/linux/skbuff.h:1929 [inline] RIP: 0010:__skb_dequeue include/linux/skbuff.h:1945 [inline] RIP: 0010:__skb_queue_purge include/linux/skbuff.h:2656 [inline] RIP: 0010:gro_cells_destroy net/core/gro_cells.c:89 [inline] RIP: 0010:gro_cells_destroy+0x19d/0x360 net/core/gro_cells.c:78 Code: 03 42 80 3c 20 00 0f 85 53 01 00 00 48 8d 7a 08 49 8b 47 08 49 c7 07 00 00 00 00 48 89 f9 49 c7 47 08 00 00 00 00 48 c1 e9 03 <42> 80 3c 21 00 0f 85 10 01 00 00 48 89 c1 48 89 42 08 48 c1 e9 03 RSP: 0018:ffff8880aa3f79a8 EFLAGS: 00010a02 RAX: 00ffffffffffffe8 RBX: ffffe8ffffc64b70 RCX: 1ffff8ca0b9ffffe RDX: ffffc6505cffffe8 RSI: ffffffff858410ca RDI: ffffc6505cfffff0 RBP: ffff8880aa3f7a08 R08: ffff8880aa3e8580 R09: fffffbfff1263645 R10: fffffbfff1263644 R11: ffffffff8931b223 R12: dffffc0000000000 kobject: 'loop3' (00000000e4ee57a6): kobject_uevent_env R13: 0000000000000000 R14: ffffe8ffffc64b80 R15: ffffe8ffffc64b75 FS: 0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffff4ca0b9ffffe CR3: 0000000094941000 CR4: 00000000001406f0 Fixes: c9e6bc644e55 ("net: add gro_cells infrastructure") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/core/gro_cells.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index acf45ddbe924..e095fb871d91 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -13,22 +13,36 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct gro_cell *cell; + int res; - if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) - return netif_rx(skb); + rcu_read_lock(); + if (unlikely(!(dev->flags & IFF_UP))) + goto drop; + + if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) { + res = netif_rx(skb); + goto unlock; + } cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { +drop: atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); - return NET_RX_DROP; + res = NET_RX_DROP; + goto unlock; } __skb_queue_tail(&cell->napi_skbs, skb); if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); - return NET_RX_SUCCESS; + + res = NET_RX_SUCCESS; + +unlock: + rcu_read_unlock(); + return res; } EXPORT_SYMBOL(gro_cells_receive); -- cgit v1.2.3-59-g8ed1b