From f394722fb0d0f701119368959d7cd0ecbc46363a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Dec 2019 12:23:21 -0800 Subject: neighbour: remove neigh_cleanup() method neigh_cleanup() has not been used for seven years, and was a wrong design. Messing with shared pointer in bond_neigh_init() without proper memory barriers would at least trigger syzbot complains eventually. It is time to remove this stuff. Fixes: b63b70d87741 ("IPoIB: Use a private hash table for path lookup in xmit path") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 652da6369037..920784a9b7ff 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -98,9 +98,6 @@ static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb) static void neigh_cleanup_and_release(struct neighbour *neigh) { - if (neigh->parms->neigh_cleanup) - neigh->parms->neigh_cleanup(neigh); - trace_neigh_cleanup_and_release(neigh, 0); __neigh_notify(neigh, RTM_DELNEIGH, 0, 0); call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); -- cgit v1.2.3-59-g8ed1b From 5133498f4ad1123a5ffd4c08df6431dab882cc32 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 13 Dec 2019 18:08:17 +0000 Subject: bpf: Clear skb->tstamp in bpf_redirect when necessary Redirecting a packet from ingress to egress by using bpf_redirect breaks if the egress interface has an fq qdisc installed. This is the same problem as fixed in 'commit 8203e2d844d3 ("net: clear skb->tstamp in forwarding paths") Clear skb->tstamp when redirecting into the egress path. Fixes: 80b14dee2bea ("net: Add a new socket option for a future transmit time.") Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/bpf/20191213180817.2510-1-lmb@cloudflare.com --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index f1e703eed3d2..d914257763b5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2055,6 +2055,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) } skb->dev = dev; + skb->tstamp = 0; dev_xmit_recursion_inc(); ret = dev_queue_xmit(skb); -- cgit v1.2.3-59-g8ed1b From 7c68fa2bddda6d942bd387c9ba5b4300737fd991 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Dec 2019 18:51:03 -0800 Subject: net: annotate lockless accesses to sk->sk_pacing_shift sk->sk_pacing_shift can be read and written without lock synchronization. This patch adds annotations to document this fact and avoid future syzbot complains. This might also avoid unexpected false sharing in sk_pacing_shift_update(), as the compiler could remove the conditional check and always write over sk->sk_pacing_shift : if (sk->sk_pacing_shift != val) sk->sk_pacing_shift = val; Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- net/core/sock.c | 2 +- net/ipv4/tcp_bbr.c | 3 ++- net/ipv4/tcp_output.c | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/include/net/sock.h b/include/net/sock.h index 04c274a20620..22be668457bf 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2588,9 +2588,9 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) */ static inline void sk_pacing_shift_update(struct sock *sk, int val) { - if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val) + if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val) return; - sk->sk_pacing_shift = val; + WRITE_ONCE(sk->sk_pacing_shift, val); } /* if a socket is bound to a device, check that the given device diff --git a/net/core/sock.c b/net/core/sock.c index 043db3ce023e..8459ad579f73 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2916,7 +2916,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_max_pacing_rate = ~0UL; sk->sk_pacing_rate = ~0UL; - sk->sk_pacing_shift = 10; + WRITE_ONCE(sk->sk_pacing_shift, 10); sk->sk_incoming_cpu = -1; sk_rx_queue_clear(sk); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 32772d6ded4e..a6545ef0d27b 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -306,7 +306,8 @@ static u32 bbr_tso_segs_goal(struct sock *sk) /* Sort of tcp_tso_autosize() but ignoring * driver provided sk_gso_max_size. */ - bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift, + bytes = min_t(unsigned long, + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 36902d08473e..1f7735ca8f22 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1725,7 +1725,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, u32 bytes, segs; bytes = min_t(unsigned long, - sk->sk_pacing_rate >> sk->sk_pacing_shift, + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); /* Goal is to send at least one packet per ms, @@ -2260,7 +2260,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit = max_t(unsigned long, 2 * skb->truesize, - sk->sk_pacing_rate >> sk->sk_pacing_shift); + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); -- cgit v1.2.3-59-g8ed1b From ddd9b5e3e765d8ed5a35786a6cb00111713fe161 Mon Sep 17 00:00:00 2001 From: Jouni Hogander Date: Tue, 17 Dec 2019 13:46:34 +0200 Subject: net-sysfs: Call dev_hold always in rx_queue_add_kobject Dev_hold has to be called always in rx_queue_add_kobject. Otherwise usage count drops below 0 in case of failure in kobject_init_and_add. Fixes: b8eb718348b8 ("net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject") Reported-by: syzbot Cc: Tetsuo Handa Cc: David Miller Cc: Lukas Bulwahn Signed-off-by: Jouni Hogander Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5c4624298996..4c826b8bf9b1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -919,14 +919,17 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Kobject_put later will trigger rx_queue_release call which + * decreases dev refcount: Take that reference here + */ + dev_hold(queue->dev); + kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, "rx-%u", index); if (error) goto err; - dev_hold(queue->dev); - if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) -- cgit v1.2.3-59-g8ed1b From 1148f9adbe71415836a18a36c1b4ece999ab0973 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 18 Dec 2019 12:18:21 +0300 Subject: net, sysctl: Fix compiler warning when only cBPF is present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit proc_dointvec_minmax_bpf_restricted() has been firstly introduced in commit 2e4a30983b0f ("bpf: restrict access to core bpf sysctls") under CONFIG_HAVE_EBPF_JIT. Then, this ifdef has been removed in ede95a63b5e8 ("bpf: add bpf_jit_limit knob to restrict unpriv allocations"), because a new sysctl, bpf_jit_limit, made use of it. Finally, this parameter has become long instead of integer with fdadd04931c2 ("bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K") and thus, a new proc_dolongvec_minmax_bpf_restricted() has been added. With this last change, we got back to that proc_dointvec_minmax_bpf_restricted() is used only under CONFIG_HAVE_EBPF_JIT, but the corresponding ifdef has not been brought back. So, in configurations like CONFIG_BPF_JIT=y && CONFIG_HAVE_EBPF_JIT=n since v4.20 we have: CC net/core/sysctl_net_core.o net/core/sysctl_net_core.c:292:1: warning: ‘proc_dointvec_minmax_bpf_restricted’ defined but not used [-Wunused-function] 292 | proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Suppress this by guarding it with CONFIG_HAVE_EBPF_JIT again. Fixes: fdadd04931c2 ("bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K") Signed-off-by: Alexander Lobakin Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191218091821.7080-1-alobakin@dlink.ru --- net/core/sysctl_net_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core') diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index eb29e5adc84d..9f9e00ba3ad7 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -288,6 +288,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, return ret; } +# ifdef CONFIG_HAVE_EBPF_JIT static int proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -298,6 +299,7 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } +# endif /* CONFIG_HAVE_EBPF_JIT */ static int proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, -- cgit v1.2.3-59-g8ed1b