aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-07-10 18:16:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-07-10 18:16:22 -0700
commit5a764898afec0bc097003e8c3e727792289f76d6 (patch)
tree5e5ffde9c63913705b2869046458af8bd485c16e /net/core
parentmips: Remove compiler check in unroll macro (diff)
parentMerge branch 'mlxsw-Various-fixes' (diff)
downloadlinux-dev-5a764898afec0bc097003e8c3e727792289f76d6.tar.xz
linux-dev-5a764898afec0bc097003e8c3e727792289f76d6.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from David Miller: 1) Restore previous behavior of CAP_SYS_ADMIN wrt loading networking BPF programs, from Maciej Żenczykowski. 2) Fix dropped broadcasts in mac80211 code, from Seevalamuthu Mariappan. 3) Slay memory leak in nl80211 bss color attribute parsing code, from Luca Coelho. 4) Get route from skb properly in ip_route_use_hint(), from Miaohe Lin. 5) Don't allow anything other than ARPHRD_ETHER in llc code, from Eric Dumazet. 6) xsk code dips too deeply into DMA mapping implementation internals. Add dma_need_sync and use it. From Christoph Hellwig 7) Enforce power-of-2 for BPF ringbuf sizes. From Andrii Nakryiko. 8) Check for disallowed attributes when loading flow dissector BPF programs. From Lorenz Bauer. 9) Correct packet injection to L3 tunnel devices via AF_PACKET, from Jason A. Donenfeld. 10) Don't advertise checksum offload on ipa devices that don't support it. From Alex Elder. 11) Resolve several issues in TCP MD5 signature support. Missing memory barriers, bogus options emitted when using syncookies, and failure to allow md5 key changes in established states. All from Eric Dumazet. 12) Fix interface leak in hsr code, from Taehee Yoo. 13) VF reset fixes in hns3 driver, from Huazhong Tan. 14) Make loopback work again with ipv6 anycast, from David Ahern. 15) Fix TX starvation under high load in fec driver, from Tobias Waldekranz. 16) MLD2 payload lengths not checked properly in bridge multicast code, from Linus Lüssing. 17) Packet scheduler code that wants to find the inner protocol currently only works for one level of VLAN encapsulation. Allow Q-in-Q situations to work properly here, from Toke Høiland-Jørgensen. 18) Fix route leak in l2tp, from Xin Long. 19) Resolve conflict between the sk->sk_user_data usage of bpf reuseport support and various protocols. From Martin KaFai Lau. 20) Fix socket cgroup v2 reference counting in some situations, from Cong Wang. 21) Cure memory leak in mlx5 connection tracking offload support, from Eli Britstein. * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (146 commits) mlxsw: pci: Fix use-after-free in case of failed devlink reload mlxsw: spectrum_router: Remove inappropriate usage of WARN_ON() net: macb: fix call to pm_runtime in the suspend/resume functions net: macb: fix macb_suspend() by removing call to netif_carrier_off() net: macb: fix macb_get/set_wol() when moving to phylink net: macb: mark device wake capable when "magic-packet" property present net: macb: fix wakeup test in runtime suspend/resume routines bnxt_en: fix NULL dereference in case SR-IOV configuration fails libbpf: Fix libbpf hashmap on (I)LP32 architectures net/mlx5e: CT: Fix memory leak in cleanup net/mlx5e: Fix port buffers cell size value net/mlx5e: Fix 50G per lane indication net/mlx5e: Fix CPU mapping after function reload to avoid aRFS RX crash net/mlx5e: Fix VXLAN configuration restore after function reload net/mlx5e: Fix usage of rcu-protected pointer net/mxl5e: Verify that rpriv is not NULL net/mlx5: E-Switch, Fix vlan or qos setting in legacy mode net/mlx5: Fix eeprom support for SFP module cgroup: Fix sock_cgroup_data on big-endian. selftests: bpf: Fix detach from sockmap tests ...
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev_addr_lists.c10
-rw-r--r--net/core/filter.c10
-rw-r--r--net/core/flow_dissector.c32
-rw-r--r--net/core/skmsg.c23
-rw-r--r--net/core/sock.c2
-rw-r--r--net/core/sock_map.c53
6 files changed, 93 insertions, 37 deletions
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 6393ba930097..54cd568e7c2f 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -690,6 +690,15 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return;
+ /* netif_addr_lock_bh() uses lockdep subclass 0, this is okay for two
+ * reasons:
+ * 1) This is always called without any addr_list_lock, so as the
+ * outermost one here, it must be 0.
+ * 2) This is called by some callers after unlinking the upper device,
+ * so the dev->lower_level becomes 1 again.
+ * Therefore, the subclass for 'from' is 0, for 'to' is either 1 or
+ * larger.
+ */
netif_addr_lock_bh(from);
netif_addr_lock_nested(to);
__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
@@ -911,6 +920,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return;
+ /* See the above comments inside dev_uc_unsync(). */
netif_addr_lock_bh(from);
netif_addr_lock_nested(to);
__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
diff --git a/net/core/filter.c b/net/core/filter.c
index 73395384afe2..82e1b5b06167 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5853,12 +5853,16 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
{
unsigned int iphdr_len;
- if (skb->protocol == cpu_to_be16(ETH_P_IP))
+ switch (skb_protocol(skb, true)) {
+ case cpu_to_be16(ETH_P_IP):
iphdr_len = sizeof(struct iphdr);
- else if (skb->protocol == cpu_to_be16(ETH_P_IPV6))
+ break;
+ case cpu_to_be16(ETH_P_IPV6):
iphdr_len = sizeof(struct ipv6hdr);
- else
+ break;
+ default:
return 0;
+ }
if (skb_headlen(skb) < iphdr_len)
return 0;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d02df0b6d0d9..142a8824f0a8 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -70,10 +70,10 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
EXPORT_SYMBOL(skb_flow_dissector_init);
#ifdef CONFIG_BPF_SYSCALL
-int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog)
+int flow_dissector_bpf_prog_attach_check(struct net *net,
+ struct bpf_prog *prog)
{
enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
- struct bpf_prog *attached;
if (net == &init_net) {
/* BPF flow dissector in the root namespace overrides
@@ -86,26 +86,17 @@ int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog)
for_each_net(ns) {
if (ns == &init_net)
continue;
- if (rcu_access_pointer(ns->bpf.progs[type]))
+ if (rcu_access_pointer(ns->bpf.run_array[type]))
return -EEXIST;
}
} else {
/* Make sure root flow dissector is not attached
* when attaching to the non-root namespace.
*/
- if (rcu_access_pointer(init_net.bpf.progs[type]))
+ if (rcu_access_pointer(init_net.bpf.run_array[type]))
return -EEXIST;
}
- attached = rcu_dereference_protected(net->bpf.progs[type],
- lockdep_is_held(&netns_bpf_mutex));
- if (attached == prog)
- /* The same program cannot be attached twice */
- return -EINVAL;
-
- rcu_assign_pointer(net->bpf.progs[type], prog);
- if (attached)
- bpf_prog_put(attached);
return 0;
}
#endif /* CONFIG_BPF_SYSCALL */
@@ -903,7 +894,6 @@ bool __skb_flow_dissect(const struct net *net,
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
- struct bpf_prog *attached = NULL;
enum flow_dissect_ret fdret;
enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
bool mpls_el = false;
@@ -960,14 +950,14 @@ bool __skb_flow_dissect(const struct net *net,
WARN_ON_ONCE(!net);
if (net) {
enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
+ struct bpf_prog_array *run_array;
rcu_read_lock();
- attached = rcu_dereference(init_net.bpf.progs[type]);
-
- if (!attached)
- attached = rcu_dereference(net->bpf.progs[type]);
+ run_array = rcu_dereference(init_net.bpf.run_array[type]);
+ if (!run_array)
+ run_array = rcu_dereference(net->bpf.run_array[type]);
- if (attached) {
+ if (run_array) {
struct bpf_flow_keys flow_keys;
struct bpf_flow_dissector ctx = {
.flow_keys = &flow_keys,
@@ -975,6 +965,7 @@ bool __skb_flow_dissect(const struct net *net,
.data_end = data + hlen,
};
__be16 n_proto = proto;
+ struct bpf_prog *prog;
if (skb) {
ctx.skb = skb;
@@ -985,7 +976,8 @@ bool __skb_flow_dissect(const struct net *net,
n_proto = skb->protocol;
}
- ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
+ prog = READ_ONCE(run_array->items[0].prog);
+ ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff,
hlen, flags);
__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
target_container);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 351afbf6bfba..6a32a1fd34f8 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -683,7 +683,7 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
return container_of(parser, struct sk_psock, parser);
}
-static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
+static void sk_psock_skb_redirect(struct sk_buff *skb)
{
struct sk_psock *psock_other;
struct sock *sk_other;
@@ -715,12 +715,11 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
}
}
-static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
- struct sk_buff *skb, int verdict)
+static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict)
{
switch (verdict) {
case __SK_REDIRECT:
- sk_psock_skb_redirect(psock, skb);
+ sk_psock_skb_redirect(skb);
break;
case __SK_PASS:
case __SK_DROP:
@@ -741,8 +740,8 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
}
+ sk_psock_tls_verdict_apply(skb, ret);
rcu_read_unlock();
- sk_psock_tls_verdict_apply(psock, skb, ret);
return ret;
}
EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
@@ -770,7 +769,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
}
goto out_free;
case __SK_REDIRECT:
- sk_psock_skb_redirect(psock, skb);
+ sk_psock_skb_redirect(skb);
break;
case __SK_DROP:
/* fall-through */
@@ -782,11 +781,18 @@ out_free:
static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
{
- struct sk_psock *psock = sk_psock_from_strp(strp);
+ struct sk_psock *psock;
struct bpf_prog *prog;
int ret = __SK_DROP;
+ struct sock *sk;
rcu_read_lock();
+ sk = strp->sk;
+ psock = sk_psock(sk);
+ if (unlikely(!psock)) {
+ kfree_skb(skb);
+ goto out;
+ }
prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
skb_orphan(skb);
@@ -794,8 +800,9 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
}
- rcu_read_unlock();
sk_psock_verdict_apply(psock, skb, ret);
+out:
+ rcu_read_unlock();
}
static int sk_psock_strp_read_done(struct strparser *strp, int err)
diff --git a/net/core/sock.c b/net/core/sock.c
index d832c650287c..2e5b7870e5d3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1926,7 +1926,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
/* sk->sk_memcg will be populated at accept() time */
newsk->sk_memcg = NULL;
- cgroup_sk_alloc(&newsk->sk_cgrp_data);
+ cgroup_sk_clone(&newsk->sk_cgrp_data);
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 4059f94e9bb5..0971f17e8e54 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -70,11 +70,49 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
struct fd f;
int ret;
+ if (attr->attach_flags || attr->replace_bpf_fd)
+ return -EINVAL;
+
f = fdget(ufd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
- ret = sock_map_prog_update(map, prog, attr->attach_type);
+ ret = sock_map_prog_update(map, prog, NULL, attr->attach_type);
+ fdput(f);
+ return ret;
+}
+
+int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
+{
+ u32 ufd = attr->target_fd;
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct fd f;
+ int ret;
+
+ if (attr->attach_flags || attr->replace_bpf_fd)
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ prog = bpf_prog_get(attr->attach_bpf_fd);
+ if (IS_ERR(prog)) {
+ ret = PTR_ERR(prog);
+ goto put_map;
+ }
+
+ if (prog->type != ptype) {
+ ret = -EINVAL;
+ goto put_prog;
+ }
+
+ ret = sock_map_prog_update(map, NULL, prog, attr->attach_type);
+put_prog:
+ bpf_prog_put(prog);
+put_map:
fdput(f);
return ret;
}
@@ -1203,27 +1241,32 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
}
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- u32 which)
+ struct bpf_prog *old, u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
+ struct bpf_prog **pprog;
if (!progs)
return -EOPNOTSUPP;
switch (which) {
case BPF_SK_MSG_VERDICT:
- psock_set_prog(&progs->msg_parser, prog);
+ pprog = &progs->msg_parser;
break;
case BPF_SK_SKB_STREAM_PARSER:
- psock_set_prog(&progs->skb_parser, prog);
+ pprog = &progs->skb_parser;
break;
case BPF_SK_SKB_STREAM_VERDICT:
- psock_set_prog(&progs->skb_verdict, prog);
+ pprog = &progs->skb_verdict;
break;
default:
return -EOPNOTSUPP;
}
+ if (old)
+ return psock_replace_prog(pprog, prog, old);
+
+ psock_set_prog(pprog, prog);
return 0;
}