aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2018-08-16 14:58:08 -0700
committerAlexei Starovoitov <ast@kernel.org>2018-08-16 14:58:09 -0700
commitcbb2fb13db3209cf4264aa5f76393a12add91315 (patch)
tree336d9a13409cccdc8d2c8f843bae5f62f4a9334b
parentbpf: fix a rcu usage warning in bpf_prog_array_copy_core() (diff)
parentbpf, sockmap: fix sock_map_ctx_update_elem race with exist/noexist (diff)
downloadlinux-dev-cbb2fb13db3209cf4264aa5f76393a12add91315.tar.xz
linux-dev-cbb2fb13db3209cf4264aa5f76393a12add91315.zip
Merge branch 'sockmap-ulp-fixes'
Daniel Borkmann says: ==================== Batch of various fixes related to BPF sockmap and ULP, including adding module alias to restrict module requests, races and memory leaks in sockmap code. For details please refer to the individual patches. Thanks! ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/net/tcp.h4
-rw-r--r--kernel/bpf/sockmap.c120
-rw-r--r--net/ipv4/tcp_ulp.c4
-rw-r--r--net/tls/tls_main.c1
4 files changed, 76 insertions, 53 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d196901c9dba..770917d0caa7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2065,6 +2065,10 @@ int tcp_set_ulp_id(struct sock *sk, const int ulp);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);
+#define MODULE_ALIAS_TCP_ULP(name) \
+ __MODULE_INFO(alias, alias_userspace, name); \
+ __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
+
/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF
* program does not support the chosen operation or there is no BPF
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 0c1a696b041b..98e621a29e8e 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -58,6 +58,7 @@ struct bpf_stab {
struct bpf_map map;
struct sock **sock_map;
struct bpf_sock_progs progs;
+ raw_spinlock_t lock;
};
struct bucket {
@@ -89,9 +90,9 @@ enum smap_psock_state {
struct smap_psock_map_entry {
struct list_head list;
+ struct bpf_map *map;
struct sock **entry;
struct htab_elem __rcu *hash_link;
- struct bpf_htab __rcu *htab;
};
struct smap_psock {
@@ -343,13 +344,18 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
e = psock_map_pop(sk, psock);
while (e) {
if (e->entry) {
- osk = cmpxchg(e->entry, sk, NULL);
+ struct bpf_stab *stab = container_of(e->map, struct bpf_stab, map);
+
+ raw_spin_lock_bh(&stab->lock);
+ osk = *e->entry;
if (osk == sk) {
+ *e->entry = NULL;
smap_release_sock(psock, sk);
}
+ raw_spin_unlock_bh(&stab->lock);
} else {
struct htab_elem *link = rcu_dereference(e->hash_link);
- struct bpf_htab *htab = rcu_dereference(e->htab);
+ struct bpf_htab *htab = container_of(e->map, struct bpf_htab, map);
struct hlist_head *head;
struct htab_elem *l;
struct bucket *b;
@@ -370,6 +376,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
}
raw_spin_unlock_bh(&b->lock);
}
+ kfree(e);
e = psock_map_pop(sk, psock);
}
rcu_read_unlock();
@@ -1641,6 +1648,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&stab->map, attr);
+ raw_spin_lock_init(&stab->lock);
/* make sure page count doesn't overflow */
cost = (u64) stab->map.max_entries * sizeof(struct sock *);
@@ -1675,8 +1683,10 @@ static void smap_list_map_remove(struct smap_psock *psock,
spin_lock_bh(&psock->maps_lock);
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
- if (e->entry == entry)
+ if (e->entry == entry) {
list_del(&e->list);
+ kfree(e);
+ }
}
spin_unlock_bh(&psock->maps_lock);
}
@@ -1690,8 +1700,10 @@ static void smap_list_hash_remove(struct smap_psock *psock,
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
struct htab_elem *c = rcu_dereference(e->hash_link);
- if (c == hash_link)
+ if (c == hash_link) {
list_del(&e->list);
+ kfree(e);
+ }
}
spin_unlock_bh(&psock->maps_lock);
}
@@ -1711,14 +1723,15 @@ static void sock_map_free(struct bpf_map *map)
* and a grace period expire to ensure psock is really safe to remove.
*/
rcu_read_lock();
+ raw_spin_lock_bh(&stab->lock);
for (i = 0; i < stab->map.max_entries; i++) {
struct smap_psock *psock;
struct sock *sock;
- sock = xchg(&stab->sock_map[i], NULL);
+ sock = stab->sock_map[i];
if (!sock)
continue;
-
+ stab->sock_map[i] = NULL;
psock = smap_psock_sk(sock);
/* This check handles a racing sock event that can get the
* sk_callback_lock before this case but after xchg happens
@@ -1730,6 +1743,7 @@ static void sock_map_free(struct bpf_map *map)
smap_release_sock(psock, sock);
}
}
+ raw_spin_unlock_bh(&stab->lock);
rcu_read_unlock();
sock_map_remove_complete(stab);
@@ -1773,19 +1787,23 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
if (k >= map->max_entries)
return -EINVAL;
- sock = xchg(&stab->sock_map[k], NULL);
+ raw_spin_lock_bh(&stab->lock);
+ sock = stab->sock_map[k];
+ stab->sock_map[k] = NULL;
+ raw_spin_unlock_bh(&stab->lock);
if (!sock)
return -EINVAL;
psock = smap_psock_sk(sock);
if (!psock)
- goto out;
-
- if (psock->bpf_parse)
+ return 0;
+ if (psock->bpf_parse) {
+ write_lock_bh(&sock->sk_callback_lock);
smap_stop_sock(psock, sock);
+ write_unlock_bh(&sock->sk_callback_lock);
+ }
smap_list_map_remove(psock, &stab->sock_map[k]);
smap_release_sock(psock, sock);
-out:
return 0;
}
@@ -1821,11 +1839,9 @@ out:
static int __sock_map_ctx_update_elem(struct bpf_map *map,
struct bpf_sock_progs *progs,
struct sock *sock,
- struct sock **map_link,
void *key)
{
struct bpf_prog *verdict, *parse, *tx_msg;
- struct smap_psock_map_entry *e = NULL;
struct smap_psock *psock;
bool new = false;
int err = 0;
@@ -1898,14 +1914,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
new = true;
}
- if (map_link) {
- e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
- if (!e) {
- err = -ENOMEM;
- goto out_free;
- }
- }
-
/* 3. At this point we have a reference to a valid psock that is
* running. Attach any BPF programs needed.
*/
@@ -1927,17 +1935,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
write_unlock_bh(&sock->sk_callback_lock);
}
- /* 4. Place psock in sockmap for use and stop any programs on
- * the old sock assuming its not the same sock we are replacing
- * it with. Because we can only have a single set of programs if
- * old_sock has a strp we can stop it.
- */
- if (map_link) {
- e->entry = map_link;
- spin_lock_bh(&psock->maps_lock);
- list_add_tail(&e->list, &psock->maps);
- spin_unlock_bh(&psock->maps_lock);
- }
return err;
out_free:
smap_release_sock(psock, sock);
@@ -1948,7 +1945,6 @@ out_progs:
}
if (tx_msg)
bpf_prog_put(tx_msg);
- kfree(e);
return err;
}
@@ -1958,36 +1954,57 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct bpf_sock_progs *progs = &stab->progs;
- struct sock *osock, *sock;
+ struct sock *osock, *sock = skops->sk;
+ struct smap_psock_map_entry *e;
+ struct smap_psock *psock;
u32 i = *(u32 *)key;
int err;
if (unlikely(flags > BPF_EXIST))
return -EINVAL;
-
if (unlikely(i >= stab->map.max_entries))
return -E2BIG;
- sock = READ_ONCE(stab->sock_map[i]);
- if (flags == BPF_EXIST && !sock)
- return -ENOENT;
- else if (flags == BPF_NOEXIST && sock)
- return -EEXIST;
+ e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
+ if (!e)
+ return -ENOMEM;
- sock = skops->sk;
- err = __sock_map_ctx_update_elem(map, progs, sock, &stab->sock_map[i],
- key);
+ err = __sock_map_ctx_update_elem(map, progs, sock, key);
if (err)
goto out;
- osock = xchg(&stab->sock_map[i], sock);
- if (osock) {
- struct smap_psock *opsock = smap_psock_sk(osock);
+ /* psock guaranteed to be present. */
+ psock = smap_psock_sk(sock);
+ raw_spin_lock_bh(&stab->lock);
+ osock = stab->sock_map[i];
+ if (osock && flags == BPF_NOEXIST) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+ if (!osock && flags == BPF_EXIST) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
- smap_list_map_remove(opsock, &stab->sock_map[i]);
- smap_release_sock(opsock, osock);
+ e->entry = &stab->sock_map[i];
+ e->map = map;
+ spin_lock_bh(&psock->maps_lock);
+ list_add_tail(&e->list, &psock->maps);
+ spin_unlock_bh(&psock->maps_lock);
+
+ stab->sock_map[i] = sock;
+ if (osock) {
+ psock = smap_psock_sk(osock);
+ smap_list_map_remove(psock, &stab->sock_map[i]);
+ smap_release_sock(psock, osock);
}
+ raw_spin_unlock_bh(&stab->lock);
+ return 0;
+out_unlock:
+ smap_release_sock(psock, sock);
+ raw_spin_unlock_bh(&stab->lock);
out:
+ kfree(e);
return err;
}
@@ -2350,7 +2367,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
b = __select_bucket(htab, hash);
head = &b->head;
- err = __sock_map_ctx_update_elem(map, progs, sock, NULL, key);
+ err = __sock_map_ctx_update_elem(map, progs, sock, key);
if (err)
goto err;
@@ -2376,8 +2393,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
}
rcu_assign_pointer(e->hash_link, l_new);
- rcu_assign_pointer(e->htab,
- container_of(map, struct bpf_htab, map));
+ e->map = map;
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 622caa4039e0..a5995bb2eaca 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -51,7 +51,7 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
#ifdef CONFIG_MODULES
if (!ulp && capable(CAP_NET_ADMIN)) {
rcu_read_unlock();
- request_module("%s", name);
+ request_module("tcp-ulp-%s", name);
rcu_read_lock();
ulp = tcp_ulp_find(name);
}
@@ -129,6 +129,8 @@ void tcp_cleanup_ulp(struct sock *sk)
if (icsk->icsk_ulp_ops->release)
icsk->icsk_ulp_ops->release(sk);
module_put(icsk->icsk_ulp_ops->owner);
+
+ icsk->icsk_ulp_ops = NULL;
}
/* Change upper layer protocol for socket */
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b09867c8b817..93c0c225ab34 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -45,6 +45,7 @@
MODULE_AUTHOR("Mellanox Technologies");
MODULE_DESCRIPTION("Transport Layer Security Support");
MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS_TCP_ULP("tls");
enum {
TLSV4,