From a915da9b69273815527ccb3789421cb7027b545b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 05:18:33 +0000 Subject: tcp: md5: rcu conversion In order to be able to support proper RST messages for TCP MD5 flows, we need to allow access to MD5 keys without locking listener socket. This conversion is a nice cleanup, and shrinks size of timewait sockets by 80 bytes. IPv6 code reuses generic code found in IPv4 instead of duplicating it. Control path uses GFP_KERNEL allocations instead of GFP_ATOMIC. Signed-off-by: Eric Dumazet Cc: Shawn Lu Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux/tcp.h') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 46a85c9e1f25..c2025f159641 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -486,8 +486,7 @@ struct tcp_timewait_sock { u32 tw_ts_recent; long tw_ts_recent_stamp; #ifdef CONFIG_TCP_MD5SIG - u16 tw_md5_keylen; - u8 tw_md5_key[TCP_MD5SIG_MAXKEYLEN]; + struct tcp_md5sig_key *tw_md5_key; #endif /* Few sockets in timewait have cookies; in that case, then this * object holds a reference to them (tw_cookie_values->kref). -- cgit v1.3-14-g43fede From a8afca032998850ec63e83d555cdcf0eb5680cd6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 18:45:40 +0000 Subject: tcp: md5: protects md5sig_info with RCU This patch makes sure we use appropriate memory barriers before publishing tp->md5sig_info, allowing tcp_md5_do_lookup() being used from tcp_v4_send_reset() without holding socket lock (upcoming patch from Shawn Lu) Note we also need to respect rcu grace period before its freeing, since we can free socket without this grace period thanks to SLAB_DESTROY_BY_RCU Signed-off-by: Eric Dumazet Cc: Shawn Lu Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 32 ++++++++++++++++++++------------ net/ipv6/tcp_ipv6.c | 2 -- 4 files changed, 22 insertions(+), 15 deletions(-) (limited to 'include/linux/tcp.h') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c2025f159641..115389e9b945 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -463,7 +463,7 @@ struct tcp_sock { const struct tcp_sock_af_ops *af_specific; /* TCP MD5 Signature Option information */ - struct tcp_md5sig_info *md5sig_info; + struct tcp_md5sig_info __rcu *md5sig_info; #endif /* When the cookie options are generated and exchanged, then this diff --git a/include/net/tcp.h b/include/net/tcp.h index 10ae4c7b6b4f..78880ba0f560 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1150,6 +1150,7 @@ struct tcp_md5sig_key { /* - sock block */ struct tcp_md5sig_info { struct hlist_head head; + struct rcu_head rcu; }; /* - pseudo header */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index da5d3226771b..567cca9b30df 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -879,14 +879,18 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, struct tcp_md5sig_key *key; struct hlist_node *pos; unsigned int size = sizeof(struct in_addr); + struct tcp_md5sig_info *md5sig; - if (!tp->md5sig_info) + /* caller either holds rcu_read_lock() or socket lock */ + md5sig = rcu_dereference_check(tp->md5sig_info, + sock_owned_by_user(sk)); + if (!md5sig) return NULL; #if IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6) size = sizeof(struct in6_addr); #endif - hlist_for_each_entry_rcu(key, pos, &tp->md5sig_info->head, node) { + hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) { if (key->family != family) continue; if (!memcmp(&key->addr, addr, size)) @@ -932,7 +936,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, return 0; } - md5sig = tp->md5sig_info; + md5sig = rcu_dereference_protected(tp->md5sig_info, + sock_owned_by_user(sk)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) @@ -940,7 +945,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, sk_nocaps_add(sk, NETIF_F_GSO_MASK); INIT_HLIST_HEAD(&md5sig->head); - tp->md5sig_info = md5sig; + rcu_assign_pointer(tp->md5sig_info, md5sig); } key = sock_kmalloc(sk, sizeof(*key), gfp); @@ -966,6 +971,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; + struct tcp_md5sig_info *md5sig; key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); if (!key) @@ -973,7 +979,9 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); - if (hlist_empty(&tp->md5sig_info->head)) + md5sig = rcu_dereference_protected(tp->md5sig_info, + sock_owned_by_user(sk)); + if (hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); return 0; } @@ -984,10 +992,13 @@ void tcp_clear_md5_list(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; struct hlist_node *pos, *n; + struct tcp_md5sig_info *md5sig; - if (!hlist_empty(&tp->md5sig_info->head)) + md5sig = rcu_dereference_protected(tp->md5sig_info, 1); + + if (!hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); - hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) { + hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); @@ -1009,12 +1020,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (sin->sin_family != AF_INET) return -EINVAL; - if (!cmd.tcpm_key || !cmd.tcpm_keylen) { - if (!tcp_sk(sk)->md5sig_info) - return -ENOENT; + if (!cmd.tcpm_key || !cmd.tcpm_keylen) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, AF_INET); - } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; @@ -1896,7 +1904,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* Clean up the MD5 key list, if any */ if (tp->md5sig_info) { tcp_clear_md5_list(sk); - kfree(tp->md5sig_info); + kfree_rcu(tp->md5sig_info, rcu); tp->md5sig_info = NULL; } #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bec41f9a6413..c25018106ef2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -571,8 +571,6 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, return -EINVAL; if (!cmd.tcpm_keylen) { - if (!tcp_sk(sk)->md5sig_info) - return -ENOENT; if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], AF_INET); -- cgit v1.3-14-g43fede