From e48c414ee61f4ac8d5cff2973e66a7cbc8a93aa5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:09:46 -0700 Subject: [INET]: Generalise the TCP sock ID lookup routines And also some TIME_WAIT functions. [acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size /tmp/before.size: 282955 13122 9312 305389 4a8ed net/ipv4/built-in.o /tmp/after.size: 281566 13122 9312 304000 4a380 net/ipv4/built-in.o [acme@toy net-2.6.14]$ I kept them still inlined, will uninline at some point to see what would be the performance difference. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/ipv4/Makefile | 1 + net/ipv4/inet_hashtables.c | 2 ++ net/ipv4/inet_timewait_sock.c | 83 +++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_diag.c | 8 ++--- net/ipv4/tcp_ipv4.c | 83 +++++-------------------------------------- net/ipv4/tcp_minisocks.c | 78 +++------------------------------------- 6 files changed, 101 insertions(+), 154 deletions(-) create mode 100644 net/ipv4/inet_timewait_sock.c (limited to 'net') diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 2d8d30e83eb0..6650d18e400f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -5,6 +5,7 @@ obj-y := route.o inetpeer.o protocol.o \ ip_input.o ip_fragment.o ip_forward.o ip_options.o \ ip_output.o ip_sockglue.o inet_hashtables.o \ + inet_timewait_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 88fcba05b7d6..d94e962958a4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -162,3 +162,5 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad } return result; } + +EXPORT_SYMBOL_GPL(__inet_lookup_listener); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c new file mode 100644 index 
000000000000..d38d160faeb7 --- /dev/null +++ b/net/ipv4/inet_timewait_sock.c @@ -0,0 +1,83 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Generic TIME_WAIT sockets functions + * + * From code originally in TCP + */ + +#include + +#include +#include + +/* Must be called with locally disabled BHs. */ +void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) +{ + struct inet_bind_hashbucket *bhead; + struct inet_bind_bucket *tb; + /* Unlink from established hashes. */ + struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent]; + + write_lock(&ehead->lock); + if (hlist_unhashed(&tw->tw_node)) { + write_unlock(&ehead->lock); + return; + } + __hlist_del(&tw->tw_node); + sk_node_init(&tw->tw_node); + write_unlock(&ehead->lock); + + /* Disassociate with bind bucket. */ + bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; + spin_lock(&bhead->lock); + tb = tw->tw_tb; + __hlist_del(&tw->tw_bind_node); + tw->tw_tb = NULL; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + spin_unlock(&bhead->lock); +#ifdef SOCK_REFCNT_DEBUG + if (atomic_read(&tw->tw_refcnt) != 1) { + printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", + tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); + } +#endif + inet_twsk_put(tw); +} + +/* + * Enter the time wait state. This is called with locally disabled BH. + * Essentially we whip up a timewait bucket, copy the relevant info into it + * from the SK, and mess with hash chains and list linkage. + */ +void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, + struct inet_hashinfo *hashinfo) +{ + const struct inet_sock *inet = inet_sk(sk); + struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; + struct inet_bind_hashbucket *bhead; + /* Step 1: Put TW into bind hash. 
Original socket stays there too. + Note, that any socket with inet->num != 0 MUST be bound in + binding cache, even if it is closed. + */ + bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; + spin_lock(&bhead->lock); + tw->tw_tb = inet->bind_hash; + BUG_TRAP(inet->bind_hash); + inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); + spin_unlock(&bhead->lock); + + write_lock(&ehead->lock); + + /* Step 2: Remove SK from established hash. */ + if (__sk_del_node_init(sk)) + sock_prot_dec_use(sk->sk_prot); + + /* Step 3: Hash TW into TIMEWAIT half of established hash table. */ + inet_twsk_add_node(tw, &(ehead + hashinfo->ehash_size)->chain); + atomic_inc(&tw->tw_refcnt); + + write_unlock(&ehead->lock); +} diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 6f2d6f2276b9..60c6a797cc50 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -174,8 +174,6 @@ nlmsg_failure: return -1; } -extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, - int dif); #ifdef CONFIG_IP_TCPDIAG_IPV6 extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, struct in6_addr *daddr, u16 dport, @@ -197,9 +195,9 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) struct sk_buff *rep; if (req->tcpdiag_family == AF_INET) { - sk = tcp_v4_lookup(req->id.tcpdiag_dst[0], req->id.tcpdiag_dport, - req->id.tcpdiag_src[0], req->id.tcpdiag_sport, - req->id.tcpdiag_if); + sk = inet_lookup(&tcp_hashinfo, req->id.tcpdiag_dst[0], + req->id.tcpdiag_dport, req->id.tcpdiag_src[0], + req->id.tcpdiag_sport, req->id.tcpdiag_if); } #ifdef CONFIG_IP_TCPDIAG_IPV6 else if (req->tcpdiag_family == AF_INET6) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ce423e48ebe0..e7e91e60ac74 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -238,71 +238,6 @@ void tcp_unhash(struct sock *sk) inet_unhash(&tcp_hashinfo, sk); } -/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so - * we need not 
check it for TCP lookups anymore, thanks Alexey. -DaveM - * - * Local BH must be disabled here. - */ - -static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, - const u16 sport, - const u32 daddr, - const u16 hnum, - const int dif) -{ - struct inet_ehash_bucket *head; - INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(sport, hnum); - struct sock *sk; - const struct hlist_node *node; - /* Optimize here for direct hit, only listening connections can - * have wildcards anyways. - */ - const int hash = inet_ehashfn(daddr, hnum, saddr, sport, tcp_hashinfo.ehash_size); - head = &tcp_hashinfo.ehash[hash]; - read_lock(&head->lock); - sk_for_each(sk, node, &head->chain) { - if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ - } - - /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { - if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) - goto hit; - } - sk = NULL; -out: - read_unlock(&head->lock); - return sk; -hit: - sock_hold(sk); - goto out; -} - -static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport, - u32 daddr, u16 hnum, int dif) -{ - struct sock *sk = __tcp_v4_lookup_established(saddr, sport, - daddr, hnum, dif); - - return sk ? 
: inet_lookup_listener(&tcp_hashinfo, daddr, hnum, dif); -} - -inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, - u16 dport, int dif) -{ - struct sock *sk; - - local_bh_disable(); - sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif); - local_bh_enable(); - - return sk; -} - -EXPORT_SYMBOL_GPL(tcp_v4_lookup); - static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) { return secure_tcp_sequence_number(skb->nh.iph->daddr, @@ -751,8 +686,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) return; } - sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, - th->source, tcp_v4_iif(skb)); + sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, + th->source, tcp_v4_iif(skb)); if (!sk) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -1359,11 +1294,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr, - th->source, - skb->nh.iph->daddr, - ntohs(th->dest), - tcp_v4_iif(skb)); + nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, + th->source, skb->nh.iph->daddr, + ntohs(th->dest), tcp_v4_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1505,9 +1438,9 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; TCP_SKB_CB(skb)->sacked = 0; - sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source, - skb->nh.iph->daddr, ntohs(th->dest), - tcp_v4_iif(skb)); + sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, + skb->nh.iph->daddr, ntohs(th->dest), + tcp_v4_iif(skb)); if (!sk) goto no_tcp_socket; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 5b5a49335fbb..4112f7a6d108 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -56,42 +56,6 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) int tcp_tw_count; - -/* Must be called with locally disabled BHs. 
*/ -static void tcp_timewait_kill(struct inet_timewait_sock *tw) -{ - struct inet_bind_hashbucket *bhead; - struct inet_bind_bucket *tb; - /* Unlink from established hashes. */ - struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[tw->tw_hashent]; - - write_lock(&ehead->lock); - if (hlist_unhashed(&tw->tw_node)) { - write_unlock(&ehead->lock); - return; - } - __hlist_del(&tw->tw_node); - sk_node_init(&tw->tw_node); - write_unlock(&ehead->lock); - - /* Disassociate with bind bucket. */ - bhead = &tcp_hashinfo.bhash[inet_bhashfn(tw->tw_num, tcp_hashinfo.bhash_size)]; - spin_lock(&bhead->lock); - tb = tw->tw_tb; - __hlist_del(&tw->tw_bind_node); - tw->tw_tb = NULL; - inet_bind_bucket_destroy(tcp_hashinfo.bind_bucket_cachep, tb); - spin_unlock(&bhead->lock); - -#ifdef SOCK_REFCNT_DEBUG - if (atomic_read(&tw->tw_refcnt) != 1) { - printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", - tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); - } -#endif - inet_twsk_put(tw); -} - /* * * Main purpose of TIME-WAIT state is to close connection gracefully, * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN @@ -290,40 +254,6 @@ kill: return TCP_TW_SUCCESS; } -/* Enter the time wait state. This is called with locally disabled BH. - * Essentially we whip up a timewait bucket, copy the - * relevant info into it from the SK, and mess with hash chains - * and list linkage. - */ -static void __tcp_tw_hashdance(struct sock *sk, struct inet_timewait_sock *tw) -{ - const struct inet_sock *inet = inet_sk(sk); - struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[sk->sk_hashent]; - struct inet_bind_hashbucket *bhead; - /* Step 1: Put TW into bind hash. Original socket stays there too. - Note, that any socket with inet->num != 0 MUST be bound in - binding cache, even if it is closed. 
- */ - bhead = &tcp_hashinfo.bhash[inet_bhashfn(inet->num, tcp_hashinfo.bhash_size)]; - spin_lock(&bhead->lock); - tw->tw_tb = inet->bind_hash; - BUG_TRAP(inet->bind_hash); - inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); - spin_unlock(&bhead->lock); - - write_lock(&ehead->lock); - - /* Step 2: Remove SK from established hash. */ - if (__sk_del_node_init(sk)) - sock_prot_dec_use(sk->sk_prot); - - /* Step 3: Hash TW into TIMEWAIT half of established hash table. */ - inet_twsk_add_node(tw, &(ehead + tcp_hashinfo.ehash_size)->chain); - atomic_inc(&tw->tw_refcnt); - - write_unlock(&ehead->lock); -} - /* * Move a socket to time-wait or dead fin-wait-2 state. */ @@ -381,7 +311,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_ipv6only = 0; #endif /* Linkage updates. */ - __tcp_tw_hashdance(sk, tw); + __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) @@ -448,7 +378,7 @@ rescan: inet_twsk_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { __inet_twsk_del_dead_node(tw); spin_unlock(&tw_death_lock); - tcp_timewait_kill(tw); + __inet_twsk_kill(tw, &tcp_hashinfo); inet_twsk_put(tw); killed++; spin_lock(&tw_death_lock); @@ -544,7 +474,7 @@ void tcp_tw_deschedule(struct inet_timewait_sock *tw) del_timer(&tcp_tw_timer); } spin_unlock(&tw_death_lock); - tcp_timewait_kill(tw); + __inet_twsk_kill(tw, &tcp_hashinfo); } /* Short-time timewait calendar */ @@ -653,7 +583,7 @@ void tcp_twcal_tick(unsigned long dummy) inet_twsk_for_each_inmate_safe(tw, node, safe, &tcp_twcal_row[slot]) { __inet_twsk_del_dead_node(tw); - tcp_timewait_kill(tw); + __inet_twsk_kill(tw, &tcp_hashinfo); inet_twsk_put(tw); killed++; } -- cgit v1.2.3-59-g8ed1b