From 789f558cfb3680aeb52de137418637f6b04b7d22 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 12 Apr 2015 18:51:09 -0700 Subject: tcp/dccp: get rid of central timewait timer Using a timer wheel for timewait sockets was nice ~15 years ago when memory was expensive and machines had a single processor. This does not scale, the code is ugly, and it is a source of huge latencies (typically 30 ms have been seen, with cpus spinning on the death_lock spinlock). We can afford to use an extra 64 bytes per timewait sock and spread timewait load to all cpus to have better behavior. Tested: In the following test, /proc/sys/net/ipv4/tcp_tw_recycle is set to 1 on the target (lpaa24) Before patch : lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0 419594 lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0 437171 While the test is running, we can observe 25 or even 33 ms latencies. lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23 ... 1000 packets transmitted, 1000 received, 0% packet loss, time 20601ms rtt min/avg/max/mdev = 0.020/0.217/25.771/1.535 ms, pipe 2 lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23 ... 1000 packets transmitted, 1000 received, 0% packet loss, time 20702ms rtt min/avg/max/mdev = 0.019/0.183/33.761/1.441 ms, pipe 2 After patch : About 90% increase of throughput : lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0 810442 lpaa23:~# ./super_netperf 200 -H lpaa24 -t TCP_CC -l 60 -- -p0,0 800992 And latencies are kept to minimal values during this load, even if network utilization is 90% higher : lpaa24:~# ping -c 1000 -i 0.02 -qn lpaa23 ... 1000 packets transmitted, 1000 received, 0% packet loss, time 19991ms rtt min/avg/max/mdev = 0.023/0.064/0.360/0.042 ms Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/inet_timewait_sock.h | 107 ++++----------------------------------- 1 file changed, 9 insertions(+), 98 deletions(-) (limited to 'include/net/inet_timewait_sock.h') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index b7ce1003c429..360c4802288d 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -31,67 +31,14 @@ struct inet_hashinfo; -#define INET_TWDR_RECYCLE_SLOTS_LOG 5 -#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG) - -/* - * If time > 4sec, it is "slow" path, no recycling is required, - * so that we select tick to get range about 4 seconds. - */ -#if HZ <= 16 || HZ > 4096 -# error Unsupported: HZ <= 16 or HZ > 4096 -#elif HZ <= 32 -# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 64 -# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 128 -# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 256 -# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 512 -# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 1024 -# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#elif HZ <= 2048 -# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#else -# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) -#endif - -static inline u32 inet_tw_time_stamp(void) -{ - return jiffies; -} - -/* TIME_WAIT reaping mechanism. */ -#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. 
*/ - -#define INET_TWDR_TWKILL_QUOTA 100 - struct inet_timewait_death_row { - /* Short-time timewait calendar */ - int twcal_hand; - unsigned long twcal_jiffie; - struct timer_list twcal_timer; - struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS]; - - spinlock_t death_lock; - int tw_count; - int period; - u32 thread_slots; - struct work_struct twkill_work; - struct timer_list tw_timer; - int slot; - struct hlist_head cells[INET_TWDR_TWKILL_SLOTS]; - struct inet_hashinfo *hashinfo; + atomic_t tw_count; + + struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; int sysctl_tw_recycle; int sysctl_max_tw_buckets; }; -void inet_twdr_hangman(unsigned long data); -void inet_twdr_twkill_work(struct work_struct *work); -void inet_twdr_twcal_tick(unsigned long data); - struct inet_bind_bucket; /* @@ -133,52 +80,18 @@ struct inet_timewait_sock { __be16 tw_sport; kmemcheck_bitfield_begin(flags); /* And these are ours. */ - unsigned int tw_pad0 : 1, /* 1 bit hole */ + unsigned int tw_kill : 1, tw_transparent : 1, tw_flowlabel : 20, tw_pad : 2, /* 2 bits hole */ tw_tos : 8; kmemcheck_bitfield_end(flags); - u32 tw_ttd; + struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; - struct hlist_node tw_death_node; + struct inet_timewait_death_row *tw_dr; }; #define tw_tclass tw_tos -static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) -{ - return !hlist_unhashed(&tw->tw_death_node); -} - -static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) -{ - tw->tw_death_node.pprev = NULL; -} - -static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) -{ - __hlist_del(&tw->tw_death_node); - inet_twsk_dead_node_init(tw); -} - -static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) -{ - if (inet_twsk_dead_hashed(tw)) { - __inet_twsk_del_dead_node(tw); - return 1; - } - return 0; -} - -#define inet_twsk_for_each(tw, node, head) \ - hlist_nulls_for_each_entry(tw, node, head, tw_node) - -#define 
inet_twsk_for_each_inmate(tw, jail) \ - hlist_for_each_entry(tw, jail, tw_death_node) - -#define inet_twsk_for_each_inmate_safe(tw, safe, jail) \ - hlist_for_each_entry_safe(tw, safe, jail, tw_death_node) - static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) { return (struct inet_timewait_sock *)sk; @@ -193,16 +106,14 @@ int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo); struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, + struct inet_timewait_death_row *dr, const int state); void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); -void inet_twsk_schedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr, - const int timeo, const int timewait_len); -void inet_twsk_deschedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr); +void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo); +void inet_twsk_deschedule(struct inet_timewait_sock *tw); void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family); -- cgit v1.2.3-59-g8ed1b