From ebc0ffae5dfb4447e0a431ffe7fe1d467c48bbb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Oct 2010 10:41:36 +0000 Subject: fib: RCU conversion of fib_lookup() fib_lookup() converted to be called in RCU protected context, no reference taken and released on a contended cache line (fib_clntref) fib_table_lookup() and fib_semantic_match() get an additional parameter. struct fib_info gets an rcu_head field, and is freed after an rcu grace period. Stress test : (Sending 160.000.000 UDP frames on same neighbour, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_HASH) (about same results for FIB_TRIE) Before patch : real 1m31.199s user 0m13.761s sys 23m24.780s After patch: real 1m5.375s user 0m14.997s sys 15m50.115s Before patch Profile : 13044.00 15.4% __ip_route_output_key vmlinux 8438.00 10.0% dst_destroy vmlinux 5983.00 7.1% fib_semantic_match vmlinux 5410.00 6.4% fib_rules_lookup vmlinux 4803.00 5.7% neigh_lookup vmlinux 4420.00 5.2% _raw_spin_lock vmlinux 3883.00 4.6% rt_set_nexthop vmlinux 3261.00 3.9% _raw_read_lock vmlinux 2794.00 3.3% fib_table_lookup vmlinux 2374.00 2.8% neigh_resolve_output vmlinux 2153.00 2.5% dst_alloc vmlinux 1502.00 1.8% _raw_read_lock_bh vmlinux 1484.00 1.8% kmem_cache_alloc vmlinux 1407.00 1.7% eth_header vmlinux 1406.00 1.7% ipv4_dst_destroy vmlinux 1298.00 1.5% __copy_from_user_ll vmlinux 1174.00 1.4% dev_queue_xmit vmlinux 1000.00 1.2% ip_output vmlinux After patch Profile : 13712.00 15.8% dst_destroy vmlinux 8548.00 9.9% __ip_route_output_key vmlinux 7017.00 8.1% neigh_lookup vmlinux 4554.00 5.3% fib_semantic_match vmlinux 4067.00 4.7% _raw_read_lock vmlinux 3491.00 4.0% dst_alloc vmlinux 3186.00 3.7% neigh_resolve_output vmlinux 3103.00 3.6% fib_table_lookup vmlinux 2098.00 2.4% _raw_read_lock_bh vmlinux 2081.00 2.4% kmem_cache_alloc vmlinux 2013.00 2.3% _raw_spin_lock vmlinux 1763.00 2.0% __copy_from_user_ll vmlinux 1763.00 2.0% ip_output vmlinux 1761.00 2.0% ipv4_dst_destroy vmlinux 1631.00 1.9% eth_header vmlinux 1440.00 1.7% _raw_read_unlock_bh vmlinux Reference results, if IP route cache is enabled : real 0m29.718s user 0m10.845s sys 7m37.341s 25213.00 29.5% __ip_route_output_key vmlinux 9011.00 10.5% dst_release vmlinux 4817.00 5.6% ip_push_pending_frames vmlinux 4232.00 5.0% ip_finish_output vmlinux 3940.00 4.6% udp_sendmsg vmlinux 3730.00 4.4% __copy_from_user_ll vmlinux 3716.00 4.4% ip_route_output_flow vmlinux 2451.00 2.9% __xfrm_lookup vmlinux 2221.00 2.6% ip_append_data vmlinux 1718.00 2.0% _raw_spin_lock_bh vmlinux 1655.00 1.9% __alloc_skb vmlinux 1572.00 1.8% sock_wfree vmlinux 1345.00 1.6% kfree vmlinux Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_fib.h | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'include/net/ip_fib.h') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c93f94edc610..ba3666d31766 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -86,6 +86,7 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_power; #endif + struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev }; @@ -148,7 +149,7 @@ struct fib_table { }; extern int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, - struct fib_result *res); + struct fib_result *res, int fib_flags); extern int fib_table_insert(struct fib_table *, struct fib_config *); extern int fib_table_delete(struct fib_table *, struct fib_config *); extern int fib_table_dump(struct fib_table *table, struct sk_buff *skb, @@ -185,11 +186,11 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp, struct fib_table *table; table = fib_get_table(net, RT_TABLE_LOCAL); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; table = fib_get_table(net, RT_TABLE_MAIN); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; return -ENETUNREACH; } @@ -254,16 +255,6 @@ static inline void fib_info_put(struct fib_info *fi) free_fib_info(fi); } -static inline void fib_res_put(struct fib_result *res) -{ - if (res->fi) - fib_info_put(res->fi); -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (res->r) - fib_rule_put(res->r); -#endif -} - #ifdef CONFIG_PROC_FS extern int __net_init fib_proc_init(struct net *net); extern void __net_exit fib_proc_exit(struct net *net); -- cgit v1.2.3-59-g8ed1b From 4aa2c466a7733af093a526e9d1cdd0b3b90d47e9 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 28 Oct 2010 02:00:43 +0000 Subject: fib: Fix fib zone and its hash leak on namespace stop When we stop a namespace we flush the table and free one, but the added fn_zone-s (and their hashes if grown) are leaked. Need to free. Tries releases all its stuff in the flushing code. Shame on us - this bug exists since the very first make-fib-per-net patches in 2.6.27 :( Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 ++ net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_hash.c | 18 ++++++++++++++++++ net/ipv4/fib_trie.c | 5 +++++ 4 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/net/ip_fib.h') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ba3666d31766..07bdb5e9e8ac 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -158,6 +158,8 @@ extern int fib_table_flush(struct fib_table *table); extern void fib_table_select_default(struct fib_table *table, const struct flowi *flp, struct fib_result *res); +extern void fib_free_table(struct fib_table *tb); + #ifndef CONFIG_IP_MULTIPLE_TABLES diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 36e27c2107de..eb6f69a8f27a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1052,7 +1052,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { hlist_del(node); fib_table_flush(tb); - kfree(tb); + fib_free_table(tb); } } kfree(net->ipv4.fib_table_hash); diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index b232375a0b75..b3acb0417b21 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -716,6 +716,24 @@ int fib_table_flush(struct fib_table *tb) return found; } +void fib_free_table(struct fib_table *tb) +{ + struct fn_hash *table = (struct fn_hash *) tb->tb_data; + struct fn_zone *fz, *next; + + next = table->fn_zone_list; + while (next != NULL) { + fz = next; + next = fz->fz_next; + + if (fz->fz_hash != fz->fz_embedded_hash) + fz_hash_free(fz->fz_hash, fz->fz_divisor); + + kfree(fz); + } + + kfree(tb); +} static inline int fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b14450895102..200eb538fbb3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1797,6 +1797,11 @@ int fib_table_flush(struct fib_table *tb) return found; } +void fib_free_table(struct fib_table *tb) +{ + kfree(tb); +} + void fib_table_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) -- cgit v1.2.3-59-g8ed1b