From 19efbd93e6fb05eab81856b4fc8d64211dd37088 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai
Date: Mon, 19 Feb 2018 12:58:38 +0300
Subject: net: Kill net_mutex

We take net_mutex when there are !async pernet_operations registered
and read locking of net_sem is not enough. But we can get rid of taking
the mutex and just change the logic to write lock net_sem in such
cases. This obviously reduces the number of lock operations we do.

Signed-off-by: Kirill Tkhai
Signed-off-by: David S. Miller
---
 net/core/net_namespace.c | 53 +++++++++++++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bcab9a938d6f..e89a516620dd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,8 +29,6 @@
 static LIST_HEAD(pernet_list);
 static struct list_head *first_device = &pernet_list;
 
-/* Used only if there are !async pernet_operations registered */
-DEFINE_MUTEX(net_mutex);
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
 
@@ -407,6 +405,7 @@ struct net *copy_net_ns(unsigned long flags,
 {
 	struct ucounts *ucounts;
 	struct net *net;
+	unsigned write;
 	int rv;
 
 	if (!(flags & CLONE_NEWNET))
@@ -424,20 +423,26 @@ struct net *copy_net_ns(unsigned long flags,
 	refcount_set(&net->passive, 1);
 	net->ucounts = ucounts;
 	get_user_ns(user_ns);
-
-	rv = down_read_killable(&net_sem);
+again:
+	write = READ_ONCE(nr_sync_pernet_ops);
+	if (write)
+		rv = down_write_killable(&net_sem);
+	else
+		rv = down_read_killable(&net_sem);
 	if (rv < 0)
 		goto put_userns;
-	if (nr_sync_pernet_ops) {
-		rv = mutex_lock_killable(&net_mutex);
-		if (rv < 0)
-			goto up_read;
+
+	if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+		up_read(&net_sem);
+		goto again;
 	}
 	rv = setup_net(net, user_ns);
-	if (nr_sync_pernet_ops)
-		mutex_unlock(&net_mutex);
-up_read:
-	up_read(&net_sem);
+
+	if (write)
+		up_write(&net_sem);
+	else
+		up_read(&net_sem);
+
 	if (rv < 0) {
 put_userns:
 		put_user_ns(user_ns);
@@ -485,15 +490,23 @@ static void cleanup_net(struct work_struct *work)
 	struct net *net, *tmp, *last;
 	struct list_head net_kill_list;
 	LIST_HEAD(net_exit_list);
+	unsigned write;
 
 	/* Atomically snapshot the list of namespaces to cleanup */
 	spin_lock_irq(&cleanup_list_lock);
 	list_replace_init(&cleanup_list, &net_kill_list);
 	spin_unlock_irq(&cleanup_list_lock);
+again:
+	write = READ_ONCE(nr_sync_pernet_ops);
+	if (write)
+		down_write(&net_sem);
+	else
+		down_read(&net_sem);
 
-	down_read(&net_sem);
-	if (nr_sync_pernet_ops)
-		mutex_lock(&net_mutex);
+	if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+		up_read(&net_sem);
+		goto again;
+	}
 
 	/* Don't let anyone else find us. */
 	rtnl_lock();
@@ -528,14 +541,14 @@ static void cleanup_net(struct work_struct *work)
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_exit_list(ops, &net_exit_list);
 
-	if (nr_sync_pernet_ops)
-		mutex_unlock(&net_mutex);
-
 	/* Free the net generic variables */
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_free_list(ops, &net_exit_list);
 
-	up_read(&net_sem);
+	if (write)
+		up_write(&net_sem);
+	else
+		up_read(&net_sem);
 
 	/* Ensure there are no outstanding rcu callbacks using this
 	 * network namespace.
@@ -563,8 +576,6 @@ static void cleanup_net(struct work_struct *work)
 void net_ns_barrier(void)
 {
 	down_write(&net_sem);
-	mutex_lock(&net_mutex);
-	mutex_unlock(&net_mutex);
 	up_write(&net_sem);
 }
 EXPORT_SYMBOL(net_ns_barrier);
--
cgit v1.2.3-59-g8ed1b
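The locking pattern this patch introduces is worth seeing in isolation:
sample a flag, take the rwsem in the matching mode, and retry under the
write lock if a sync op was registered between the sample and the read
acquire. Below is a minimal userspace sketch of that pattern, using
POSIX rwlocks and C11 atomics; nr_sync_ops, setup() and
do_setup_locked() are illustrative stand-ins, not kernel API.

/* Userspace sketch of the copy_net_ns() locking pattern above. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_rwlock_t net_sem = PTHREAD_RWLOCK_INITIALIZER;
static atomic_uint nr_sync_ops;	/* ops that need full exclusion */

static int setup(void)		/* stand-in for setup_net() */
{
	return 0;
}

static int do_setup_locked(void)
{
	unsigned int write;
	int rv;

again:
	write = atomic_load(&nr_sync_ops);
	if (write)
		pthread_rwlock_wrlock(&net_sem);
	else
		pthread_rwlock_rdlock(&net_sem);

	/* A sync op may have appeared between the sample and the
	 * read acquire; drop the lock and retry in write mode. */
	if (!write && atomic_load(&nr_sync_ops)) {
		pthread_rwlock_unlock(&net_sem);
		goto again;
	}

	rv = setup();
	pthread_rwlock_unlock(&net_sem);
	return rv;
}

int main(void)
{
	printf("setup returned %d\n", do_setup_locked());
	return 0;
}

The re-check after the read acquire is what keeps the common read-locked
fast path correct without a mutex: a concurrent registration of a sync
op forces a retry that takes the write lock instead.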
From 65b7b5b90fcd17b25ef43b0cd02bda47bf286675 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai
Date: Mon, 19 Feb 2018 12:58:45 +0300
Subject: net: Make cleanup_list and net::cleanup_list of llist type

This simplifies cleanup queueing and makes the cleanup lists use llist
primitives. Since llist has its own cmpxchg() ordering,
cleanup_list_lock is no longer needed. Also, struct llist_node is
smaller than struct list_head, so we save some bytes in struct net
with this patch.

Signed-off-by: Kirill Tkhai
Signed-off-by: David S. Miller
---
 include/net/net_namespace.h |  3 ++-
 net/core/net_namespace.c    | 20 ++++++--------------
 2 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 115b01b92f4d..d4417495773a 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -59,12 +59,13 @@ struct net {
 	atomic64_t		cookie_gen;
 
 	struct list_head	list;		/* list of network namespaces */
-	struct list_head	cleanup_list;	/* namespaces on death row */
 	struct list_head	exit_list;	/* To linked to call pernet exit
 						 * methods on dead net (net_sem
 						 * read locked), or to unregister
 						 * pernet ops (net_sem wr locked).
 						 */
+	struct llist_node	cleanup_list;	/* namespaces on death row */
+
 	struct user_namespace	*user_ns;	/* Owning user namespace */
 	struct ucounts		*ucounts;
 	spinlock_t		nsid_lock;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e89a516620dd..abf8a46e94e2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -481,21 +481,18 @@ static void unhash_nsid(struct net *net, struct net *last)
 	spin_unlock_bh(&net->nsid_lock);
 }
 
-static DEFINE_SPINLOCK(cleanup_list_lock);
-static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
+static LLIST_HEAD(cleanup_list);
 
 static void cleanup_net(struct work_struct *work)
 {
 	const struct pernet_operations *ops;
 	struct net *net, *tmp, *last;
-	struct list_head net_kill_list;
+	struct llist_node *net_kill_list;
 	LIST_HEAD(net_exit_list);
 	unsigned write;
 
 	/* Atomically snapshot the list of namespaces to cleanup */
-	spin_lock_irq(&cleanup_list_lock);
-	list_replace_init(&cleanup_list, &net_kill_list);
-	spin_unlock_irq(&cleanup_list_lock);
+	net_kill_list = llist_del_all(&cleanup_list);
 again:
 	write = READ_ONCE(nr_sync_pernet_ops);
 	if (write)
@@ -510,7 +507,7 @@ again:
 
 	/* Don't let anyone else find us. */
 	rtnl_lock();
-	list_for_each_entry(net, &net_kill_list, cleanup_list)
+	llist_for_each_entry(net, net_kill_list, cleanup_list)
 		list_del_rcu(&net->list);
 	/* Cache last net. After we unlock rtnl, no one new net
	 * added to net_namespace_list can assign nsid pointer
@@ -525,7 +522,7 @@ again:
 	last = list_last_entry(&net_namespace_list, struct net, list);
 	rtnl_unlock();
 
-	list_for_each_entry(net, &net_kill_list, cleanup_list) {
+	llist_for_each_entry(net, net_kill_list, cleanup_list) {
 		unhash_nsid(net, last);
 		list_add_tail(&net->exit_list, &net_exit_list);
 	}
@@ -585,12 +582,7 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
 void __put_net(struct net *net)
 {
 	/* Cleanup the network namespace in process context */
-	unsigned long flags;
-
-	spin_lock_irqsave(&cleanup_list_lock, flags);
-	list_add(&net->cleanup_list, &cleanup_list);
-	spin_unlock_irqrestore(&cleanup_list_lock, flags);
-
+	llist_add(&net->cleanup_list, &cleanup_list);
 	queue_work(netns_wq, &net_cleanup_work);
 }
 EXPORT_SYMBOL_GPL(__put_net);
--
cgit v1.2.3-59-g8ed1b
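The reason llist can drop the spinlock is that both ends of the queue
are single atomic operations: producers push with a cmpxchg loop and
the one consumer detaches the whole chain with one exchange, so no IRQ
disabling is needed either. A self-contained userspace model of that
pattern follows; struct node, push() and del_all() are illustrative
stand-ins, while the kernel's real implementation lives in
include/linux/llist.h.

/* Userspace model of llist_add() / llist_del_all(). */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
	int id;
};

static _Atomic(struct node *) cleanup_list;

/* Like llist_add(): returns nonzero if the list was empty before. */
static int push(struct node *n)
{
	struct node *first = atomic_load(&cleanup_list);

	do {
		n->next = first;
	} while (!atomic_compare_exchange_weak(&cleanup_list, &first, n));
	return first == NULL;
}

/* Like llist_del_all(): detach the whole chain in one exchange. */
static struct node *del_all(void)
{
	return atomic_exchange(&cleanup_list, NULL);
}

int main(void)
{
	struct node a = { .id = 1 }, b = { .id = 2 };

	printf("first queuer? %d\n", push(&a));	/* 1: list was empty */
	printf("first queuer? %d\n", push(&b));	/* 0 */
	/* Drain is LIFO: prints net 2, then net 1. */
	for (struct node *n = del_all(); n; n = n->next)
		printf("draining net %d\n", n->id);
	return 0;
}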
From 8349efd903394422d1598d196b6be70c410cf8f5 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai
Date: Mon, 19 Feb 2018 12:58:54 +0300
Subject: net: Queue net_cleanup_work only if there is first net added

When llist_add() returns false, cleanup_net() hasn't done its
llist_del_all() yet, and the work has already been scheduled by the
first queuer. So we may skip queue_work() in this case.

Signed-off-by: Kirill Tkhai
Signed-off-by: David S. Miller
---
 net/core/net_namespace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index abf8a46e94e2..27a55236ad64 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -582,8 +582,8 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
 void __put_net(struct net *net)
 {
 	/* Cleanup the network namespace in process context */
-	llist_add(&net->cleanup_list, &cleanup_list);
-	queue_work(netns_wq, &net_cleanup_work);
+	if (llist_add(&net->cleanup_list, &cleanup_list))
+		queue_work(netns_wq, &net_cleanup_work);
 }
 EXPORT_SYMBOL_GPL(__put_net);
--
cgit v1.2.3-59-g8ed1b
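This works because llist_add() reports whether the pushed entry was the
first one: only the thread that pushes onto an empty list needs to
schedule the worker, and later pushes that land before the worker's
llist_del_all() ride along for free. A sketch of that contract, under
the assumption that the consumer drains with a del-all as cleanup_net()
does; put_net_stub() and schedule_cleanup() are hypothetical stand-ins
for __put_net() and queue_work(netns_wq, &net_cleanup_work).

/* Userspace sketch: schedule the worker only on the first add. */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct net_stub {
	struct net_stub *next;
};

static _Atomic(struct net_stub *) cleanup_list;
static atomic_int work_queued;		/* counts queue_work() calls */

static void schedule_cleanup(void)	/* stands in for queue_work() */
{
	atomic_fetch_add(&work_queued, 1);
}

static void put_net_stub(struct net_stub *n)
{
	struct net_stub *first = atomic_load(&cleanup_list);

	do {
		n->next = first;
	} while (!atomic_compare_exchange_weak(&cleanup_list, &first, n));

	if (first == NULL)		/* llist_add() returned true */
		schedule_cleanup();
}

int main(void)
{
	struct net_stub a = { 0 }, b = { 0 }, c = { 0 };

	put_net_stub(&a);
	put_net_stub(&b);
	put_net_stub(&c);
	/* Three puts, but the worker was queued exactly once. */
	printf("queue_work() calls: %d\n", atomic_load(&work_queued));
	return 0;
}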