
[1/3] net: Kill net_mutex

Message ID 151903431795.8021.5069678488269142143.stgit@localhost.localdomain
State Accepted, archived
Delegated to: David Miller
Series net: Get rid of net_mutex and simplify cleanup_list queueing

Commit Message

Kirill Tkhai Feb. 19, 2018, 9:58 a.m. UTC
We take net_mutex when there are !async pernet_operations
registered, since read locking of net_sem is not enough for them.
But we can get rid of taking the mutex and instead change the logic
to write lock net_sem in such cases. This obviously reduces
the number of lock operations we do.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 include/linux/rtnetlink.h   |    1 -
 include/net/net_namespace.h |   11 ++++++---
 net/core/net_namespace.c    |   53 ++++++++++++++++++++++++++-----------------
 3 files changed, 39 insertions(+), 26 deletions(-)
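
For reference, here is a minimal standalone sketch (POSIX threads, not the
kernel code; the names nr_sync_pernet_ops and net_sem are taken from the
patch below) of the locking pattern the commit message describes: take
net_sem for writing only when synchronous pernet_operations are registered,
otherwise take it for reading, and retry if a sync registration races in
between the check and the read lock.

#include <pthread.h>
#include <stdatomic.h>

static pthread_rwlock_t net_sem = PTHREAD_RWLOCK_INITIALIZER;
static atomic_uint nr_sync_pernet_ops;	/* number of !async pernet_operations */

static void setup_net_locked(void)
{
	/* stand-in for the setup_net()/exit work done under the lock */
}

void locked_pernet_section(void)
{
	unsigned int write;

again:
	write = atomic_load(&nr_sync_pernet_ops);
	if (write)
		pthread_rwlock_wrlock(&net_sem);	/* exclusive, like down_write() */
	else
		pthread_rwlock_rdlock(&net_sem);	/* shared, like down_read() */

	/*
	 * A sync pernet_operations may have been registered after the check
	 * above but before we took the read lock; fall back to the
	 * write-locked path in that case.
	 */
	if (!write && atomic_load(&nr_sync_pernet_ops)) {
		pthread_rwlock_unlock(&net_sem);
		goto again;
	}

	setup_net_locked();

	pthread_rwlock_unlock(&net_sem);
}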

Comments

Stephen Hemminger Feb. 20, 2018, 11:18 p.m. UTC | #1
On Mon, 19 Feb 2018 12:58:38 +0300
Kirill Tkhai <ktkhai@virtuozzo.com> wrote:

> +	struct list_head	exit_list;	/* To linked to call pernet exit
> +						 * methods on dead net (net_sem
> +						 * read locked), or to unregister
> +						 * pernet ops (net_sem wr locked).
> +						 */

Sorry, that comment is completely unparseable.
Either you know what it does, and therefore the comment is unnecessary,
or change the comment to a valid explanation of the semantics of the list.

Maybe comments about the locking model are best left to where
it is used in the code.
Kirill Tkhai Feb. 21, 2018, 10:16 a.m. UTC | #2
Hi, Stephen,

On 21.02.2018 02:18, Stephen Hemminger wrote:
> On Mon, 19 Feb 2018 12:58:38 +0300
> Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
> 
>> +	struct list_head	exit_list;	/* To linked to call pernet exit
>> +						 * methods on dead net (net_sem
>> +						 * read locked), or to unregister
>> +						 * pernet ops (net_sem wr locked).
>> +						 */
> 
> Sorry, that comment is completely unparseable.
> Either you know what it does, and therefore the comment is unnecessary,
> or change the comment to a valid explanation of the semantics of the list.
> 
> Maybe comments about the locking model are best left to where
> it is used in the code.

Let's improve it :) It's used to call pernet exit methods, and the net ns
logic guarantees we never call exit methods for the same net in parallel.
How about stating that directly, without mentioning net_sem? Something like this:

/* To link net to call pernet exit methods */

Or maybe you have a better variant?

Thanks,
Kirill
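
To make the semantics Kirill describes above concrete, here is a simplified
sketch of the cleanup path (adapted from cleanup_net()/ops_exit_list() in
net/core/net_namespace.c; locking, RCU removal and nsid cleanup are omitted,
and ops_free_list()/net_drop_ns() are the kernel helpers that free per-net
state): each dead net is linked onto a local net_exit_list via
net->exit_list, and every registered pernet exit method is then invoked over
that list.

static void ops_exit_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;

	if (ops->exit) {
		/* per-net exit, called once for every dying namespace */
		list_for_each_entry(net, net_exit_list, exit_list)
			ops->exit(net);
	}
	if (ops->exit_batch)
		/* batched exit, sees the whole list at once */
		ops->exit_batch(net_exit_list);
}

static void cleanup_net_sketch(struct list_head *net_kill_list)
{
	const struct pernet_operations *ops;
	struct net *net, *tmp;
	LIST_HEAD(net_exit_list);

	/* Link every dying namespace onto one local exit list */
	list_for_each_entry(net, net_kill_list, cleanup_list)
		list_add_tail(&net->exit_list, &net_exit_list);

	/* Run all registered exit methods over that list, newest ops first */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	/* Free per-net generic state, then the namespaces themselves */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		list_del_init(&net->exit_list);
		net_drop_ns(net);
	}
}

Since each dying net only ever sits on one cleanup's private net_exit_list,
exit methods for the same net are never run in parallel, which is the
guarantee referred to above.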

Patch

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index e9ee9ad0a681..3573b4bf2fdf 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -35,7 +35,6 @@  extern int rtnl_trylock(void);
 extern int rtnl_is_locked(void);
 
 extern wait_queue_head_t netdev_unregistering_wq;
-extern struct mutex net_mutex;
 extern struct rw_semaphore net_sem;
 
 #ifdef CONFIG_PROVE_LOCKING
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 9158ec1ad06f..115b01b92f4d 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -60,8 +60,11 @@  struct net {
 
 	struct list_head	list;		/* list of network namespaces */
 	struct list_head	cleanup_list;	/* namespaces on death row */
-	struct list_head	exit_list;	/* Use only net_mutex */
-
+	struct list_head	exit_list;	/* To linked to call pernet exit
+						 * methods on dead net (net_sem
+						 * read locked), or to unregister
+						 * pernet ops (net_sem wr locked).
+						 */
 	struct user_namespace   *user_ns;	/* Owning user namespace */
 	struct ucounts		*ucounts;
 	spinlock_t		nsid_lock;
@@ -89,7 +92,7 @@  struct net {
 	/* core fib_rules */
 	struct list_head	rules_ops;
 
-	struct list_head	fib_notifier_ops;  /* protected by net_mutex */
+	struct list_head	fib_notifier_ops;  /* protected by net_sem */
 
 	struct net_device       *loopback_dev;          /* The loopback */
 	struct netns_core	core;
@@ -316,7 +319,7 @@  struct pernet_operations {
 	/*
 	 * Indicates above methods are allowed to be executed in parallel
 	 * with methods of any other pernet_operations, i.e. they are not
-	 * need synchronization via net_mutex.
+	 * need write locked net_sem.
 	 */
 	bool async;
 };
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bcab9a938d6f..e89a516620dd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,8 +29,6 @@ 
 
 static LIST_HEAD(pernet_list);
 static struct list_head *first_device = &pernet_list;
-/* Used only if there are !async pernet_operations registered */
-DEFINE_MUTEX(net_mutex);
 
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
@@ -407,6 +405,7 @@  struct net *copy_net_ns(unsigned long flags,
 {
 	struct ucounts *ucounts;
 	struct net *net;
+	unsigned write;
 	int rv;
 
 	if (!(flags & CLONE_NEWNET))
@@ -424,20 +423,26 @@  struct net *copy_net_ns(unsigned long flags,
 	refcount_set(&net->passive, 1);
 	net->ucounts = ucounts;
 	get_user_ns(user_ns);
-
-	rv = down_read_killable(&net_sem);
+again:
+	write = READ_ONCE(nr_sync_pernet_ops);
+	if (write)
+		rv = down_write_killable(&net_sem);
+	else
+		rv = down_read_killable(&net_sem);
 	if (rv < 0)
 		goto put_userns;
-	if (nr_sync_pernet_ops) {
-		rv = mutex_lock_killable(&net_mutex);
-		if (rv < 0)
-			goto up_read;
+
+	if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+		up_read(&net_sem);
+		goto again;
 	}
 	rv = setup_net(net, user_ns);
-	if (nr_sync_pernet_ops)
-		mutex_unlock(&net_mutex);
-up_read:
-	up_read(&net_sem);
+
+	if (write)
+		up_write(&net_sem);
+	else
+		up_read(&net_sem);
+
 	if (rv < 0) {
 put_userns:
 		put_user_ns(user_ns);
@@ -485,15 +490,23 @@  static void cleanup_net(struct work_struct *work)
 	struct net *net, *tmp, *last;
 	struct list_head net_kill_list;
 	LIST_HEAD(net_exit_list);
+	unsigned write;
 
 	/* Atomically snapshot the list of namespaces to cleanup */
 	spin_lock_irq(&cleanup_list_lock);
 	list_replace_init(&cleanup_list, &net_kill_list);
 	spin_unlock_irq(&cleanup_list_lock);
+again:
+	write = READ_ONCE(nr_sync_pernet_ops);
+	if (write)
+		down_write(&net_sem);
+	else
+		down_read(&net_sem);
 
-	down_read(&net_sem);
-	if (nr_sync_pernet_ops)
-		mutex_lock(&net_mutex);
+	if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+		up_read(&net_sem);
+		goto again;
+	}
 
 	/* Don't let anyone else find us. */
 	rtnl_lock();
@@ -528,14 +541,14 @@  static void cleanup_net(struct work_struct *work)
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_exit_list(ops, &net_exit_list);
 
-	if (nr_sync_pernet_ops)
-		mutex_unlock(&net_mutex);
-
 	/* Free the net generic variables */
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_free_list(ops, &net_exit_list);
 
-	up_read(&net_sem);
+	if (write)
+		up_write(&net_sem);
+	else
+		up_read(&net_sem);
 
 	/* Ensure there are no outstanding rcu callbacks using this
 	 * network namespace.
@@ -563,8 +576,6 @@  static void cleanup_net(struct work_struct *work)
 void net_ns_barrier(void)
 {
 	down_write(&net_sem);
-	mutex_lock(&net_mutex);
-	mutex_unlock(&net_mutex);
 	up_write(&net_sem);
 }
 EXPORT_SYMBOL(net_ns_barrier);