diff mbox series

[iproute2] ip/netns: use flock when setting up /run/netns

Message ID 20201127180651.80283-1-bluca@debian.org
State Accepted
Delegated to: stephen hemminger
Headers show
Series [iproute2] ip/netns: use flock when setting up /run/netns | expand

Commit Message

Luca Boccassi Nov. 27, 2020, 6:06 p.m. UTC
If multiple ip processes are ran at the same time to set up
separate network namespaces, and it is the first time so /run/netns
has to be set up first, and they end up doing it at the same time,
the processes might enter a recursive loop creating thousands of
mount points, which might crash the system depending on resources
available.

Try to take a flock on /run/netns before doing the mount() dance, to
ensure this cannot happen. But do not try too hard, and if it fails
continue after printing a warning, to avoid introducing regressions.

First reported on Debian: https://bugs.debian.org/949235

To reproduce (WARNING: run in a VM to avoid system lockups):

for i in {0..9}
do
        strace -e trace=mount -e inject=mount:delay_exit=1000000 ip \
 netns add "testnetns$i" 2>&1 | tee "$i.log" &
done
wait

The strace is to ensure the problem always reproduces, to add an
artificial synchronization point after the first mount().

Reported-by: Etienne Dechamps <etienne@edechamps.fr>
Signed-off-by: Luca Boccassi <bluca@debian.org>
---
 ip/ipnetns.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
diff mbox series

Patch

diff --git a/ip/ipnetns.c b/ip/ipnetns.c
index 14e8e087..3e96d267 100644
--- a/ip/ipnetns.c
+++ b/ip/ipnetns.c
@@ -1,5 +1,6 @@ 
 /* SPDX-License-Identifier: GPL-2.0 */
 #define _ATFILE_SOURCE
+#include <sys/file.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
@@ -801,6 +802,7 @@  static int netns_add(int argc, char **argv, bool create)
 	const char *name;
 	pid_t pid;
 	int fd;
+	int lock;
 	int made_netns_run_dir_mount = 0;
 
 	if (create) {
@@ -831,12 +833,37 @@  static int netns_add(int argc, char **argv, bool create)
 	 * namespace file in one namespace will unmount the network namespace
 	 * file in all namespaces allowing the network namespace to be freed
 	 * sooner.
+	 * These setup steps need to happen only once, as if multiple ip processes
+	 * try to attempt the same operation at the same time, the mountpoints will
+	 * be recursively created multiple times, eventually causing the system
+	 * to lock up. For example, this has been observed when multiple netns
+	 * namespaces are created in parallel at boot. See:
+	 * https://bugs.debian.org/949235
+	 * Try to take an exclusive file lock on the top level directory to ensure
+	 * this cannot happen, but proceed nonetheless if it cannot happen for any
+	 * reason.
 	 */
+	lock = open(NETNS_RUN_DIR, O_RDONLY|O_DIRECTORY, 0);
+	if (lock < 0) {
+		fprintf(stderr, "Cannot open netns runtime directory \"%s\": %s\n",
+			NETNS_RUN_DIR, strerror(errno));
+		return -1;
+	}
+	if (flock(lock, LOCK_EX) < 0) {
+		fprintf(stderr, "Warning: could not flock netns runtime directory \"%s\": %s\n",
+			NETNS_RUN_DIR, strerror(errno));
+		close(lock);
+		lock = -1;
+	}
 	while (mount("", NETNS_RUN_DIR, "none", MS_SHARED | MS_REC, NULL)) {
 		/* Fail unless we need to make the mount point */
 		if (errno != EINVAL || made_netns_run_dir_mount) {
 			fprintf(stderr, "mount --make-shared %s failed: %s\n",
 				NETNS_RUN_DIR, strerror(errno));
+			if (lock != -1) {
+				flock(lock, LOCK_UN);
+				close(lock);
+			}
 			return -1;
 		}
 
@@ -844,10 +871,18 @@  static int netns_add(int argc, char **argv, bool create)
 		if (mount(NETNS_RUN_DIR, NETNS_RUN_DIR, "none", MS_BIND | MS_REC, NULL)) {
 			fprintf(stderr, "mount --bind %s %s failed: %s\n",
 				NETNS_RUN_DIR, NETNS_RUN_DIR, strerror(errno));
+			if (lock != -1) {
+				flock(lock, LOCK_UN);
+				close(lock);
+			}
 			return -1;
 		}
 		made_netns_run_dir_mount = 1;
 	}
+	if (lock != -1) {
+		flock(lock, LOCK_UN);
+		close(lock);
+	}
 
 	/* Create the filesystem state */
 	fd = open(netns_path, O_RDONLY|O_CREAT|O_EXCL, 0);