diff mbox series

[net-next] net: introduce a knob to control whether to inherit devconf config

Message ID 20190118072711.491-1-xiyou.wangcong@gmail.com
State Accepted
Delegated to: David Miller
Headers show
Series [net-next] net: introduce a knob to control whether to inherit devconf config | expand

Commit Message

Cong Wang Jan. 18, 2019, 7:27 a.m. UTC
There have been many people complaining about the inconsistent
behaviors of IPv4 and IPv6 devconf when creating new network
namespaces.  Currently, for IPv4, we inherit all current settings
from init_net, but for IPv6 we reset all settings to default.

This patch introduces a new /proc file
/proc/sys/net/core/devconf_inherit_init_net to control the
behavior of whether to inherit current sysctl settings from init_net.
This file itself is only available in init_net.

As demonstrated below:

Initial setup in init_net:
 # cat /proc/sys/net/ipv4/conf/all/rp_filter
 2
 # cat /proc/sys/net/ipv6/conf/all/accept_dad
 1

Default value 0 (current behavior):
 # ip netns del test
 # ip netns add test
 # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
 2
 # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
 0

Set to 1 (inherit from init_net):
 # echo 1 > /proc/sys/net/core/devconf_inherit_init_net
 # ip netns del test
 # ip netns add test
 # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
 2
 # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
 1

Set to 2 (reset to default):
 # echo 2 > /proc/sys/net/core/devconf_inherit_init_net
 # ip netns del test
 # ip netns add test
 # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
 0
 # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
 0

Set to a value out of range (invalid):
 # echo 3 > /proc/sys/net/core/devconf_inherit_init_net
 -bash: echo: write error: Invalid argument
 # echo -1 > /proc/sys/net/core/devconf_inherit_init_net
 -bash: echo: write error: Invalid argument

Reported-by: Zhu Yanjun <Yanjun.Zhu@windriver.com>
Reported-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 Documentation/sysctl/net.txt | 14 ++++++++++++
 include/linux/netdevice.h    |  1 +
 net/core/sysctl_net_core.c   | 18 +++++++++++++++
 net/ipv4/devinet.c           | 43 +++++++++++++++++-------------------
 net/ipv6/addrconf.c          |  5 +++++
 5 files changed, 58 insertions(+), 23 deletions(-)

Comments

Nicolas Dichtel Jan. 18, 2019, 8:25 a.m. UTC | #1
Le 18/01/2019 à 08:27, Cong Wang a écrit :
> There have been many people complaining about the inconsistent
> behaviors of IPv4 and IPv6 devconf when creating new network
> namespaces.  Currently, for IPv4, we inherit all current settings
> from init_net, but for IPv6 we reset all setting to default.
> 
> This patch introduces a new /proc file
> /proc/sys/net/core/devconf_inherit_init_net to control the
> behavior of whether to inhert sysctl current settings from init_net.
> This file itself is only available in init_net.
> 
> As demonstrated below:
> 
> Initial setup in init_net:
>  # cat /proc/sys/net/ipv4/conf/all/rp_filter
>  2
>  # cat /proc/sys/net/ipv6/conf/all/accept_dad
>  1
> 
> Default value 0 (current behavior):
>  # ip netns del test
>  # ip netns add test
>  # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
>  2
>  # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
>  0
> 
> Set to 1 (inherit from init_net):
>  # echo 1 > /proc/sys/net/core/devconf_inherit_init_net
>  # ip netns del test
>  # ip netns add test
>  # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
>  2
>  # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
>  1
> 
> Set to 2 (reset to default):
>  # echo 2 > /proc/sys/net/core/devconf_inherit_init_net
>  # ip netns del test
>  # ip netns add test
>  # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
>  0
>  # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
>  0
> 
> Set to a value out of range (invalid):
>  # echo 3 > /proc/sys/net/core/devconf_inherit_init_net
>  -bash: echo: write error: Invalid argument
>  # echo -1 > /proc/sys/net/core/devconf_inherit_init_net
>  -bash: echo: write error: Invalid argument
> 
> Reported-by: Zhu Yanjun <Yanjun.Zhu@windriver.com>
> Reported-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Nice!

Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Tonghao Zhang Jan. 18, 2019, 2:38 p.m. UTC | #2
On Fri, Jan 18, 2019 at 4:25 PM Nicolas Dichtel
<nicolas.dichtel@6wind.com> wrote:
>
> Le 18/01/2019 à 08:27, Cong Wang a écrit :
> > There have been many people complaining about the inconsistent
> > behaviors of IPv4 and IPv6 devconf when creating new network
> > namespaces.  Currently, for IPv4, we inherit all current settings
> > from init_net, but for IPv6 we reset all setting to default.
> >
> > This patch introduces a new /proc file
> > /proc/sys/net/core/devconf_inherit_init_net to control the
> > behavior of whether to inhert sysctl current settings from init_net.
> > This file itself is only available in init_net.
> >
> > As demonstrated below:
> >
> > Initial setup in init_net:
> >  # cat /proc/sys/net/ipv4/conf/all/rp_filter
> >  2
> >  # cat /proc/sys/net/ipv6/conf/all/accept_dad
> >  1
> >
> > Default value 0 (current behavior):
> >  # ip netns del test
> >  # ip netns add test
> >  # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
> >  2
> >  # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
> >  0
> >
> > Set to 1 (inherit from init_net):
> >  # echo 1 > /proc/sys/net/core/devconf_inherit_init_net
> >  # ip netns del test
> >  # ip netns add test
> >  # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
> >  2
> >  # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
> >  1
> >
> > Set to 2 (reset to default):
> >  # echo 2 > /proc/sys/net/core/devconf_inherit_init_net
> >  # ip netns del test
> >  # ip netns add test
> >  # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
> >  0
> >  # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
> >  0
> >
> > Set to a value out of range (invalid):
> >  # echo 3 > /proc/sys/net/core/devconf_inherit_init_net
> >  -bash: echo: write error: Invalid argument
> >  # echo -1 > /proc/sys/net/core/devconf_inherit_init_net
> >  -bash: echo: write error: Invalid argument
> >
> > Reported-by: Zhu Yanjun <Yanjun.Zhu@windriver.com>
> > Reported-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> > Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
> Nice!
>
> Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
David Miller Jan. 22, 2019, 7:07 p.m. UTC | #3
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 17 Jan 2019 23:27:11 -0800

> There have been many people complaining about the inconsistent
> behaviors of IPv4 and IPv6 devconf when creating new network
> namespaces.  Currently, for IPv4, we inherit all current settings
> from init_net, but for IPv6 we reset all setting to default.
> 
> This patch introduces a new /proc file
> /proc/sys/net/core/devconf_inherit_init_net to control the
> behavior of whether to inhert sysctl current settings from init_net.
> This file itself is only available in init_net.
 ...
> Reported-by: Zhu Yanjun <Yanjun.Zhu@windriver.com>
> Reported-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

Applied, thanks.
diff mbox series

Patch

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 2793d4eac55f..bc0680706870 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -291,6 +291,20 @@  user space is responsible for creating them if needed.
 
 Default : 0  (for compatibility reasons)
 
+devconf_inherit_init_net
+----------------------------
+
+Controls if a new network namespace should inherit all current
+settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By
+default, we keep the current behavior: for IPv4 we inherit all current
+settings from init_net and for IPv6 we reset all settings to default.
+
+If set to 1, both IPv4 and IPv6 settings are forced to inherit from
+current ones in init_net. If set to 2, both IPv4 and IPv6 settings are
+forced to reset to their default values.
+
+Default : 0  (for compatibility reasons)
+
 2. /proc/sys/net/unix - Parameters for Unix domain sockets
 -------------------------------------------------------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1377d085ef99..a7fe028a2879 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -630,6 +630,7 @@  struct netdev_queue {
 } ____cacheline_aligned_in_smp;
 
 extern int sysctl_fb_tunnels_only_for_init_net;
+extern int sysctl_devconf_inherit_init_net;
 
 static inline bool net_has_fallback_tunnels(const struct net *net)
 {
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d67ec17f2cc8..84bf2861f45f 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -36,6 +36,15 @@  static int net_msg_warn;	/* Unused, but still a sysctl */
 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
 
+/* 0 - Keep current behavior:
+ *     IPv4: inherit all current settings from init_net
+ *     IPv6: reset all settings to default
+ * 1 - Both inherit all current settings from init_net
+ * 2 - Both reset all settings to default
+ */
+int sysctl_devconf_inherit_init_net __read_mostly;
+EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
+
 #ifdef CONFIG_RPS
 static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -544,6 +553,15 @@  static struct ctl_table net_core_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "devconf_inherit_init_net",
+		.data		= &sysctl_devconf_inherit_init_net,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &two,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e258a00b4a3d..b824ef0604a6 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2556,32 +2556,32 @@  static __net_init int devinet_init_net(struct net *net)
 	int err;
 	struct ipv4_devconf *all, *dflt;
 #ifdef CONFIG_SYSCTL
-	struct ctl_table *tbl = ctl_forward_entry;
+	struct ctl_table *tbl;
 	struct ctl_table_header *forw_hdr;
 #endif
 
 	err = -ENOMEM;
-	all = &ipv4_devconf;
-	dflt = &ipv4_devconf_dflt;
-
-	if (!net_eq(net, &init_net)) {
-		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
-		if (!all)
-			goto err_alloc_all;
+	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
+	if (!all)
+		goto err_alloc_all;
 
-		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
-		if (!dflt)
-			goto err_alloc_dflt;
+	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
+	if (!dflt)
+		goto err_alloc_dflt;
 
 #ifdef CONFIG_SYSCTL
-		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
-		if (!tbl)
-			goto err_alloc_ctl;
+	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
+	if (!tbl)
+		goto err_alloc_ctl;
 
-		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
-		tbl[0].extra1 = all;
-		tbl[0].extra2 = net;
+	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
+	tbl[0].extra1 = all;
+	tbl[0].extra2 = net;
 #endif
+
+	if (sysctl_devconf_inherit_init_net != 2 && !net_eq(net, &init_net)) {
+		memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf));
+		memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt));
 	}
 
 #ifdef CONFIG_SYSCTL
@@ -2611,15 +2611,12 @@  static __net_init int devinet_init_net(struct net *net)
 err_reg_dflt:
 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
 err_reg_all:
-	if (tbl != ctl_forward_entry)
-		kfree(tbl);
+	kfree(tbl);
 err_alloc_ctl:
 #endif
-	if (dflt != &ipv4_devconf_dflt)
-		kfree(dflt);
+	kfree(dflt);
 err_alloc_dflt:
-	if (all != &ipv4_devconf)
-		kfree(all);
+	kfree(all);
 err_alloc_all:
 	return err;
 }
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 93d5ad2b1a69..0642e5ea34d5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6822,6 +6822,11 @@  static int __net_init addrconf_init_net(struct net *net)
 	if (!dflt)
 		goto err_alloc_dflt;
 
+	if (sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) {
+		memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf));
+		memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt));
+	}
+
 	/* these will be inherited by all namespaces */
 	dflt->autoconf = ipv6_defaults.autoconf;
 	dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;