diff mbox

[net-next,04/19] net: Kill register_sysctl_rotable

Message ID m1obqngvcg.fsf@fess.ebiederm.org
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric W. Biederman April 19, 2012, 11:22 p.m. UTC
register_net_sysctl_rotable never caught on as an interesting way to
register sysctls.  My take on the situation is that what we want are
sysctls that we can only see in the initial network namespace.  What we
have implemented with register_net_sysctl_rotable are sysctls that we can
see in all of the network namespaces and can only change in the initial
network namespace.

That is a very silly way to go.  Just register the network sysctls
in the initial network namespace and we don't have any weird special
cases to deal with.

The sysctls affected are:
/proc/sys/net/ipv4/ipfrag_secret_interval
/proc/sys/net/ipv4/ipfrag_max_dist
/proc/sys/net/ipv6/ip6frag_secret_interval
/proc/sys/net/ipv6/mld_max_msf

I really don't expect anyone will miss them if they can't read them in a
child network namespace.

CC: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/net/net_namespace.h |    2 --
 net/core/sysctl_net_core.c  |    2 +-
 net/ipv4/ip_fragment.c      |    2 +-
 net/ipv6/reassembly.c       |    2 +-
 net/ipv6/sysctl_net_ipv6.c  |    2 +-
 net/sysctl_net.c            |   23 -----------------------
 6 files changed, 4 insertions(+), 29 deletions(-)

Comments

Serge E. Hallyn April 20, 2012, 1:53 p.m. UTC | #1
Quoting Eric W. Biederman (ebiederm@xmission.com):
> 
> register_sysctl_rotable never caught on as an interesting way to
> register sysctls.  My take on the situation is that what we want are
> sysctls that we can only see in the initial network namespace.  What we
> have implemented with register_sysctl_rotable are sysctls that we can
> see in all of the network namespaces and can only change in the initial
> network namespace.
> 
> That is a very silly way to go.  Just register the network sysctls
> in the initial network namespace and we don't have any weird special
> cases to deal with.
> 
> The sysctls affected are:
> /proc/sys/net/ipv4/ipfrag_secret_interval
> /proc/sys/net/ipv4/ipfrag_max_dist
> /proc/sys/net/ipv6/ip6frag_secret_interval
> /proc/sys/net/ipv6/mld_max_msf
> 
> I really don't expect anyone will miss them if they can't read them in a
> child user namespace.

If there was something userspace could do to work around certain values
of these settings then I'd say keeping the readonly values is worthwhile,
but AFAICS if a bad network context requires ipfrag_max_dist 0, there's
nothing userspace can do about it...

So from a container point of view at least, I'm happy with this.  I'm far
from qualified on the netns code itself, but I'm taking a look in the
unlikely case I can spot something :)

> CC: Pavel Emelyanov <xemul@openvz.org>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  include/net/net_namespace.h |    2 --
>  net/core/sysctl_net_core.c  |    2 +-
>  net/ipv4/ip_fragment.c      |    2 +-
>  net/ipv6/reassembly.c       |    2 +-
>  net/ipv6/sysctl_net_ipv6.c  |    2 +-
>  net/sysctl_net.c            |   23 -----------------------
>  6 files changed, 4 insertions(+), 29 deletions(-)
> 
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index 767dcd40..6413fcb 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -290,8 +290,6 @@ static inline int net_sysctl_init(void) { return 0; }
>  #endif
>  extern struct ctl_table_header *register_net_sysctl_table(struct net *net,
>  	const struct ctl_path *path, struct ctl_table *table);
> -extern struct ctl_table_header *register_net_sysctl_rotable(
> -	const struct ctl_path *path, struct ctl_table *table);
>  extern struct ctl_table_header *register_net_sysctl(struct net *net,
>  	const char *path, struct ctl_table *table);
>  extern void unregister_net_sysctl_table(struct ctl_table_header *header);
> diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
> index cee5991..9504086 100644
> --- a/net/core/sysctl_net_core.c
> +++ b/net/core/sysctl_net_core.c
> @@ -257,7 +257,7 @@ static __init int sysctl_core_init(void)
>  	static struct ctl_table empty[1];
>  
>  	kmemleak_not_leak(register_sysctl_paths(net_core_path, empty));
> -	register_net_sysctl_rotable(net_core_path, net_core_table);
> +	register_net_sysctl(&init_net, "net/core", net_core_table);
>  	return register_pernet_subsys(&sysctl_core_ops);
>  }
>  
> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
> index 3727e23..a746cca 100644
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -807,7 +807,7 @@ static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
>  
>  static void ip4_frags_ctl_register(void)
>  {
> -	register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
> +	register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
>  }
>  #else
>  static inline int ip4_frags_ns_ctl_register(struct net *net)
> diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
> index 9447bd6..42f4f7c 100644
> --- a/net/ipv6/reassembly.c
> +++ b/net/ipv6/reassembly.c
> @@ -674,7 +674,7 @@ static struct ctl_table_header *ip6_ctl_header;
>  
>  static int ip6_frags_sysctl_register(void)
>  {
> -	ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path,
> +	ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6",
>  			ip6_frags_ctl_table);
>  	return ip6_ctl_header == NULL ? -ENOMEM : 0;
>  }
> diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
> index 166a57c..06f21e5 100644
> --- a/net/ipv6/sysctl_net_ipv6.c
> +++ b/net/ipv6/sysctl_net_ipv6.c
> @@ -140,7 +140,7 @@ int ipv6_sysctl_register(void)
>  {
>  	int err = -ENOMEM;
>  
> -	ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_rotable);
> +	ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable);
>  	if (ip6_header == NULL)
>  		goto out;
>  
> diff --git a/net/sysctl_net.c b/net/sysctl_net.c
> index ce97237..2b8d1d9 100644
> --- a/net/sysctl_net.c
> +++ b/net/sysctl_net.c
> @@ -59,19 +59,6 @@ static struct ctl_table_root net_sysctl_root = {
>  	.permissions = net_ctl_permissions,
>  };
>  
> -static int net_ctl_ro_header_perms(struct ctl_table_root *root,
> -		struct nsproxy *namespaces, struct ctl_table *table)
> -{
> -	if (net_eq(namespaces->net_ns, &init_net))
> -		return table->mode;
> -	else
> -		return table->mode & ~0222;
> -}
> -
> -static struct ctl_table_root net_sysctl_ro_root = {
> -	.permissions = net_ctl_ro_header_perms,
> -};
> -
>  static int __net_init sysctl_net_init(struct net *net)
>  {
>  	setup_sysctl_set(&net->sysctls, &net_sysctl_root, is_seen);
> @@ -103,8 +90,6 @@ __init int net_sysctl_init(void)
>  	ret = register_pernet_subsys(&sysctl_pernet_ops);
>  	if (ret)
>  		goto out;
> -	setup_sysctl_set(&net_sysctl_ro_root.default_set, &net_sysctl_ro_root, NULL);
> -	register_sysctl_root(&net_sysctl_ro_root);
>  	register_sysctl_root(&net_sysctl_root);
>  out:
>  	return ret;
> @@ -117,14 +102,6 @@ struct ctl_table_header *register_net_sysctl_table(struct net *net,
>  }
>  EXPORT_SYMBOL_GPL(register_net_sysctl_table);
>  
> -struct ctl_table_header *register_net_sysctl_rotable(const
> -		struct ctl_path *path, struct ctl_table *table)
> -{
> -	return __register_sysctl_paths(&net_sysctl_ro_root.default_set,
> -					path, table);
> -}
> -EXPORT_SYMBOL_GPL(register_net_sysctl_rotable);
> -
>  struct ctl_table_header *register_net_sysctl(struct net *net,
>  	const char *path, struct ctl_table *table)
>  {
> -- 
> 1.7.2.5
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman April 20, 2012, 2:42 p.m. UTC | #2
"Serge E. Hallyn" <serge@hallyn.com> writes:

> Quoting Eric W. Biederman (ebiederm@xmission.com):
>> 
>> register_sysctl_rotable never caught on as an interesting way to
>> register sysctls.  My take on the situation is that what we want are
>> sysctls that we can only see in the initial network namespace.  What we
>> have implemented with register_sysctl_rotable are sysctls that we can
>> see in all of the network namespaces and can only change in the initial
>> network namespace.
>> 
>> That is a very silly way to go.  Just register the network sysctls
>> in the initial network namespace and we don't have any weird special
>> cases to deal with.
>> 
>> The sysctls affected are:
>> /proc/sys/net/ipv4/ipfrag_secret_interval
>> /proc/sys/net/ipv4/ipfrag_max_dist
>> /proc/sys/net/ipv6/ip6frag_secret_interval
>> /proc/sys/net/ipv6/mld_max_msf
>> 
>> I really don't expect anyone will miss them if they can't read them in a
>> child user namespace.
>
> If there was something userspace could do to work around certain values
> of these settings then I'd say keeping the readonly values is worthwhile,
> but AFAICS if a bad network context requires ipfrag_max_dist 0, there's
> nothing userspace can do about it...
>
>
> So from a container pov view at least, I'm happy with this.  I'm far from
> qualified on the netns code itself, but taking a look in the unlikely case
> I can spot something :)

In this case I figured I would copy you and a few others who have been
talking about similar things recently, and also because you might care
that a whole bunch of networking sysctls that aren't per network
namespace will stop showing up in containers.

It is my hope that some of these same mechanisms that allow per
network namespace sysctls will be used to allow per pid and uts
namespace sysctls as well.  It isn't as important as the files don't
change, but we can do it cleanly and one of these days I will get around
to making /proc/sys a symlink to /proc/<pid>/sys so that I can remove
the very unorthodox d_compare tricks that we use today.

The sysctl internal data structures are now a hair cleaner than what
sysfs uses for the same class of problem so I might someday go back and
fix sysfs to use the same idea of internal links, so I can get the sysfs
dirent size down some more, and be able to more cleanly isolate the
namespace handling from the rest of the sysfs code.  It isn't bad today
but it is the source of most of the surprises and bugs when people tweak
the sysfs code.

Anyway I ramble.  Now I need to get back to your review comments on my
user namespace patchset.

Thanks for taking a glance here,
Eric
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 767dcd40..6413fcb 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -290,8 +290,6 @@  static inline int net_sysctl_init(void) { return 0; }
 #endif
 extern struct ctl_table_header *register_net_sysctl_table(struct net *net,
 	const struct ctl_path *path, struct ctl_table *table);
-extern struct ctl_table_header *register_net_sysctl_rotable(
-	const struct ctl_path *path, struct ctl_table *table);
 extern struct ctl_table_header *register_net_sysctl(struct net *net,
 	const char *path, struct ctl_table *table);
 extern void unregister_net_sysctl_table(struct ctl_table_header *header);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cee5991..9504086 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -257,7 +257,7 @@  static __init int sysctl_core_init(void)
 	static struct ctl_table empty[1];
 
 	kmemleak_not_leak(register_sysctl_paths(net_core_path, empty));
-	register_net_sysctl_rotable(net_core_path, net_core_table);
+	register_net_sysctl(&init_net, "net/core", net_core_table);
 	return register_pernet_subsys(&sysctl_core_ops);
 }
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3727e23..a746cca 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -807,7 +807,7 @@  static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
 
 static void ip4_frags_ctl_register(void)
 {
-	register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
+	register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
 }
 #else
 static inline int ip4_frags_ns_ctl_register(struct net *net)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 9447bd6..42f4f7c 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -674,7 +674,7 @@  static struct ctl_table_header *ip6_ctl_header;
 
 static int ip6_frags_sysctl_register(void)
 {
-	ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path,
+	ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6",
 			ip6_frags_ctl_table);
 	return ip6_ctl_header == NULL ? -ENOMEM : 0;
 }
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 166a57c..06f21e5 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -140,7 +140,7 @@  int ipv6_sysctl_register(void)
 {
 	int err = -ENOMEM;
 
-	ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_rotable);
+	ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable);
 	if (ip6_header == NULL)
 		goto out;
 
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index ce97237..2b8d1d9 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -59,19 +59,6 @@  static struct ctl_table_root net_sysctl_root = {
 	.permissions = net_ctl_permissions,
 };
 
-static int net_ctl_ro_header_perms(struct ctl_table_root *root,
-		struct nsproxy *namespaces, struct ctl_table *table)
-{
-	if (net_eq(namespaces->net_ns, &init_net))
-		return table->mode;
-	else
-		return table->mode & ~0222;
-}
-
-static struct ctl_table_root net_sysctl_ro_root = {
-	.permissions = net_ctl_ro_header_perms,
-};
-
 static int __net_init sysctl_net_init(struct net *net)
 {
 	setup_sysctl_set(&net->sysctls, &net_sysctl_root, is_seen);
@@ -103,8 +90,6 @@  __init int net_sysctl_init(void)
 	ret = register_pernet_subsys(&sysctl_pernet_ops);
 	if (ret)
 		goto out;
-	setup_sysctl_set(&net_sysctl_ro_root.default_set, &net_sysctl_ro_root, NULL);
-	register_sysctl_root(&net_sysctl_ro_root);
 	register_sysctl_root(&net_sysctl_root);
 out:
 	return ret;
@@ -117,14 +102,6 @@  struct ctl_table_header *register_net_sysctl_table(struct net *net,
 }
 EXPORT_SYMBOL_GPL(register_net_sysctl_table);
 
-struct ctl_table_header *register_net_sysctl_rotable(const
-		struct ctl_path *path, struct ctl_table *table)
-{
-	return __register_sysctl_paths(&net_sysctl_ro_root.default_set,
-					path, table);
-}
-EXPORT_SYMBOL_GPL(register_net_sysctl_rotable);
-
 struct ctl_table_header *register_net_sysctl(struct net *net,
 	const char *path, struct ctl_table *table)
 {