diff mbox

[net-next] ipv4: properly apply change to ignore_routes_on_linkdown to all interfaces

Message ID 1456936986-2042-1-git-send-email-gospo@cumulusnetworks.com
State Rejected, archived
Delegated to: David Miller
Headers show

Commit Message

Andy Gospodarek March 2, 2016, 4:43 p.m. UTC
Any change to sysctl net.ipv4.conf.all.ignore_routes_with_linkdown does
not result in a change to all interfaces on the system.  This means that
any devices initialized before sysctl settings are applied on boot do
not see a change if the sysctl setting is different than what the stack
has as a default ('0' in this case).

This patch changes the net.ipv4.conf.all.ignore_routes_with_linkdown
setting to match what is done for forwarding for ipv4 and for
ignore_routes_with_linkdown for ipv6.  The current behavior was not
intentional and had I recognized this corner-case before posting I would
have done this with the first series.

Fixes: 0eeb075fad73 ("net: ipv4 sysctl option to ignore routes when nexthop link is down")
Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
---
Generic infrastructure could be added to do this for all values, but I'm
hesitant to do this since historically users are probably depending on
the existing behavior (whether intentional or not) for the more widely
used sysctls.

 net/ipv4/devinet.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 2 deletions(-)

Comments

David Ahern March 2, 2016, 6:17 p.m. UTC | #1
On 3/2/16 8:43 AM, Andy Gospodarek wrote:
>
> +/* called with RTNL locked */
> +static void inet_ignore_routes_change(struct net *net)
> +{
> +	struct net_device *dev;
> +	int on = IPV4_DEVCONF_ALL(net, IGNORE_ROUTES_WITH_LINKDOWN);
> +
> +	IPV4_DEVCONF_DFLT(net, IGNORE_ROUTES_WITH_LINKDOWN) = on;
> +	inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
> +				    NETCONFA_IFINDEX_ALL,
> +				    net->ipv4.devconf_all);
> +	inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
> +				    NETCONFA_IFINDEX_DEFAULT,
> +				    net->ipv4.devconf_dflt);
> +
> +	for_each_netdev(net, dev) {
> +		struct in_device *in_dev;
> +
> +		rcu_read_lock();
> +		in_dev = __in_dev_get_rcu(dev);
> +		if (in_dev) {
> +			IN_DEV_CONF_SET(in_dev,
> +					IGNORE_ROUTES_WITH_LINKDOWN, on);
> +			inet_netconf_notify_devconf(net,
> +						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
> +						    dev->ifindex, &in_dev->cnf);
> +		}
> +		rcu_read_unlock();
> +	}
> +}
> +

This seems wrong -- changing the 'all' and 'default' settings for a 
sysctl should not require walking the interface list.
Andy Gospodarek March 2, 2016, 6:28 p.m. UTC | #2
On Wed, Mar 02, 2016 at 10:17:19AM -0800, David Ahern wrote:
> On 3/2/16 8:43 AM, Andy Gospodarek wrote:
> >
> >+/* called with RTNL locked */
> >+static void inet_ignore_routes_change(struct net *net)
> >+{
> >+	struct net_device *dev;
> >+	int on = IPV4_DEVCONF_ALL(net, IGNORE_ROUTES_WITH_LINKDOWN);
> >+
> >+	IPV4_DEVCONF_DFLT(net, IGNORE_ROUTES_WITH_LINKDOWN) = on;
> >+	inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
> >+				    NETCONFA_IFINDEX_ALL,
> >+				    net->ipv4.devconf_all);
> >+	inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
> >+				    NETCONFA_IFINDEX_DEFAULT,
> >+				    net->ipv4.devconf_dflt);
> >+
> >+	for_each_netdev(net, dev) {
> >+		struct in_device *in_dev;
> >+
> >+		rcu_read_lock();
> >+		in_dev = __in_dev_get_rcu(dev);
> >+		if (in_dev) {
> >+			IN_DEV_CONF_SET(in_dev,
> >+					IGNORE_ROUTES_WITH_LINKDOWN, on);
> >+			inet_netconf_notify_devconf(net,
> >+						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
> >+						    dev->ifindex, &in_dev->cnf);
> >+		}
> >+		rcu_read_unlock();
> >+	}
> >+}
> >+
> 
> This seems wrong -- changing the 'all' and 'default' settings for a sysctl
> should not require walking the interface list.

Not if you want to actually apply the config to all the interfaces.  If
you notice this borrows heavily from the sysctl to control ipv4
forwarding as it behaves in a manner that I would consider to be
proper.
David Ahern March 2, 2016, 6:36 p.m. UTC | #3
On 3/2/16 10:28 AM, Andy Gospodarek wrote:
>> This seems wrong -- changing the 'all' and 'default' settings for a sysctl
>> >should not require walking the interface list.
> Not if you want to actually apply the config to all the interfaces.  If
> you notice this borrows heavily from the sysctl to control ipv4
> forwarding as it behaves in a manner that I would consider to be
> proper.
>

When the config is checked runtime it can look at 'all' and the setting 
for the individual interface; you don't have to walk the devices and 
apply the setting each time it is changed. See the keep_addr_on_down 
implementation.
Andy Gospodarek March 2, 2016, 6:46 p.m. UTC | #4
On Wed, Mar 02, 2016 at 10:36:18AM -0800, David Ahern wrote:
> On 3/2/16 10:28 AM, Andy Gospodarek wrote:
> >>This seems wrong -- changing the 'all' and 'default' settings for a sysctl
> >>>should not require walking the interface list.
> >Not if you want to actually apply the config to all the interfaces.  If
> >you notice this borrows heavily from the sysctl to control ipv4
> >forwarding as it behaves in a manner that I would consider to be
> >proper.
> >
> 
> When the config is checked runtime it can look at 'all' and the setting for
> the individual interface; you don't have to walk the devices and apply the
> setting each time it is changed. See the keep_addr_on_down implementation.

Yes, I've seen that implementation.  My interest was making sure that the
reported interface sysctl value was what was used AND it seems cleaner
to leave the ipv4 fib checks as-is since an additional clause in many of
those statements will make them even harder to read.  :-)
David Miller March 7, 2016, 3:38 a.m. UTC | #5
From: Andy Gospodarek <gospo@cumulusnetworks.com>
Date: Wed,  2 Mar 2016 11:43:06 -0500

> Any change to sysctl net.ipv4.conf.all.ignore_routes_with_linkdown does
> not result in a change to all interfaces on the system.  This means that
> any devices initialized before sysctl settings are applied on boot do
> not see a change if the sysctl setting is different than what the stack
> has as a default ('0' in this case).
> 
> This patch changes the net.ipv4.conf.all.ignore_routes_with_linkdown
> setting to match what is done for forwarding for ipv4 and for
> ignore_routes_with_linkdown for ipv6.  The current behavior was not
> intentional and had I recognized this corner-case before posting I would
> have done this with the first series.
> 
> Fixes: 0eeb075fad73 ("net: ipv4 sysctl option to ignore routes when nexthop link is down")
> Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
> ---
> Generic infrastructure could be added to do this for all values, but I'm
> hesitant to do this since historically users are probably depending on
> the existing behavior (whether intentional or not) for the more widely
> used sysctls.

"Properly" is a matter of interpretation.

Traditionally the way ipv4 works for most sysctls is that we pick up
the default and all values at the time the device gets its ipv4
private attached (first ipv4 address configured, etc.)

So it's a bit too late now to change this behavior.

Yes, that's even if ipv6 behaves differently, and that's even if some
other ipv4 sysctls behave differently too.

I'm not applying this, sorry.
Andy Gospodarek March 7, 2016, 1:21 p.m. UTC | #6
On Sun, Mar 06, 2016 at 10:38:21PM -0500, David Miller wrote:
> From: Andy Gospodarek <gospo@cumulusnetworks.com>
> Date: Wed,  2 Mar 2016 11:43:06 -0500
> 
> > Any change to sysctl net.ipv4.conf.all.ignore_routes_with_linkdown does
> > not result in a change to all interfaces on the system.  This means that
> > any devices initialized before sysctl settings are applied on boot do
> > not see a change if the sysctl setting is different than what the stack
> > has as a default ('0' in this case).
> > 
> > This patch changes the net.ipv4.conf.all.ignore_routes_with_linkdown
> > setting to match what is done for forwarding for ipv4 and for
> > ignore_routes_with_linkdown for ipv6.  The current behavior was not
> > intentional and had I recognized this corner-case before posting I would
> > have done this with the first series.
> > 
> > Fixes: 0eeb075fad73 ("net: ipv4 sysctl option to ignore routes when nexthop link is down")
> > Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
> > ---
> > Generic infrastructure could be added to do this for all values, but I'm
> > hesitant to do this since historically users are probably depending on
> > the existing behavior (whether intentional or not) for the more widely
> > used sysctls.
> 
> "Properly" is a matter of interpretation.

:-)

> Traditionally the way ipv4 works for most sysctls is that we pick up
> the default and all values at the time the device gets its ipv4
> private attached (first ipv4 address configured, etc.)
> 
> So it's a bit too late now to change this behavior.
> 
> Yes, that's even if ipv6 behaves differently, and that's even if some
> other ipv4 sysctls behave differently too.
> 
> I'm not applying this, sorry.

I knew there was a chance you would reach this conclusion and despite
being disappointed that you don't want to allow this change I
understand.  This is essentially a behavioral change and despite the
fact that this might not be an oft-used sysctl there are some that might
rely on the current behavior.
diff mbox

Patch

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 8c3df2c..d247e41 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2030,6 +2030,36 @@  static void inet_forward_change(struct net *net)
 	}
 }
 
+/* called with RTNL locked */
+static void inet_ignore_routes_change(struct net *net)
+{
+	struct net_device *dev;
+	int on = IPV4_DEVCONF_ALL(net, IGNORE_ROUTES_WITH_LINKDOWN);
+
+	IPV4_DEVCONF_DFLT(net, IGNORE_ROUTES_WITH_LINKDOWN) = on;
+	inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+				    NETCONFA_IFINDEX_ALL,
+				    net->ipv4.devconf_all);
+	inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+				    NETCONFA_IFINDEX_DEFAULT,
+				    net->ipv4.devconf_dflt);
+
+	for_each_netdev(net, dev) {
+		struct in_device *in_dev;
+
+		rcu_read_lock();
+		in_dev = __in_dev_get_rcu(dev);
+		if (in_dev) {
+			IN_DEV_CONF_SET(in_dev,
+					IGNORE_ROUTES_WITH_LINKDOWN, on);
+			inet_netconf_notify_devconf(net,
+						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+						    dev->ifindex, &in_dev->cnf);
+		}
+		rcu_read_unlock();
+	}
+}
+
 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
 {
 	if (cnf == net->ipv4.devconf_dflt)
@@ -2147,6 +2177,48 @@  static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
 	return ret;
 }
 
+static int devinet_sysctl_ignore_routes(struct ctl_table *ctl, int write,
+					void __user *buffer,
+					size_t *lenp, loff_t *ppos)
+{
+	int *valp = ctl->data;
+	int val = *valp;
+	loff_t pos = *ppos;
+	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+
+	if (write && *valp != val) {
+		struct net *net = ctl->extra2;
+
+		if (valp != &IPV4_DEVCONF_DFLT(net, IGNORE_ROUTES_WITH_LINKDOWN)) {
+			if (!rtnl_trylock()) {
+				/* Restore the original values before restarting */
+				*valp = val;
+				*ppos = pos;
+				return restart_syscall();
+			}
+			if (valp == &IPV4_DEVCONF_ALL(net, IGNORE_ROUTES_WITH_LINKDOWN)) {
+				inet_ignore_routes_change(net);
+			} else {
+				struct ipv4_devconf *cnf = ctl->extra1;
+				struct in_device *idev =
+					container_of(cnf, struct in_device, cnf);
+				inet_netconf_notify_devconf(net,
+							    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+							    idev->dev->ifindex,
+							    cnf);
+			}
+			rtnl_unlock();
+			rt_cache_flush(net);
+		} else
+			inet_netconf_notify_devconf(net,
+						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+						    NETCONFA_IFINDEX_DEFAULT,
+						    net->ipv4.devconf_dflt);
+	}
+
+	return ret;
+}
+
 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
 	{ \
 		.procname	= name, \
@@ -2205,8 +2277,9 @@  static struct devinet_sysctl_table {
 					"igmpv2_unsolicited_report_interval"),
 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
 					"igmpv3_unsolicited_report_interval"),
-		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
-					"ignore_routes_with_linkdown"),
+		DEVINET_SYSCTL_COMPLEX_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
+					     "ignore_routes_with_linkdown",
+					     devinet_sysctl_ignore_routes),
 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
 					"drop_gratuitous_arp"),