diff mbox series

[RFC,net-next,16/19] ipv6: Flush multipath routes when all siblings are dead

Message ID 20171231161513.25785-17-idosch@mellanox.com
State RFC, archived
Delegated to: David Miller
Headers show
Series [RFC,net-next,01/19] ipv6: Remove redundant route flushing during namespace dismantle | expand

Commit Message

Ido Schimmel Dec. 31, 2017, 4:15 p.m. UTC
By default, IPv6 deletes nexthops from a multipath route when the
nexthop device is put administratively down. This differs from IPv4
where the nexthops are kept, but marked with the RTNH_F_DEAD flag. A
multipath route is flushed when all of its nexthops become dead.

Align IPv6 with IPv4 and have it conform to the same guidelines.

In case the multipath route needs to be flushed, its siblings are
flushed one by one. Otherwise, the nexthops are marked with the
appropriate flags and the tree walker is instructed to skip all the
siblings.

As explained in previous patches, care is taken to update the sernum of
the affected tree nodes, so as to prevent the use of wrong dst entries.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 net/ipv6/route.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 46 insertions(+), 7 deletions(-)

Comments

David Ahern Jan. 2, 2018, 5:38 p.m. UTC | #1
On 12/31/17 9:15 AM, Ido Schimmel wrote:
> By default, IPv6 deletes nexthops from a multipath route when the
> nexthop device is put administratively down. This differs from IPv4
> where the nexthops are kept, but marked with the RTNH_F_DEAD flag. A
> multipath route is flushed when all of its nexthops become dead.
> 
> Align IPv6 with IPv4 and have it conform to the same guidelines.
> 
> In case the multipath route needs to be flushed, its siblings are
> flushed one by one. Otherwise, the nexthops are marked with the
> appropriate flags and the tree walker is instructed to skip all the
> siblings.
> 
> As explained in previous patches, care is taken to update the sernum of
> the affected tree nodes, so as to prevent the use of wrong dst entries.
> 
> Signed-off-by: Ido Schimmel <idosch@mellanox.com>
> ---
>  net/ipv6/route.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 46 insertions(+), 7 deletions(-)
> 
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index ab95e07b5e74..9b142eb83f3c 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -3486,8 +3486,10 @@ static int fib6_ifup(struct rt6_info *rt, void *p_arg)
>  	const struct arg_netdev_event *arg = p_arg;
>  	const struct net *net = dev_net(arg->dev);
>  
> -	if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev)
> +	if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
>  		rt->rt6i_nh_flags &= ~arg->nh_flags;
> +		fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
> +	}
>  
>  	return 0;
>  }
> @@ -3528,6 +3530,35 @@ static void rt6_multipath_flush(struct rt6_info *rt)
>  		iter->should_flush = 1;
>  }
>  
> +static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
> +					     const struct net_device *down_dev)
> +{
> +	struct rt6_info *iter;
> +	unsigned int dead = 0;
> +
> +	if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
> +		dead++;
> +	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
> +		if (iter->dst.dev == down_dev ||
> +		    iter->rt6i_nh_flags & RTNH_F_DEAD)
> +			dead++;
> +
> +	return dead;
> +}
> +
> +static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
> +				       const struct net_device *dev,
> +				       unsigned int nh_flags)
> +{
> +	struct rt6_info *iter;
> +
> +	if (rt->dst.dev == dev)
> +		rt->rt6i_nh_flags |= nh_flags;
> +	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
> +		if (iter->dst.dev == dev)
> +			iter->rt6i_nh_flags |= nh_flags;
> +}
> +
>  /* called with write lock held for table with rt */
>  static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
>  {
> @@ -3550,13 +3581,21 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
>  		}
>  		return -2;
>  	case NETDEV_DOWN:
> -		if (rt->dst.dev != dev)
> -			break;
> -		if (rt->rt6i_nsiblings == 0 ||
> -		    !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
> +		if (rt->should_flush)
>  			return -1;
> -		rt->rt6i_nh_flags |= (RTNH_F_DEAD | RTNH_F_LINKDOWN);
> -		break;
> +		if (!rt->rt6i_nsiblings)
> +			return rt->dst.dev == dev ? -1 : 0;
> +		if (rt6_multipath_uses_dev(rt, dev)) {
> +			if (rt->rt6i_nsiblings + 1 ==
> +			    rt6_multipath_dead_count(rt, dev)) {

I'd prefer a tmp variable to make that line more readable and still
within the 80 column guideline.
			unsigned int count;

			count = rt6_multipath_dead_count(rt, dev);
			if (rt->rt6i_nsiblings + 1 == count) {


> +				rt6_multipath_flush(rt);
> +				return -1;
> +			}
> +			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
> +						   RTNH_F_LINKDOWN);
> +			fib6_update_sernum(rt);
> +		}
> +		return -2;
>  	case NETDEV_CHANGE:
>  		if (rt->dst.dev != dev ||
>  		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
>
Ido Schimmel Jan. 3, 2018, 7:54 a.m. UTC | #2
On Tue, Jan 02, 2018 at 10:38:19AM -0700, David Ahern wrote:
> On 12/31/17 9:15 AM, Ido Schimmel wrote:
> > @@ -3550,13 +3581,21 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
> >  		}
> >  		return -2;
> >  	case NETDEV_DOWN:
> > -		if (rt->dst.dev != dev)
> > -			break;
> > -		if (rt->rt6i_nsiblings == 0 ||
> > -		    !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
> > +		if (rt->should_flush)
> >  			return -1;
> > -		rt->rt6i_nh_flags |= (RTNH_F_DEAD | RTNH_F_LINKDOWN);
> > -		break;
> > +		if (!rt->rt6i_nsiblings)
> > +			return rt->dst.dev == dev ? -1 : 0;
> > +		if (rt6_multipath_uses_dev(rt, dev)) {
> > +			if (rt->rt6i_nsiblings + 1 ==
> > +			    rt6_multipath_dead_count(rt, dev)) {
> 
> I'd prefer a tmp variable to make that line more readable and still
> within the 80 column guideline.
> 			unsigned int count;
> 
> 			count = rt6_multipath_dead_count(rt, dev);
> 			if (rt->rt6i_nsiblings + 1 == count) {

OK, will change.

Thanks!

> 
> 
> > +				rt6_multipath_flush(rt);
> > +				return -1;
> > +			}
> > +			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
> > +						   RTNH_F_LINKDOWN);
> > +			fib6_update_sernum(rt);
> > +		}
> > +		return -2;
diff mbox series

Patch

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ab95e07b5e74..9b142eb83f3c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3486,8 +3486,10 @@  static int fib6_ifup(struct rt6_info *rt, void *p_arg)
 	const struct arg_netdev_event *arg = p_arg;
 	const struct net *net = dev_net(arg->dev);
 
-	if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev)
+	if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
 		rt->rt6i_nh_flags &= ~arg->nh_flags;
+		fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
+	}
 
 	return 0;
 }
@@ -3528,6 +3530,35 @@  static void rt6_multipath_flush(struct rt6_info *rt)
 		iter->should_flush = 1;
 }
 
+static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
+					     const struct net_device *down_dev)
+{
+	struct rt6_info *iter;
+	unsigned int dead = 0;
+
+	if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
+		dead++;
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+		if (iter->dst.dev == down_dev ||
+		    iter->rt6i_nh_flags & RTNH_F_DEAD)
+			dead++;
+
+	return dead;
+}
+
+static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
+				       const struct net_device *dev,
+				       unsigned int nh_flags)
+{
+	struct rt6_info *iter;
+
+	if (rt->dst.dev == dev)
+		rt->rt6i_nh_flags |= nh_flags;
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+		if (iter->dst.dev == dev)
+			iter->rt6i_nh_flags |= nh_flags;
+}
+
 /* called with write lock held for table with rt */
 static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
 {
@@ -3550,13 +3581,21 @@  static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
 		}
 		return -2;
 	case NETDEV_DOWN:
-		if (rt->dst.dev != dev)
-			break;
-		if (rt->rt6i_nsiblings == 0 ||
-		    !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
+		if (rt->should_flush)
 			return -1;
-		rt->rt6i_nh_flags |= (RTNH_F_DEAD | RTNH_F_LINKDOWN);
-		break;
+		if (!rt->rt6i_nsiblings)
+			return rt->dst.dev == dev ? -1 : 0;
+		if (rt6_multipath_uses_dev(rt, dev)) {
+			if (rt->rt6i_nsiblings + 1 ==
+			    rt6_multipath_dead_count(rt, dev)) {
+				rt6_multipath_flush(rt);
+				return -1;
+			}
+			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
+						   RTNH_F_LINKDOWN);
+			fib6_update_sernum(rt);
+		}
+		return -2;
 	case NETDEV_CHANGE:
 		if (rt->dst.dev != dev ||
 		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))