diff mbox

[4/4,v3] ipv4: mark nexthop as dead when its subnet becomes unreachable

Message ID 1521507.ObZglEaf1D@tuxracer
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Sergey Popovich Jan. 24, 2014, 10:25 a.m. UTC
Removing an IP address and its subnet route using fib_del_ifaddr() does
not purge routes whose nexthop is in that subnet.

This could be easily reproduced with the following config:

  ip link add dev dummy1 type dummy
  ip link set up dev dummy1
  ip -4 addr add 10.0.10.1/24 dev dummy1
  ip -4 addr add 10.0.20.1/24 dev dummy1

  ip -4 route add 172.16.0.0/12 proto static via 10.0.10.5
  ip -4 route show exact 172.16.0.0/12
  172.16.0.0/12 via 10.0.10.5 dev dummy1  proto static

  ip -4 addr del 10.0.10.1/24 dev dummy1

  ip -4 route show exact 172.16.0.0/12
  172.16.0.0/12 via 10.0.10.5 dev dummy1  proto static

Add an interface address (ifa) parameter to fib_sync_down_dev()
and use it to match the nexthop against its subnet.

Use fib_sync_down_dev() in fib_del_ifaddr() along with fib_sync_down_addr()
to mark as dead the routes whose nexthop is in ifa's subnet.

  v3. Fix NH marking as dead when NH gateway subnet is still on
      interface (e.g. 10.0.10.1/24 and 10.0.30.1/16 and NH is 10.0.10.5).
      Thanks to Julian Anastasov.

  v2. Fix NH marking as dead when NH created with onlink option.

Signed-off-by: Sergey Popovich <popovich_sergei@mail.ru>
---
 include/net/ip_fib.h     |  3 ++-
 net/ipv4/fib_frontend.c  |  5 +++--
 net/ipv4/fib_semantics.c | 29 +++++++++++++++++++++++++++--
 3 files changed, 32 insertions(+), 5 deletions(-)

Comments

Julian Anastasov Jan. 24, 2014, 9:49 p.m. UTC | #1
Hello,

On Fri, 24 Jan 2014, Sergey Popovich wrote:

> -int fib_sync_down_dev(struct net_device *dev, int force)
> +static inline bool fib_sync_down_gw(struct fib_nh *nh,
> +				    struct in_ifaddr *ifr)
> +{
> +	if (!ifr)
> +		return true;
> +
> +	if (nh->nh_flags & RTNH_F_ONLINK)
> +		return false;
> +
> +	if (!inet_ifa_match(nh->nh_gw, ifr))
> +		return false;
> +

	You need to walk subnets here, not IPs, so
for_primary_ifa() instead of for_ifa() will save some
cycles. But for me such change still looks expensive
and does not fix the root of the problem:

- You fix the problem from IP address point of view.
The actual problem is that subnet is removed, i.e.
it is the route removal that is making GWs unreachable.
I can ip route delete some link route and cause GWs
to become unreachable.

- not sure that walking the ifa_list is a fast operation

- sadly, the NHs can not survive the secondary address
promotion as done in __inet_del_ifa().

	You can have additional optimization in
fib_del_ifaddr() while calling fib_sync_down_dev():
do nothing if secondary address is deleted because its
subnet (primary address) should be present. For example:

	if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local) |
	    (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
	     fib_sync_down_dev(dev, ifa, 0)))

> +	for_ifa(ifr->ifa_dev) {

	Below ifa == ifr check will not be needed when
for_primary_ifa() is used or when fib_sync_down_dev() is
called only for primary IPs. We can see some ifr in the
list only if it is secondary IP deleted during the promotion
process. Without promotion, the primary/secondary ifa is
unlinked before the NETDEV_DOWN notification and we do
not see it here.

> +		if (unlikely(ifa == ifr))
> +			continue;
> +		if (inet_ifa_match(nh->nh_gw, ifa))
> +			return false;
> +	} endfor_ifa(ifr->ifa_dev);
> +
> +	return true;

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 9922093..0405fc9 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -287,8 +287,9 @@  static inline int fib_num_tclassid_users(struct net *net)
 #endif
 
 /* Exported by fib_semantics.c */
+struct in_ifaddr;
 int ip_fib_check_default(__be32 gw, struct net_device *dev);
-int fib_sync_down_dev(struct net_device *dev, int force);
+int fib_sync_down_dev(struct net_device *dev, struct in_ifaddr *ifa, int force);
 int fib_sync_down_addr(struct net *net, __be32 local);
 int fib_sync_up(struct net_device *dev);
 void fib_select_multipath(struct fib_result *res);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ae5f35f..fd3445e 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -907,7 +907,8 @@  void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 			 * First of all, we scan fib_info list searching
 			 * for stray nexthop entries, then ignite fib_flush.
 			 */
-			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
+			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local) +
+			    fib_sync_down_dev(dev, ifa, 0))
 				fib_flush(dev_net(dev));
 		}
 	}
@@ -997,7 +998,7 @@  static void nl_fib_lookup_exit(struct net *net)
 
 static void fib_disable_ip(struct net_device *dev, int force)
 {
-	if (fib_sync_down_dev(dev, force))
+	if (fib_sync_down_dev(dev, NULL, force))
 		fib_flush(dev_net(dev));
 	rt_cache_flush(dev_net(dev));
 	arp_ifdown(dev);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 9d43468..fbebba5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1112,7 +1112,29 @@  int fib_sync_down_addr(struct net *net, __be32 local)
 	return ret;
 }
 
-int fib_sync_down_dev(struct net_device *dev, int force)
+static inline bool fib_sync_down_gw(struct fib_nh *nh,
+				    struct in_ifaddr *ifr)
+{
+	if (!ifr)
+		return true;
+
+	if (nh->nh_flags & RTNH_F_ONLINK)
+		return false;
+
+	if (!inet_ifa_match(nh->nh_gw, ifr))
+		return false;
+
+	for_ifa(ifr->ifa_dev) {
+		if (unlikely(ifa == ifr))
+			continue;
+		if (inet_ifa_match(nh->nh_gw, ifa))
+			return false;
+	} endfor_ifa(ifr->ifa_dev);
+
+	return true;
+}
+
+int fib_sync_down_dev(struct net_device *dev, struct in_ifaddr *ifa, int force)
 {
 	int ret = 0;
 	int scope = RT_SCOPE_NOWHERE;
@@ -1124,6 +1146,8 @@  int fib_sync_down_dev(struct net_device *dev, int force)
 	if (force)
 		scope = -1;
 
+	BUG_ON(ifa && ifa->ifa_dev->dev != dev);
+
 	hlist_for_each_entry(nh, head, nh_hash) {
 		struct fib_info *fi = nh->nh_parent;
 		int dead;
@@ -1137,7 +1161,8 @@  int fib_sync_down_dev(struct net_device *dev, int force)
 			if (nexthop_nh->nh_flags & RTNH_F_DEAD)
 				dead++;
 			else if (nexthop_nh->nh_dev == dev &&
-				 nexthop_nh->nh_scope != scope) {
+				 nexthop_nh->nh_scope != scope &&
+				 fib_sync_down_gw(nexthop_nh, ifa)) {
 				nexthop_nh->nh_flags |= RTNH_F_DEAD;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 				spin_lock_bh(&fib_multipath_lock);