[v4,net-next,2/6] net/ipv6: Change address check to always take a device argument

Message ID 20180313152941.31218-3-dsahern@gmail.com
State Accepted
Delegated to: David Miller
Headers show
Series
  • net/ipv6: Address checks need to consider the L3 domain
Related show

Commit Message

David Ahern March 13, 2018, 3:29 p.m.
ipv6_chk_addr_and_flags determines if an address is a local address and
optionally if it is an address on a specific device. For example, it is
called by ip6_route_info_create to determine if a given gateway address
is a local address. The address check currently does not consider L3
domains and as a result does not allow a route to be added in one VRF
if the nexthop points to an address in a second VRF. e.g.,

    $ ip route add 2001:db8:1::/64 vrf r2 via 2001:db8:102::23
    Error: Invalid gateway address.

where 2001:db8:102::23 is an address on an interface in vrf r1.

ipv6_chk_addr_and_flags needs to allow callers to always pass in a device,
with a separate argument indicating that the address check should not be
limited to that specific device.
The device is used to determine the L3 domain of interest.

To that end add an argument to skip the device check and update callers
to always pass a device where possible and use the new argument to mean
any address in the domain.

Update a handful of users of ipv6_chk_addr with a NULL dev argument. This
patch handles the change to these callers without adding the domain check.

ip6_validate_gw needs to handle 2 cases - one where the device is given
as part of the nexthop spec and the other where the device is resolved.
There is at least 1 VRF case where deferring the check to only after
the route lookup has resolved the device fails with an unintuitive error
"RTNETLINK answers: No route to host" as opposed to the preferred
"Error: Gateway can not be a local address." The 'no route to host'
error is because of the fallback to a full lookup. The check is done
twice to avoid this error.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/addrconf.h |  4 ++--
 net/ipv6/addrconf.c    | 11 ++++++++---
 net/ipv6/anycast.c     |  9 ++++++---
 net/ipv6/datagram.c    |  5 +++--
 net/ipv6/ip6_tunnel.c  | 12 ++++++++----
 net/ipv6/ndisc.c       |  2 +-
 net/ipv6/route.c       | 19 +++++++++++++++----
 7 files changed, 43 insertions(+), 19 deletions(-)

Comments

Ido Schimmel March 14, 2018, 12:22 p.m. | #1
On Tue, Mar 13, 2018 at 08:29:37AM -0700, David Ahern wrote:
> ipv6_chk_addr_and_flags determines if an address is a local address and
> optionally if it is an address on a specific device. For example, it is
> called by ip6_route_info_create to determine if a given gateway address
> is a local address. The address check currently does not consider L3
> domains and as a result does not allow a route to be added in one VRF
> if the nexthop points to an address in a second VRF. e.g.,
> 
>     $ ip route add 2001:db8:1::/64 vrf r2 via 2001:db8:102::23
>     Error: Invalid gateway address.
> 
> where 2001:db8:102::23 is an address on an interface in vrf r1.
> 
> ipv6_chk_addr_and_flags needs to allow callers to always pass in a device
> with a separate argument to not limit the address to the specific device.
> The device is used to determine the L3 domain of interest.
> 
> To that end add an argument to skip the device check and update callers
> to always pass a device where possible and use the new argument to mean
> any address in the domain.
> 
> Update a handful of users of ipv6_chk_addr with a NULL dev argument. This
> patch handles the change to these callers without adding the domain check.
> 
> ip6_validate_gw needs to handle 2 cases - one where the device is given
> as part of the nexthop spec and the other where the device is resolved.
> There is at least 1 VRF case where deferring the check to only after
> the route lookup has resolved the device fails with an unintuitive error
> "RTNETLINK answers: No route to host" as opposed to the preferred
> "Error: Gateway can not be a local address." The 'no route to host'
> error is because of the fallback to a full lookup. The check is done
> twice to avoid this error.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

Reviewed-by: Ido Schimmel <idosch@mellanox.com>

Thanks for the detailed commit message.

Patch

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index c4185a7b0e90..132e5b95167a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -69,8 +69,8 @@  int addrconf_set_dstaddr(struct net *net, void __user *arg);
 int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
 		  const struct net_device *dev, int strict);
 int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
-			    const struct net_device *dev, int strict,
-			    u32 banned_flags);
+			    const struct net_device *dev, bool skip_dev_check,
+			    int strict, u32 banned_flags);
 
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b5fd116c046a..0677b9732d56 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1851,19 +1851,24 @@  static int ipv6_count_addresses(const struct inet6_dev *idev)
 int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
 		  const struct net_device *dev, int strict)
 {
-	return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+	return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+				       strict, IFA_F_TENTATIVE);
 }
 EXPORT_SYMBOL(ipv6_chk_addr);
 
 int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
-			    const struct net_device *dev, int strict,
-			    u32 banned_flags)
+			    const struct net_device *dev, bool skip_dev_check,
+			    int strict, u32 banned_flags)
 {
 	unsigned int hash = inet6_addr_hash(net, addr);
 	struct inet6_ifaddr *ifp;
 	u32 ifp_flags;
 
 	rcu_read_lock();
+
+	if (skip_dev_check)
+		dev = NULL;
+
 	hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index c61718dba2e6..d580d4d456a5 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -66,7 +66,11 @@  int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		return -EPERM;
 	if (ipv6_addr_is_multicast(addr))
 		return -EINVAL;
-	if (ipv6_chk_addr(net, addr, NULL, 0))
+
+	if (ifindex)
+		dev = __dev_get_by_index(net, ifindex);
+
+	if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
 		return -EINVAL;
 
 	pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -90,8 +94,7 @@  int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 			dev = __dev_get_by_flags(net, IFF_UP,
 						 IFF_UP | IFF_LOOPBACK);
 		}
-	} else
-		dev = __dev_get_by_index(net, ifindex);
+	}
 
 	if (!dev) {
 		err = -ENODEV;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index fbf08ce3f5ab..b27333d7b099 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -801,8 +801,9 @@  int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 			if (addr_type != IPV6_ADDR_ANY) {
 				int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
 				if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
-				    !ipv6_chk_addr(net, &src_info->ipi6_addr,
-						   strict ? dev : NULL, 0) &&
+				    !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+							     dev, !strict, 0,
+							     IFA_F_TENTATIVE) &&
 				    !ipv6_chk_acast_addr_src(net, dev,
 							     &src_info->ipi6_addr))
 					err = -EINVAL;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5c045fa407da..456fcf942f95 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -758,9 +758,11 @@  int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
 			ldev = dev_get_by_index_rcu(net, p->link);
 
 		if ((ipv6_addr_is_multicast(laddr) ||
-		     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+		     likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+						    0, IFA_F_TENTATIVE))) &&
 		    ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
-		     likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+		     likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+						     0, IFA_F_TENTATIVE))))
 			ret = 1;
 	}
 	return ret;
@@ -990,12 +992,14 @@  int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
 		if (p->link)
 			ldev = dev_get_by_index_rcu(net, p->link);
 
-		if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+		if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+						      0, IFA_F_TENTATIVE)))
 			pr_warn("%s xmit: Local address not yet configured!\n",
 				p->name);
 		else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
 			 !ipv6_addr_is_multicast(raddr) &&
-			 unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+			 unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+							  true, 0, IFA_F_TENTATIVE)))
 			pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
 				p->name);
 		else
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 8af5eef464c1..10024eb0c521 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -707,7 +707,7 @@  static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	int probes = atomic_read(&neigh->probes);
 
 	if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
-					   dev, 1,
+					   dev, false, 1,
 					   IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
 		saddr = &ipv6_hdr(skb)->saddr;
 	probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 23ced851fdb1..939d122e71b4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2632,7 +2632,9 @@  static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
 {
 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
 	int gwa_type = ipv6_addr_type(gw_addr);
+	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
 	const struct net_device *dev = *_dev;
+	bool need_addr_check = !dev;
 	int err = -EINVAL;
 
 	/* if gw_addr is local we will fail to detect this in case
@@ -2640,10 +2642,9 @@  static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
 	 * will return already-added prefix route via interface that
 	 * prefix route was assigned to, which might be non-loopback.
 	 */
-	if (ipv6_chk_addr_and_flags(net, gw_addr,
-				    gwa_type & IPV6_ADDR_LINKLOCAL ?
-				    dev : NULL, 0, 0)) {
-		NL_SET_ERR_MSG(extack, "Invalid gateway address");
+	if (dev &&
+	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
 		goto out;
 	}
 
@@ -2683,6 +2684,16 @@  static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
 			       "Egress device can not be loopback device for this route");
 		goto out;
 	}
+
+	/* if we did not check gw_addr above, do so now that the
+	 * egress device has been resolved.
+	 */
+	if (need_addr_check &&
+	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+		goto out;
+	}
+
 	err = 0;
 out:
 	return err;