gre: strict physical device binding
diff mbox

Message ID 1232362205-10572-1-git-send-email-timo.teras@iki.fi
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Timo Teräs Jan. 19, 2009, 10:50 a.m. UTC
Check the device on receive path and allow otherwise identical devices
as long as the physical device differs.

This is useful for NBMA tunnels, where you want to use different gre IP
for each public IP available via different physical devices.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
---
 net/ipv4/ip_gre.c |  128 ++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 88 insertions(+), 40 deletions(-)

Comments

David Miller Jan. 20, 2009, 1:22 a.m. UTC | #1
From: Timo Teras <timo.teras@iki.fi>
Date: Mon, 19 Jan 2009 12:50:05 +0200

> Check the device on receive path and allow otherwise identical devices
> as long as the physical device differs.
> 
> This is useful for NBMA tunnels, where you want to use different gre IP
> for each public IP available via different physical devices.
> 
> Signed-off-by: Timo Teras <timo.teras@iki.fi>

Seems reasonable, applied.

Thanks Timo.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Jan. 21, 2009, 9:54 p.m. UTC | #2
From: David Miller <davem@davemloft.net>
Date: Mon, 19 Jan 2009 17:22:24 -0800 (PST)

> From: Timo Teras <timo.teras@iki.fi>
> Date: Mon, 19 Jan 2009 12:50:05 +0200
> 
> > Check the device on receive path and allow otherwise identical devices
> > as long as the physical device differs.
> > 
> > This is useful for NBMA tunnels, where you want to use different gre IP
> > for each public IP available via different physical devices.
> > 
> > Signed-off-by: Timo Teras <timo.teras@iki.fi>
> 
> Seems reasonable, applied.
> 
> Thanks Timo.

BTW, isn't it much more efficient to implement this priority scoring
using a simple integer instead of this big honking 4 entry array of
pointers on the stack (one entry of which isn't even _used_)?

Something like:

	int score = 4;

	...
	for_each_hash_chain() {
		if (!match)
			continue;
		if (exact_match)
			return this_entry;
		this_score = 0;
		if (condition1)
			this_score |= 2;
		if (condition2)
			this_score |= 1;
		if (this_score < score) {
			score = this_score;
			cand = this_entry;
		}
	}
	...
	return cand;
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Timo Teräs Jan. 22, 2009, 5:34 a.m. UTC | #3
David Miller wrote:
> From: David Miller <davem@davemloft.net>
> Date: Mon, 19 Jan 2009 17:22:24 -0800 (PST)
> 
>> From: Timo Teras <timo.teras@iki.fi>
>> Date: Mon, 19 Jan 2009 12:50:05 +0200
>>
>>> Check the device on receive path and allow otherwise identical devices
>>> as long as the physical device differs.
>>>
>>> This is useful for NBMA tunnels, where you want to use different gre IP
>>> for each public IP available via different physical devices.
>>>
>>> Signed-off-by: Timo Teras <timo.teras@iki.fi>
>> Seems reasonable, applied.
>>
>> Thanks Timo.
> 
> BTW, isn't it much more efficient to implement this priority scoring
> using a simple integer instead of this big honking 4 entry array of
> pointers on the stack (one entry of which isn't even _used_)?
> 
> Something like:
> 
> 	int score = 4;
> 
> 	...
> 	for_each_hash_chain() {
> 		if (!match)
> 			continue;
> 		if (exact_match)
> 			return this_entry;
> 		this_score = 0;
> 		if (condition1)
> 			this_score |= 2;
> 		if (condition2)
> 			this_score |= 1;
> 		if (this_score < score) {
> 			score = this_score;
> 			cand = this_entry;
> 		}
> 	}
> 	...
> 	return cand;

I guess so, will send an updated patch.

- Timo

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0101521..4a43739 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -164,67 +164,113 @@  static DEFINE_RWLOCK(ipgre_lock);
 
 /* Given src, dst and key, find appropriate for input tunnel. */
 
-static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
+static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
 					      __be32 remote, __be32 local,
 					      __be32 key, __be16 gre_proto)
 {
+	struct net *net = dev_net(dev);
+	int link = dev->ifindex;
 	unsigned h0 = HASH(remote);
 	unsigned h1 = HASH(key);
-	struct ip_tunnel *t;
-	struct ip_tunnel *t2 = NULL;
+	struct ip_tunnel *t, *sel[4] = { NULL, NULL, NULL, NULL };
 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
 	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
 		       ARPHRD_ETHER : ARPHRD_IPGRE;
+	int idx;
 
 	for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
-		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
-			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
-				if (t->dev->type == dev_type)
-					return t;
-				if (t->dev->type == ARPHRD_IPGRE && !t2)
-					t2 = t;
-			}
-		}
+		if (local != t->parms.iph.saddr ||
+		    remote != t->parms.iph.daddr ||
+		    key != t->parms.i_key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IPGRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		idx = 0;
+		if (t->parms.link != link)
+			idx |= 1;
+		if (t->dev->type != dev_type)
+			idx |= 2;
+		if (idx == 0)
+			return t;
+		if (sel[idx] == NULL)
+			sel[idx] = t;
 	}
 
 	for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
-		if (remote == t->parms.iph.daddr) {
-			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
-				if (t->dev->type == dev_type)
-					return t;
-				if (t->dev->type == ARPHRD_IPGRE && !t2)
-					t2 = t;
-			}
-		}
+		if (remote != t->parms.iph.daddr ||
+		    key != t->parms.i_key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IPGRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		idx = 0;
+		if (t->parms.link != link)
+			idx |= 1;
+		if (t->dev->type != dev_type)
+			idx |= 2;
+		if (idx == 0)
+			return t;
+		if (sel[idx] == NULL)
+			sel[idx] = t;
 	}
 
 	for (t = ign->tunnels_l[h1]; t; t = t->next) {
-		if (local == t->parms.iph.saddr ||
-		     (local == t->parms.iph.daddr &&
-		      ipv4_is_multicast(local))) {
-			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
-				if (t->dev->type == dev_type)
-					return t;
-				if (t->dev->type == ARPHRD_IPGRE && !t2)
-					t2 = t;
-			}
-		}
+		if ((local != t->parms.iph.saddr &&
+		     (local != t->parms.iph.daddr ||
+		      !ipv4_is_multicast(local))) ||
+		    key != t->parms.i_key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IPGRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		idx = 0;
+		if (t->parms.link != link)
+			idx |= 1;
+		if (t->dev->type != dev_type)
+			idx |= 2;
+		if (idx == 0)
+			return t;
+		if (sel[idx] == NULL)
+			sel[idx] = t;
 	}
 
 	for (t = ign->tunnels_wc[h1]; t; t = t->next) {
-		if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
-			if (t->dev->type == dev_type)
-				return t;
-			if (t->dev->type == ARPHRD_IPGRE && !t2)
-				t2 = t;
-		}
+		if (t->parms.i_key != key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IPGRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		idx = 0;
+		if (t->parms.link != link)
+			idx |= 1;
+		if (t->dev->type != dev_type)
+			idx |= 2;
+		if (idx == 0)
+			return t;
+		if (sel[idx] == NULL)
+			sel[idx] = t;
 	}
 
-	if (t2)
-		return t2;
+	for (idx = 1; idx < ARRAY_SIZE(sel); idx++)
+		if (sel[idx] != NULL)
+			return sel[idx];
 
-	if (ign->fb_tunnel_dev->flags&IFF_UP)
+	if (ign->fb_tunnel_dev->flags & IFF_UP)
 		return netdev_priv(ign->fb_tunnel_dev);
+
 	return NULL;
 }
 
@@ -284,6 +330,7 @@  static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
 	__be32 remote = parms->iph.daddr;
 	__be32 local = parms->iph.saddr;
 	__be32 key = parms->i_key;
+	int link = parms->link;
 	struct ip_tunnel *t, **tp;
 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
 
@@ -291,6 +338,7 @@  static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
 		if (local == t->parms.iph.saddr &&
 		    remote == t->parms.iph.daddr &&
 		    key == t->parms.i_key &&
+		    link == t->parms.link &&
 		    type == t->dev->type)
 			break;
 
@@ -421,7 +469,7 @@  static void ipgre_err(struct sk_buff *skb, u32 info)
 	}
 
 	read_lock(&ipgre_lock);
-	t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
+	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
 				flags & GRE_KEY ?
 				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
 				p[1]);
@@ -518,7 +566,7 @@  static int ipgre_rcv(struct sk_buff *skb)
 	gre_proto = *(__be16 *)(h + 2);
 
 	read_lock(&ipgre_lock);
-	if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
+	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
 					  iph->saddr, iph->daddr, key,
 					  gre_proto))) {
 		struct net_device_stats *stats = &tunnel->dev->stats;