diff mbox

[2/4] ipgre: follow state of lower device

Message ID 20120412163142.544953001@vyatta.com
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

stephen hemminger April 12, 2012, 4:31 p.m. UTC
GRE tunnels like other layered devices should propogate
carrier and RFC2863 state from lower device to tunnel.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Ben Hutchings April 12, 2012, 5:32 p.m. UTC | #1
On Thu, 2012-04-12 at 09:31 -0700, Stephen Hemminger wrote:

> GRE tunnels like other layered devices should propogate
> carrier and RFC2863 state from lower device to tunnel.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> --- a/net/ipv4/ip_gre.c	2012-04-12 08:07:39.508107847 -0700
> +++ b/net/ipv4/ip_gre.c	2012-04-12 08:10:14.177499183 -0700
[...]
> @@ -1732,6 +1734,36 @@ static struct rtnl_link_ops ipgre_tap_op
>  	.fill_info	= ipgre_fill_info,
>  };
>  
> +/* If lower device changes state, reflect that to the tunnel. */
> +static int ipgre_notify(struct notifier_block *unused,
> +			unsigned long event, void *ptr)
> +{
> +	struct net_device *dev = ptr;
> +	struct net *net = dev_net(dev);
> +	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
> +	unsigned int prio, h;
> +	struct ip_tunnel *t;
> +
> +	if (event == NETDEV_CHANGE)
> +		return NOTIFY_DONE;

Surely we should handle NETDEV_UP, NETDEV_CHANGE, NETDEV_DOWN here?  Not
everything other than NETDEV_CHANGE.

> +	for (prio = 0; prio < 4; prio++)
> +		for (h = 0; h < HASH_SIZE; h++) {
> +			for (t = rtnl_dereference(ign->tunnels[prio][h]);
> +			     t; t = rtnl_dereference(t->next)) {
> +				if (dev->ifindex != t->dev->iflink)
> +					continue;
> +				netif_stacked_transfer_operstate(dev, t->dev);
> +			}
> +		}
[...]

This seems potentially very inefficient.

Ben.
stephen hemminger April 12, 2012, 5:38 p.m. UTC | #2
On Thu, 12 Apr 2012 18:32:07 +0100
Ben Hutchings <bhutchings@solarflare.com> wrote:

> On Thu, 2012-04-12 at 09:31 -0700, Stephen Hemminger wrote:
> 
> > GRE tunnels like other layered devices should propogate
> > carrier and RFC2863 state from lower device to tunnel.
> > 
> > Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> > 
> > --- a/net/ipv4/ip_gre.c	2012-04-12 08:07:39.508107847 -0700
> > +++ b/net/ipv4/ip_gre.c	2012-04-12 08:10:14.177499183 -0700
> [...]
> > @@ -1732,6 +1734,36 @@ static struct rtnl_link_ops ipgre_tap_op
> >  	.fill_info	= ipgre_fill_info,
> >  };
> >  
> > +/* If lower device changes state, reflect that to the tunnel. */
> > +static int ipgre_notify(struct notifier_block *unused,
> > +			unsigned long event, void *ptr)
> > +{
> > +	struct net_device *dev = ptr;
> > +	struct net *net = dev_net(dev);
> > +	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
> > +	unsigned int prio, h;
> > +	struct ip_tunnel *t;
> > +
> > +	if (event == NETDEV_CHANGE)
> > +		return NOTIFY_DONE;
> 
> Surely we should handle NETDEV_UP, NETDEV_CHANGE, NETDEV_DOWN here?  Not
> everything other than NETDEV_CHANGE.

yes, up and down needed as well.

> > +	for (prio = 0; prio < 4; prio++)
> > +		for (h = 0; h < HASH_SIZE; h++) {
> > +			for (t = rtnl_dereference(ign->tunnels[prio][h]);
> > +			     t; t = rtnl_dereference(t->next)) {
> > +				if (dev->ifindex != t->dev->iflink)
> > +					continue;
> > +				netif_stacked_transfer_operstate(dev, t->dev);
> > +			}
> > +		}
> [...]
> 
> This seems potentially very inefficient.

Yes, but there is no list of tunnels per device.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller April 14, 2012, 6:53 p.m. UTC | #3
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 12 Apr 2012 09:31:17 -0700

> GRE tunnels like other layered devices should propogate
> carrier and RFC2863 state from lower device to tunnel.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

Like others I don't like the ugly hash traversal.

A small hash on ifindex, iflink, or whatever ought to be easy and make
the code look much nicer.

Longer term project is that a lot of this tunneling code can be
commonized at some point.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stephen Hemminger April 15, 2012, 2:56 a.m. UTC | #4
----- Original Message -----
> From: Stephen Hemminger <shemminger@vyatta.com>
> Date: Thu, 12 Apr 2012 09:31:17 -0700
> 
> > GRE tunnels like other layered devices should propogate
> > carrier and RFC2863 state from lower device to tunnel.
> > 
> > Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> Like others I don't like the ugly hash traversal.
> 
> A small hash on ifindex, iflink, or whatever ought to be easy and
> make
> the code look much nicer.
> 
> Longer term project is that a lot of this tunneling code can be
> commonized at some point.

yeah. also want to replace open coded rcu with rcu hlist.

other tunnels that are needed are gretap over ipv6, and vxvlan.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger May 3, 2012, 10:40 p.m. UTC | #5
On Sat, 14 Apr 2012 14:53:02 -0400 (EDT)
David Miller <davem@davemloft.net> wrote:

> From: Stephen Hemminger <shemminger@vyatta.com>
> Date: Thu, 12 Apr 2012 09:31:17 -0700
> 
> > GRE tunnels like other layered devices should propogate
> > carrier and RFC2863 state from lower device to tunnel.
> > 
> > Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> Like others I don't like the ugly hash traversal.
> 
> A small hash on ifindex, iflink, or whatever ought to be easy and make
> the code look much nicer.
> 
> Longer term project is that a lot of this tunneling code can be
> commonized at some point.

The whole set of tunnels needs to be cleaned up to be something modular, clean
and cached like the code in OpenVswitch.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christian Benvenuti (benve) May 4, 2012, 11:34 p.m. UTC | #6
Is this the same issue I described in the email below?

  Subject:Route flush on linkdown: physical vs virtual/stacked
interfaces
  http://marc.info/?l=linux-netdev&m=133468470719285&w=2

(ie, need to propagate carrier changes to upper layer device/s)

Thanks
/Chris

> -----Original Message-----
> From: netdev-owner@vger.kernel.org
[mailto:netdev-owner@vger.kernel.org] On Behalf Of Stephen
> Hemminger
> Sent: Thursday, May 03, 2012 3:40 PM
> To: David Miller
> Cc: netdev@vger.kernel.org
> Subject: Re: [PATCH 2/4] ipgre: follow state of lower device
> 
> On Sat, 14 Apr 2012 14:53:02 -0400 (EDT)
> David Miller <davem@davemloft.net> wrote:
> 
> > From: Stephen Hemminger <shemminger@vyatta.com>
> > Date: Thu, 12 Apr 2012 09:31:17 -0700
> >
> > > GRE tunnels like other layered devices should propogate
> > > carrier and RFC2863 state from lower device to tunnel.
> > >
> > > Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> >
> > Like others I don't like the ugly hash traversal.
> >
> > A small hash on ifindex, iflink, or whatever ought to be easy and
make
> > the code look much nicer.
> >
> > Longer term project is that a lot of this tunneling code can be
> > commonized at some point.
> 
> The whole set of tunnels needs to be cleaned up to be something
modular, clean
> and cached like the code in OpenVswitch.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

--- a/net/ipv4/ip_gre.c	2012-04-12 08:07:39.508107847 -0700
+++ b/net/ipv4/ip_gre.c	2012-04-12 08:10:14.177499183 -0700
@@ -991,6 +991,7 @@  static int ipgre_tunnel_bind_dev(struct
 	if (tdev) {
 		hlen = tdev->hard_header_len + tdev->needed_headroom;
 		mtu = tdev->mtu;
+		netif_stacked_transfer_operstate(tdev, dev);
 	}
 	dev->iflink = tunnel->parms.link;
 
@@ -1575,6 +1576,7 @@  static int ipgre_newlink(struct net *src
 
 	dev_hold(dev);
 	ipgre_tunnel_link(ign, nt);
+	linkwatch_fire_event(dev);
 
 out:
 	return err;
@@ -1732,6 +1734,36 @@  static struct rtnl_link_ops ipgre_tap_op
 	.fill_info	= ipgre_fill_info,
 };
 
+/* If lower device changes state, reflect that to the tunnel. */
+static int ipgre_notify(struct notifier_block *unused,
+			unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct net *net = dev_net(dev);
+	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
+	unsigned int prio, h;
+	struct ip_tunnel *t;
+
+	if (event == NETDEV_CHANGE)
+		return NOTIFY_DONE;
+
+	for (prio = 0; prio < 4; prio++)
+		for (h = 0; h < HASH_SIZE; h++) {
+			for (t = rtnl_dereference(ign->tunnels[prio][h]);
+			     t; t = rtnl_dereference(t->next)) {
+				if (dev->ifindex != t->dev->iflink)
+					continue;
+				netif_stacked_transfer_operstate(dev, t->dev);
+			}
+		}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ipgre_notifier = {
+	.notifier_call = ipgre_notify,
+};
+
 /*
  *	And now the modules code and kernel interface.
  */
@@ -1760,9 +1792,15 @@  static int __init ipgre_init(void)
 	if (err < 0)
 		goto tap_ops_failed;
 
+	err = register_netdevice_notifier(&ipgre_notifier);
+	if (err < 0)
+		goto notify_failed;
+
 out:
 	return err;
 
+notify_failed:
+	rtnl_link_unregister(&ipgre_tap_ops);
 tap_ops_failed:
 	rtnl_link_unregister(&ipgre_link_ops);
 rtnl_link_failed:
@@ -1774,6 +1812,7 @@  add_proto_failed:
 
 static void __exit ipgre_fini(void)
 {
+	unregister_netdevice_notifier(&ipgre_notifier);
 	rtnl_link_unregister(&ipgre_tap_ops);
 	rtnl_link_unregister(&ipgre_link_ops);
 	if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)