diff mbox

[2/4] macvlan: allow in-kernel modules to create and manage macvlan devices

Message ID 20091110222757.24100.16046.stgit@mimic.site
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Patrick Mullaney Nov. 10, 2009, 10:27 p.m. UTC
The macvlan driver didn't allow for creation/deletion of devices
by other in-kernel modules. This patch provides common routines
for both in-kernel and netlink based management. This patch
also enables macvlan device support for gro for lower level
devices that support gro.

Signed-off-by: Patrick Mullaney <pmullaney@novell.com>
---

 drivers/net/macvlan.c   |   72 +++++++++++++++++++++++++++++++----------------
 include/linux/macvlan.h |    6 ++++
 2 files changed, 53 insertions(+), 25 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Patrick McHardy Nov. 11, 2009, 3:29 p.m. UTC | #1
Patrick Mullaney wrote:
> The macvlan driver didn't allow for creation/deletion of devices
> by other in-kernel modules. This patch provides common routines
> for both in-kernel and netlink based management. This patch
> also enables macvlan device support for gro for lower level
> devices that support gro.

> -static void macvlan_transfer_operstate(struct net_device *dev)
> +void macvlan_transfer_operstate(struct net_device *dev)
>  {
>  	struct macvlan_dev *vlan = netdev_priv(dev);
>  	const struct net_device *lowerdev = vlan->lowerdev;
> @@ -458,6 +458,7 @@ static void macvlan_transfer_operstate(struct net_device *dev)
>  			netif_carrier_off(dev);
>  	}
>  }
> +EXPORT_SYMBOL_GPL(macvlan_transfer_operstate);

I think this function could be moved to net/core/dev.c or
net/core/link_watch.c. The VLAN code has an identical copy.

> -int macvlan_newlink(struct net_device *dev,
> -		    struct nlattr *tb[], struct nlattr *data[])
> +int macvlan_link_lowerdev(struct net_device *dev,
> +						  struct net_device *lowerdev)

Please indent this more cleanly.

>  {
>  	struct macvlan_dev *vlan = netdev_priv(dev);
>  	struct macvlan_port *port;
> +	int err = 0;
> +
> +	if (lowerdev->macvlan_port == NULL) {
> +		err = macvlan_port_create(lowerdev);
> +		if (err < 0)
> +			return err;
> +	}
> +	port = lowerdev->macvlan_port;
> +
> +	vlan->lowerdev = lowerdev;
> +	vlan->dev      = dev;
> +	vlan->port     = port;
> +	vlan->receive  = netif_rx;
> +
> +	macvlan_init(dev);
> +
> +	list_add_tail(&vlan->list, &port->vlans);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(macvlan_link_lowerdev);

> @@ -502,23 +539,14 @@ int macvlan_newlink(struct net_device *dev,
>  	if (!tb[IFLA_ADDRESS])
>  		random_ether_addr(dev->dev_addr);
>  
> -	if (lowerdev->macvlan_port == NULL) {
> -		err = macvlan_port_create(lowerdev);
> -		if (err < 0)
> -			return err;
> -	}
> -	port = lowerdev->macvlan_port;
> -
> -	vlan->lowerdev = lowerdev;
> -	vlan->dev      = dev;
> -	vlan->port     = port;
> -	vlan->receive  = netif_rx;
> +	err = macvlan_link_lowerdev(dev, lowerdev);
> +	if (err < 0)
> +		return err;
>
>  	err = register_netdevice(dev);
>  	if (err < 0)
>  		return err;

You've already added the device to the port->vlans list, so you
need to remove it again when register_netdevice() fails.

> -	list_add_tail(&vlan->list, &port->vlans);
>  	macvlan_transfer_operstate(dev);
>  	return 0;
>  }
> @@ -526,14 +554,8 @@ EXPORT_SYMBOL_GPL(macvlan_newlink);
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Patrick Mullaney Nov. 13, 2009, 7:55 p.m. UTC | #2
(Applies to net-2.6.git/master:1dfc5827)

These patches allow other modules to override the receive
path of a macvlan. This is being done to support guest
VMs operating directly over a macvlan. Routines to allow
creation and deletion of macvlans from in-kernel modules
were also exposed/added.

---

Patrick Mullaney (3):
      macvlan: allow in-kernel modules to create and manage macvlan devices
      macvlan:  derived from Arnd Bergmann's patch for macvtap
      netdevice: provide common routine for macvlan and vlan operstate management


 drivers/net/macvlan.c     |  135 ++++++++++++++++++++++-----------------------
 include/linux/macvlan.h   |   41 ++++++++++++++
 include/linux/netdevice.h |    3 +
 net/8021q/vlan.c          |   29 +---------
 net/core/dev.c            |   27 +++++++++
 5 files changed, 142 insertions(+), 93 deletions(-)
 create mode 100644 include/linux/macvlan.h

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger Nov. 13, 2009, 9:27 p.m. UTC | #3
On Fri, 13 Nov 2009 14:55:06 -0500
Patrick Mullaney <pmullaney@novell.com> wrote:

> These patches allow other modules to override the receive
> path of a macvlan. This is being done to support guest
> VMs operating directly over a macvlan. Routines to allow
> creation and deletion of macvlans from in-kernel modules
> were also exposed/added.

Which guest VM, how will it use it? The kernel is not in the business
of providing infrastructure for out of tree patches.

Also, macvlan should really being calling netif_receive_skb()
not going through another queue/softirq cycle.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann Nov. 27, 2009, 11:43 p.m. UTC | #4
On Friday 13 November 2009, Stephen Hemminger wrote:
> Also, macvlan should really being calling netif_receive_skb()
> not going through another queue/softirq cycle.

I've added a patch for this in my experimental queue now.
When I last tried this, I saw a kernel stack overflow
but it seems fine now.

I wonder if we can also return from the macvlan hook with
skb->dev changed to netif_receive_skb rather than calling it
recursively.

	Arnd <><
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Nov. 28, 2009, 12:19 a.m. UTC | #5
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 28 Nov 2009 00:43:58 +0100

> On Friday 13 November 2009, Stephen Hemminger wrote:
>> Also, macvlan should really being calling netif_receive_skb()
>> not going through another queue/softirq cycle.
> 
> I've added a patch for this in my experimental queue now.
> When I last tried this, I saw a kernel stack overflow
> but it seems fine now.

I think it is unwise for any virtual device layer to use netif_receive_skb().
Just like tunnels they should always use netif_rx().

Otherwise stack overflow is a very real concern.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger Nov. 28, 2009, 5:38 a.m. UTC | #6
On Fri, 27 Nov 2009 16:19:57 -0800 (PST)
David Miller <davem@davemloft.net> wrote:

> From: Arnd Bergmann <arnd@arndb.de>
> Date: Sat, 28 Nov 2009 00:43:58 +0100
> 
> > On Friday 13 November 2009, Stephen Hemminger wrote:
> >> Also, macvlan should really being calling netif_receive_skb()
> >> not going through another queue/softirq cycle.
> > 
> > I've added a patch for this in my experimental queue now.
> > When I last tried this, I saw a kernel stack overflow
> > but it seems fine now.
> 
> I think it is unwise for any virtual device layer to use netif_receive_skb().
> Just like tunnels they should always use netif_rx().
> 
> Otherwise stack overflow is a very real concern.

Maybe we should figure out a way for protocols to return new skb in netif_receive_skb
to avoid extra softirq, but avoid stack overflow?
David Miller Nov. 28, 2009, 6:58 a.m. UTC | #7
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 27 Nov 2009 21:38:24 -0800

> Maybe we should figure out a way for protocols to return new skb in netif_receive_skb
> to avoid extra softirq, but avoid stack overflow?

Eric Dumazet and I tried to find ways to handle this, please see the
archives.  It's not an easy problem to solve and none of the patches
we came up with avoided crashes under high stress scenarios.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 0a389b8..6b98b26 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -208,7 +208,7 @@  static const struct header_ops macvlan_hard_header_ops = {
 	.cache_update	= eth_header_cache_update,
 };
 
-static int macvlan_open(struct net_device *dev)
+int macvlan_open(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
@@ -235,7 +235,7 @@  out:
 	return err;
 }
 
-static int macvlan_stop(struct net_device *dev)
+int macvlan_stop(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
@@ -316,7 +316,7 @@  static struct lock_class_key macvlan_netdev_addr_lock_key;
 #define MACVLAN_FEATURES \
 	(NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
 	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \
-	 NETIF_F_TSO_ECN | NETIF_F_TSO6)
+	 NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO)
 
 #define MACVLAN_STATE_MASK \
 	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
@@ -440,7 +440,7 @@  static void macvlan_port_destroy(struct net_device *dev)
 	kfree(port);
 }
 
-static void macvlan_transfer_operstate(struct net_device *dev)
+void macvlan_transfer_operstate(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	const struct net_device *lowerdev = vlan->lowerdev;
@@ -458,6 +458,7 @@  static void macvlan_transfer_operstate(struct net_device *dev)
 			netif_carrier_off(dev);
 	}
 }
+EXPORT_SYMBOL_GPL(macvlan_transfer_operstate);
 
 int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
 {
@@ -471,11 +472,47 @@  int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
 }
 EXPORT_SYMBOL_GPL(macvlan_validate);
 
-int macvlan_newlink(struct net_device *dev,
-		    struct nlattr *tb[], struct nlattr *data[])
+int macvlan_link_lowerdev(struct net_device *dev,
+						  struct net_device *lowerdev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct macvlan_port *port;
+	int err = 0;
+
+	if (lowerdev->macvlan_port == NULL) {
+		err = macvlan_port_create(lowerdev);
+		if (err < 0)
+			return err;
+	}
+	port = lowerdev->macvlan_port;
+
+	vlan->lowerdev = lowerdev;
+	vlan->dev      = dev;
+	vlan->port     = port;
+	vlan->receive  = netif_rx;
+
+	macvlan_init(dev);
+
+	list_add_tail(&vlan->list, &port->vlans);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(macvlan_link_lowerdev);
+
+void macvlan_unlink_lowerdev(struct net_device *dev)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct macvlan_port *port = vlan->port;
+
+	list_del(&vlan->list);
+
+	if (list_empty(&port->vlans))
+		macvlan_port_destroy(port->dev);
+}
+EXPORT_SYMBOL_GPL(macvlan_unlink_lowerdev);
+
+int macvlan_newlink(struct net_device *dev,
+		    struct nlattr *tb[], struct nlattr *data[])
+{
 	struct net_device *lowerdev;
 	int err;
 
@@ -502,23 +539,14 @@  int macvlan_newlink(struct net_device *dev,
 	if (!tb[IFLA_ADDRESS])
 		random_ether_addr(dev->dev_addr);
 
-	if (lowerdev->macvlan_port == NULL) {
-		err = macvlan_port_create(lowerdev);
-		if (err < 0)
-			return err;
-	}
-	port = lowerdev->macvlan_port;
-
-	vlan->lowerdev = lowerdev;
-	vlan->dev      = dev;
-	vlan->port     = port;
-	vlan->receive  = netif_rx;
+	err = macvlan_link_lowerdev(dev, lowerdev);
+	if (err < 0)
+		return err;
 
 	err = register_netdevice(dev);
 	if (err < 0)
 		return err;
 
-	list_add_tail(&vlan->list, &port->vlans);
 	macvlan_transfer_operstate(dev);
 	return 0;
 }
@@ -526,14 +554,8 @@  EXPORT_SYMBOL_GPL(macvlan_newlink);
 
 void macvlan_dellink(struct net_device *dev)
 {
-	struct macvlan_dev *vlan = netdev_priv(dev);
-	struct macvlan_port *port = vlan->port;
-
-	list_del(&vlan->list);
+	macvlan_unlink_lowerdev(dev);
 	unregister_netdevice(dev);
-
-	if (list_empty(&port->vlans))
-		macvlan_port_destroy(port->dev);
 }
 EXPORT_SYMBOL_GPL(macvlan_dellink);
 
diff --git a/include/linux/macvlan.h b/include/linux/macvlan.h
index 3f3c6c3..cf8738a 100644
--- a/include/linux/macvlan.h
+++ b/include/linux/macvlan.h
@@ -24,6 +24,12 @@  struct macvlan_dev {
 };
 
 extern int macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev);
+extern int macvlan_link_lowerdev(struct net_device *dev,
+						  struct net_device *lowerdev);
+
+extern void macvlan_unlink_lowerdev(struct net_device *dev);
+
+extern void macvlan_transfer_operstate(struct net_device *dev);
 
 extern void macvlan_setup(struct net_device *dev);