diff mbox

[RFC,1/3] netpoll: add generic support for bridge and bonding devices

Message ID 20100322082059.4967.63492.sendpatchset@localhost.localdomain
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Amerigo Wang March 22, 2010, 8:17 a.m. UTC
This whole patchset is for adding netpoll support to bridge and bonding
devices. I already tested it for bridge, bonding, bridge over bonding,
and bonding over bridge. It looks fine now.

Please comment.


To make bridge and bonding support netpoll, we need to adjust
some netpoll generic code. This patch does the following things:

1) introduce two new priv_flags for struct net_device:
   IFF_IN_NETPOLL, which indicates that we are currently processing
   netpoll;
   IFF_DISABLE_NETPOLL, which is used to disable netpoll support for a
   device at run-time;

2) introduce three new methods for netdev_ops:
   ->ndo_netpoll_setup() is used to setup netpoll for a device;
   ->ndo_netpoll_xmit() is used to transmit netpoll requests;
   ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
     removed.

3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;

4) export netpoll_send_skb() and netpoll_poll_dev() which will be used later;

5) store a pointer to struct netpoll in struct netpoll_info, which will
   also be used later.

Cc: David Miller <davem@davemloft.net>
Cc: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: WANG Cong <amwang@redhat.com>

---
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Matt Mackall March 22, 2010, 10:31 p.m. UTC | #1
On Mon, 2010-03-22 at 04:17 -0400, Amerigo Wang wrote:
> This whole patchset is for adding netpoll support to bridge and bonding
> devices. I already tested it for bridge, bonding, bridge over bonding,
> and bonding over bridge. It looks fine now.

Ages ago, Jeff Moyer took a run at this, added him to the cc: on the off
chance he still cares.

> Please comment.
> 
> 
> To make bridge and bonding support netpoll, we need to adjust
> some netpoll generic code. This patch does the following things:
> 
> 1) introduce two new priv_flags for struct net_device:
>    IFF_IN_NETPOLL which identifies we are processing a netpoll;
>    IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
>    at run-time;

This one is a little worrisome. I've tried to keep the netpoll code
restricted to as tight an area as possible. Adding new flags like these
that random drivers might try to fiddle with seems like a good way for a
driver writer to get in trouble. Also flag space is filling up.

> 2) introduce three new methods for netdev_ops:
>    ->ndo_netpoll_setup() is used to setup netpoll for a device;
>    ->ndo_netpoll_xmit() is used to transmit netpoll requests;
>    ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
>      removed.

Seems like a lot of interface for something to be used by only a couple
core drivers. Hopefully Dave has an opinion here.

> 3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;
> 
> 4) export netpoll_send_skb() and netpoll_poll_dev() which will be used later;
> 
> 5) hide a pointer to struct netpoll in struct netpoll_info, ditto.
> 
> Cc: David Miller <davem@davemloft.net>
> Cc: Neil Horman <nhorman@tuxdriver.com>
> Signed-off-by: WANG Cong <amwang@redhat.com>
> 
> ---
> Index: linux-2.6/include/linux/if.h
> ===================================================================
> --- linux-2.6.orig/include/linux/if.h
> +++ linux-2.6/include/linux/if.h
> @@ -71,6 +71,8 @@
>  					 * release skb->dst
>  					 */
>  #define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
> +#define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
> +#define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
>  
>  #define IF_GET_IFACE	0x0001		/* for querying only */
>  #define IF_GET_PROTO	0x0002
> Index: linux-2.6/include/linux/netdevice.h
> ===================================================================
> --- linux-2.6.orig/include/linux/netdevice.h
> +++ linux-2.6/include/linux/netdevice.h
> @@ -530,6 +530,8 @@ struct netdev_queue {
>  	unsigned long		tx_dropped;
>  } ____cacheline_aligned_in_smp;
>  
> +struct netpoll;
> +struct netpoll_info;
>  
>  /*
>   * This structure defines the management hooks for network devices.
> @@ -667,6 +669,12 @@ struct net_device_ops {
>  						        unsigned short vid);
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>  	void                    (*ndo_poll_controller)(struct net_device *dev);
> +	void			(*ndo_netpoll_setup)(struct net_device *dev,
> +						     struct netpoll_info *npinfo);
> +	int			(*ndo_netpoll_xmit)(struct netpoll *np,
> +						    struct sk_buff *skb,
> +						    struct net_device *dev);
> +	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
>  #endif
>  	int			(*ndo_set_vf_mac)(struct net_device *dev,
>  						  int queue, u8 *mac);
> Index: linux-2.6/include/linux/netpoll.h
> ===================================================================
> --- linux-2.6.orig/include/linux/netpoll.h
> +++ linux-2.6/include/linux/netpoll.h
> @@ -36,8 +36,11 @@ struct netpoll_info {
>  	struct sk_buff_head txq;
>  
>  	struct delayed_work tx_work;
> +
> +	struct netpoll *netpoll;
>  };
>  
> +void netpoll_poll_dev(struct net_device *dev);
>  void netpoll_poll(struct netpoll *np);
>  void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
>  void netpoll_print_options(struct netpoll *np);
> @@ -47,6 +50,7 @@ int netpoll_trap(void);
>  void netpoll_set_trap(int trap);
>  void netpoll_cleanup(struct netpoll *np);
>  int __netpoll_rx(struct sk_buff *skb);
> +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
>  
> 
>  #ifdef CONFIG_NETPOLL
> Index: linux-2.6/net/core/netpoll.c
> ===================================================================
> --- linux-2.6.orig/net/core/netpoll.c
> +++ linux-2.6/net/core/netpoll.c
> @@ -178,9 +178,8 @@ static void service_arp_queue(struct net
>  	}
>  }
>  
> -void netpoll_poll(struct netpoll *np)
> +void netpoll_poll_dev(struct net_device *dev)
>  {
> -	struct net_device *dev = np->dev;
>  	const struct net_device_ops *ops;
>  
>  	if (!dev || !netif_running(dev))
> @@ -200,6 +199,13 @@ void netpoll_poll(struct netpoll *np)
>  	zap_completion_queue();
>  }
>  
> +void netpoll_poll(struct netpoll *np)
> +{
> +	if (!np->dev)
> +		return;
> +	netpoll_poll_dev(np->dev);
> +}
> +
>  static void refill_skbs(void)
>  {
>  	struct sk_buff *skb;
> @@ -281,7 +287,7 @@ static int netpoll_owner_active(struct n
>  	return 0;
>  }
>  
> -static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
> +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
>  {
>  	int status = NETDEV_TX_BUSY;
>  	unsigned long tries;
> @@ -307,7 +313,10 @@ static void netpoll_send_skb(struct netp
>  		     tries > 0; --tries) {
>  			if (__netif_tx_trylock(txq)) {
>  				if (!netif_tx_queue_stopped(txq)) {
> -					status = ops->ndo_start_xmit(skb, dev);
> +					if (ops->ndo_netpoll_xmit)
> +						status = ops->ndo_netpoll_xmit(np, skb, dev);
> +					else
> +						status = ops->ndo_start_xmit(skb, dev);
>  					if (status == NETDEV_TX_OK)
>  						txq_trans_update(txq);
>  				}
> @@ -752,7 +761,10 @@ int netpoll_setup(struct netpoll *np)
>  		atomic_inc(&npinfo->refcnt);
>  	}
>  
> -	if (!ndev->netdev_ops->ndo_poll_controller) {
> +	npinfo->netpoll = np;
> +
> +	if (ndev->priv_flags & IFF_DISABLE_NETPOLL
> +			|| !ndev->netdev_ops->ndo_poll_controller) {
>  		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
>  		       np->name, np->dev_name);
>  		err = -ENOTSUPP;
> @@ -830,6 +842,9 @@ int netpoll_setup(struct netpoll *np)
>  	/* last thing to do is link it to the net device structure */
>  	ndev->npinfo = npinfo;
>  
> +	if (ndev->netdev_ops->ndo_netpoll_setup)
> +		ndev->netdev_ops->ndo_netpoll_setup(ndev, npinfo);
> +
>  	/* avoid racing with NAPI reading npinfo */
>  	synchronize_rcu();
>  
> @@ -904,6 +919,7 @@ void netpoll_set_trap(int trap)
>  		atomic_dec(&trapped);
>  }
>  
> +EXPORT_SYMBOL(netpoll_send_skb);
>  EXPORT_SYMBOL(netpoll_set_trap);
>  EXPORT_SYMBOL(netpoll_trap);
>  EXPORT_SYMBOL(netpoll_print_options);
> @@ -911,4 +927,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
>  EXPORT_SYMBOL(netpoll_setup);
>  EXPORT_SYMBOL(netpoll_cleanup);
>  EXPORT_SYMBOL(netpoll_send_udp);
> +EXPORT_SYMBOL(netpoll_poll_dev);
>  EXPORT_SYMBOL(netpoll_poll);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
Amerigo Wang March 23, 2010, 2:13 a.m. UTC | #2
Matt Mackall wrote:
> On Mon, 2010-03-22 at 04:17 -0400, Amerigo Wang wrote:
>> This whole patchset is for adding netpoll support to bridge and bonding
>> devices. I already tested it for bridge, bonding, bridge over bonding,
>> and bonding over bridge. It looks fine now.
> 
> Ages ago, Jeff Moyer took a run at this, added him to the cc: on the off
> chance he still cares.
> 
>> Please comment.
>>
>>
>> To make bridge and bonding support netpoll, we need to adjust
>> some netpoll generic code. This patch does the following things:
>>
>> 1) introduce two new priv_flags for struct net_device:
>>    IFF_IN_NETPOLL which identifies we are processing a netpoll;
>>    IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
>>    at run-time;
> 
> This one is a little worrisome. I've tried to keep the netpoll code
> restricted to as tight an area as possible. Adding new flags like these
> that random drivers might try to fiddle with seems like a good way for a
> driver writer to get in trouble. Also flag space is filling up.


Somewhat, but currently I don't have any other way to replace this.
Any suggestions?

> 
>> 2) introduce three new methods for netdev_ops:
>>    ->ndo_netpoll_setup() is used to setup netpoll for a device;
>>    ->ndo_netpoll_xmit() is used to transmit netpoll requests;
>>    ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
>>      removed.
> 
> Seems like a lot of interface for something to be used by only a couple
> core drivers. Hopefully Dave has an opinion here.
> 

Yeah, I worry about this too, maybe we can group those methods
for netpoll together into another struct, and just put a pointer
here?

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller March 23, 2010, 3:49 a.m. UTC | #3
From: Cong Wang <amwang@redhat.com>
Date: Tue, 23 Mar 2010 10:13:43 +0800

> Matt Mackall wrote:
>> Seems like a lot of interface for something to be used by only a
>> couple
>> core drivers. Hopefully Dave has an opinion here.
>> 
> 
> Yeah, I worry about this too, maybe we can group those methods
> for netpoll together into another struct, and just put a pointer
> here?

This looks like it's tackled at the wrong layer, to be honest.

Teaching all of these layers about each other's states is
going to end up being a nightmare in the end.

All of this "where is the npinfo" business can be handled
generically in net/core/dev.c I think, with none of these
callbacks.

For example, something like "if dev lacks ->npinfo, check
its master".

Another thing, I wouldn't iterate over all devices, like I
see in the bonding poll controller method.  Just whichever
one supports netpoll you see first, use it and exit
immediately.  Don't send it to every single port, I can't
see how that might be desirable or useful.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amerigo Wang March 23, 2010, 4:47 a.m. UTC | #4
David Miller wrote:
> From: Cong Wang <amwang@redhat.com>
> Date: Tue, 23 Mar 2010 10:13:43 +0800
> 
>> Matt Mackall wrote:
>>> Seems like a lot of interface for something to be used by only a
>>> couple
>>> core drivers. Hopefully Dave has an opinion here.
>>>
>> Yeah, I worry about this too, maybe we can group those methods
>> for netpoll together into another struct, and just put a pointer
>> here?
> 
> This looks like it's tackled at the wrong layer, to be honest.
> 
> Teaching all of these layers about eachother's states is
> going to end up being a nightmare in the end.
> 
> All of this "where is the npinfo" business can be handled
> generically in net/core/dev.c I think, with none of these
> callbacks.
> 
> For example, something like "if dev lacks ->npinfo, check
> it's master".

This is a good point! I haven't tried it, but it is certainly
worth a try. Ideally those callbacks can all be removed,
but I don't know if this is true in practice. ;)

I will try.

> 
> Another thing, I wouldn't iterate over all devices, like I
> see in the bonding poll controller method.  Just whichever
> one supports netpoll you see first, use it and exit
> immediately.  Don't send it to every single port, I can't
> see how that might be desirable or useful.

Yeah, for bonding case, probably. But for bridge case, I think
we still need to check all, right?

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller March 23, 2010, 4:58 a.m. UTC | #5
From: Cong Wang <amwang@redhat.com>
Date: Tue, 23 Mar 2010 12:47:39 +0800

> Yeah, for bonding case, probably. But for bridge case, I think
> we still need to check all, right?

Why?  Who cares?

If it goes out one port and reaches its destination
the objective has been achieved.

Sending it out N more times achieves nothing.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amerigo Wang March 23, 2010, 5:15 a.m. UTC | #6
David Miller wrote:
> From: Cong Wang <amwang@redhat.com>
> Date: Tue, 23 Mar 2010 12:47:39 +0800
> 
>> Yeah, for bonding case, probably. But for bridge case, I think
>> we still need to check all, right?
> 
> Why?  Who cares?
> 
> If it goes out one port and reaches it's destination
> the objective has been achieved.
> 
> Sending it out N more times achieves nothing.

We have to check which port has the right destination.

Ideally we should check the right destination address to
choose the port, but currently we don't have a generic
way to check this, thus I chose to send it to all ports.
You are right, this needs to be improved.

Thanks!

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Moyer March 23, 2010, 12:11 p.m. UTC | #7
Matt Mackall <mpm@selenic.com> writes:

> On Mon, 2010-03-22 at 04:17 -0400, Amerigo Wang wrote:
>> This whole patchset is for adding netpoll support to bridge and bonding
>> devices. I already tested it for bridge, bonding, bridge over bonding,
>> and bonding over bridge. It looks fine now.
>
> Ages ago, Jeff Moyer took a run at this, added him to the cc: on the off
> chance he still cares.

I'll take a look at it in a bit.  For now, here is the link to my
original post on this for Amerigo's reading pleasure:

  http://lkml.indiana.edu/hypermail/linux/kernel/0507.0/0206.html

Cheers,
Jeff
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amerigo Wang March 24, 2010, 2:29 a.m. UTC | #8
Jeff Moyer wrote:
> Matt Mackall <mpm@selenic.com> writes:
> 
>> On Mon, 2010-03-22 at 04:17 -0400, Amerigo Wang wrote:
>>> This whole patchset is for adding netpoll support to bridge and bonding
>>> devices. I already tested it for bridge, bonding, bridge over bonding,
>>> and bonding over bridge. It looks fine now.
>> Ages ago, Jeff Moyer took a run at this, added him to the cc: on the off
>> chance he still cares.
> 
> I'll take a look at it in a bit.  For now, here is the link to my
> original post on this for Amerigo's reading pleasure:
> 
>   http://lkml.indiana.edu/hypermail/linux/kernel/0507.0/0206.html
> 

Thanks, Jeff! I will take a look at it.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: linux-2.6/include/linux/if.h
===================================================================
--- linux-2.6.orig/include/linux/if.h
+++ linux-2.6/include/linux/if.h
@@ -71,6 +71,8 @@ 
 					 * release skb->dst
 					 */
 #define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
+#define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
+#define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
Index: linux-2.6/include/linux/netdevice.h
===================================================================
--- linux-2.6.orig/include/linux/netdevice.h
+++ linux-2.6/include/linux/netdevice.h
@@ -530,6 +530,8 @@  struct netdev_queue {
 	unsigned long		tx_dropped;
 } ____cacheline_aligned_in_smp;
 
+struct netpoll;
+struct netpoll_info;
 
 /*
  * This structure defines the management hooks for network devices.
@@ -667,6 +669,12 @@  struct net_device_ops {
 						        unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
+	void			(*ndo_netpoll_setup)(struct net_device *dev,
+						     struct netpoll_info *npinfo);
+	int			(*ndo_netpoll_xmit)(struct netpoll *np,
+						    struct sk_buff *skb,
+						    struct net_device *dev);
+	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
 						  int queue, u8 *mac);
Index: linux-2.6/include/linux/netpoll.h
===================================================================
--- linux-2.6.orig/include/linux/netpoll.h
+++ linux-2.6/include/linux/netpoll.h
@@ -36,8 +36,11 @@  struct netpoll_info {
 	struct sk_buff_head txq;
 
 	struct delayed_work tx_work;
+
+	struct netpoll *netpoll;
 };
 
+void netpoll_poll_dev(struct net_device *dev);
 void netpoll_poll(struct netpoll *np);
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
@@ -47,6 +50,7 @@  int netpoll_trap(void);
 void netpoll_set_trap(int trap);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb);
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 
 
 #ifdef CONFIG_NETPOLL
Index: linux-2.6/net/core/netpoll.c
===================================================================
--- linux-2.6.orig/net/core/netpoll.c
+++ linux-2.6/net/core/netpoll.c
@@ -178,9 +178,8 @@  static void service_arp_queue(struct net
 	}
 }
 
-void netpoll_poll(struct netpoll *np)
+void netpoll_poll_dev(struct net_device *dev)
 {
-	struct net_device *dev = np->dev;
 	const struct net_device_ops *ops;
 
 	if (!dev || !netif_running(dev))
@@ -200,6 +199,13 @@  void netpoll_poll(struct netpoll *np)
 	zap_completion_queue();
 }
 
+void netpoll_poll(struct netpoll *np)
+{
+	if (!np->dev)
+		return;
+	netpoll_poll_dev(np->dev);
+}
+
 static void refill_skbs(void)
 {
 	struct sk_buff *skb;
@@ -281,7 +287,7 @@  static int netpoll_owner_active(struct n
 	return 0;
 }
 
-static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	int status = NETDEV_TX_BUSY;
 	unsigned long tries;
@@ -307,7 +313,10 @@  static void netpoll_send_skb(struct netp
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_tx_queue_stopped(txq)) {
-					status = ops->ndo_start_xmit(skb, dev);
+					if (ops->ndo_netpoll_xmit)
+						status = ops->ndo_netpoll_xmit(np, skb, dev);
+					else
+						status = ops->ndo_start_xmit(skb, dev);
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
 				}
@@ -752,7 +761,10 @@  int netpoll_setup(struct netpoll *np)
 		atomic_inc(&npinfo->refcnt);
 	}
 
-	if (!ndev->netdev_ops->ndo_poll_controller) {
+	npinfo->netpoll = np;
+
+	if (ndev->priv_flags & IFF_DISABLE_NETPOLL
+			|| !ndev->netdev_ops->ndo_poll_controller) {
 		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
 		       np->name, np->dev_name);
 		err = -ENOTSUPP;
@@ -830,6 +842,9 @@  int netpoll_setup(struct netpoll *np)
 	/* last thing to do is link it to the net device structure */
 	ndev->npinfo = npinfo;
 
+	if (ndev->netdev_ops->ndo_netpoll_setup)
+		ndev->netdev_ops->ndo_netpoll_setup(ndev, npinfo);
+
 	/* avoid racing with NAPI reading npinfo */
 	synchronize_rcu();
 
@@ -904,6 +919,7 @@  void netpoll_set_trap(int trap)
 		atomic_dec(&trapped);
 }
 
+EXPORT_SYMBOL(netpoll_send_skb);
 EXPORT_SYMBOL(netpoll_set_trap);
 EXPORT_SYMBOL(netpoll_trap);
 EXPORT_SYMBOL(netpoll_print_options);
@@ -911,4 +927,5 @@  EXPORT_SYMBOL(netpoll_parse_options);
 EXPORT_SYMBOL(netpoll_setup);
 EXPORT_SYMBOL(netpoll_cleanup);
 EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll_dev);
 EXPORT_SYMBOL(netpoll_poll);