Patchwork [RFC,2/2] igb: skb recycling

login
register
mail settings
Submitter stephen hemminger
Date Oct. 21, 2008, 7:11 p.m.
Message ID <20081021121137.5c67d338@extreme>
Download mbox | patch
Permalink /patch/5258/
State Deferred
Delegated to: Jeff Garzik
Headers show

Comments

stephen hemminger - Oct. 21, 2008, 7:11 p.m.
This driver is multiqueue, so implement a small skb recycling queue per
cpu. It doesn't make sense to have a global queue since then a lock would
be required. Not sure if this is going to work; compile tested only, needs more evaluation

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Duyck - Oct. 22, 2008, 11:53 p.m.
Stephen Hemminger wrote:
> This driver is multiqueue, so implement a small skb recycling queue
> per cpu. It doesn't make sense to have a global queue since then a
> lock would be required. Not sure if this is going to work; compile
> tested only, needs more evaluation

I did a bit of quick testing and found an issue in the sections of code below.  As is the current patch generates a null pointer exception on unload because the percpu_alloc and for_each_possible_cpu are working off of two different CPU masks.

The simple solution is to either replace percpu_alloc with alloc_percpu and use cpu_possible_map or replace for_each_possible_cpu with for_each_online_cpu and use the cpu_online_map.

Attached is the version I tested at my desk that used the cpu_possible_map solution.  I went that route since alloc_percpu seemed like a fairly clean macro.

Thanks,

Alex

> --- a/drivers/net/igb/igb_main.c        2008-10-21 09:39:25.000000000
> -0700 +++ b/drivers/net/igb/igb_main.c        2008-10-21
> 10:13:42.000000000 -0700 @@ -824,6 +824,11 @@ void igb_down(struct
>                 igb_adapter *adapte igb_reset(adapter);
>         igb_clean_all_tx_rings(adapter);
>         igb_clean_all_rx_rings(adapter);
> +
> +       for_each_possible_cpu(i) {
> +               struct sk_buff_head *rx_recycle =
> per_cpu_ptr(adapter->rx_recycle,i); +
> __skb_queue_purge(rx_recycle); +       }
>  }
>
>  void igb_reinit_locked(struct igb_adapter *adapter)
> @@ -1022,6 +1027,11 @@ static int __devinit igb_probe(struct pc
>         adapter = netdev_priv(netdev);
>         adapter->netdev = netdev;
>         adapter->pdev = pdev;
> +
> +       adapter->rx_recycle = percpu_alloc(sizeof(struct
> sk_buff_head), GFP_KERNEL); +       if (!adapter->rx_recycle)
> +               goto err_alloc_recycle;
> +
>         hw = &adapter->hw;
>         hw->back = adapter;
>         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
Alexander Duyck - Oct. 24, 2008, 7:01 p.m.
Duyck, Alexander H wrote:
> Stephen Hemminger wrote:
>> This driver is multiqueue, so implement a small skb recycling queue
>> per cpu. It doesn't make sense to have a global queue since then a
>> lock would be required. Not sure if this is going to work; compile
>> tested only, needs more evaluation
> 
> I did a bit of quick testing and found an issue in the sections of
> code below.  As is the current patch generates a null pointer
> exception on unload because the percpu_alloc and
> for_each_possible_cpu are working off of two different CPU masks.   
> 
> The simple solution is to either replace percpu_alloc with
> alloc_percpu and use cpu_possible_map or replace
> for_each_possible_cpu with for_each_online_cpu and use the
> cpu_online_map.   
> 
> Attached is the version I tested at my desk that used the
> cpu_possible_map solution.  I went that route since alloc_percpu
> seemed like a fairly clean macro.  

It turns out I missed one issue in my testing that was brought to my attention.  It looks like the issue was just due to the fact that the per cpu queues weren't initialized to store packets.  This updated patch adds a loop that goes through and calls __skb_queue_init_head on all the queues.

Thanks,

Alex

Patch

--- a/drivers/net/igb/igb.h	2008-10-21 09:37:39.000000000 -0700
+++ b/drivers/net/igb/igb.h	2008-10-21 09:39:15.000000000 -0700
@@ -232,6 +232,7 @@  struct igb_adapter {
 
 	/* TX */
 	struct igb_ring *tx_ring;      /* One per active queue */
+	struct sk_buff_head *rx_recycle; /* One per cpu */
 	unsigned int restart_queue;
 	unsigned long tx_queue_len;
 	u32 txd_cmd;
--- a/drivers/net/igb/igb_main.c	2008-10-21 09:39:25.000000000 -0700
+++ b/drivers/net/igb/igb_main.c	2008-10-21 10:13:42.000000000 -0700
@@ -824,6 +824,11 @@  void igb_down(struct igb_adapter *adapte
 		igb_reset(adapter);
 	igb_clean_all_tx_rings(adapter);
 	igb_clean_all_rx_rings(adapter);
+
+	for_each_possible_cpu(i) {
+		struct sk_buff_head *rx_recycle = per_cpu_ptr(adapter->rx_recycle,i);
+		__skb_queue_purge(rx_recycle);
+	}
 }
 
 void igb_reinit_locked(struct igb_adapter *adapter)
@@ -1022,6 +1027,11 @@  static int __devinit igb_probe(struct pc
 	adapter = netdev_priv(netdev);
 	adapter->netdev = netdev;
 	adapter->pdev = pdev;
+
+	adapter->rx_recycle = percpu_alloc(sizeof(struct sk_buff_head), GFP_KERNEL);
+	if (!adapter->rx_recycle)
+		goto err_alloc_recycle;
+
 	hw = &adapter->hw;
 	hw->back = adapter;
 	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
@@ -1289,6 +1299,8 @@  err_sw_init:
 err_hw_init:
 	iounmap(hw->hw_addr);
 err_ioremap:
+	percpu_free(adapter->rx_recycle);
+err_alloc_recycle:
 	free_netdev(netdev);
 err_alloc_etherdev:
 	pci_release_selected_regions(pdev, bars);
@@ -1352,6 +1364,8 @@  static void __devexit igb_remove(struct 
 		iounmap(adapter->hw.flash_address);
 	pci_release_selected_regions(pdev, adapter->bars);
 
+	percpu_free(adapter->rx_recycle);
+
 	free_netdev(netdev);
 
 	pci_disable_device(pdev);
@@ -1989,6 +2003,11 @@  static void igb_free_all_tx_resources(st
 		igb_free_tx_resources(&adapter->tx_ring[i]);
 }
 
+static inline unsigned int igb_rx_bufsize(const struct igb_adapter *adapter)
+{
+	return (adapter->rx_ps_hdr_size ? : adapter->rx_buffer_len) + NET_IP_ALIGN;
+}
+
 static void igb_unmap_and_free_tx_resource(struct igb_adapter *adapter,
 					   struct igb_buffer *buffer_info)
 {
@@ -2000,7 +2019,15 @@  static void igb_unmap_and_free_tx_resour
 		buffer_info->dma = 0;
 	}
 	if (buffer_info->skb) {
-		dev_kfree_skb_any(buffer_info->skb);
+		struct sk_buff_head *rx_recycle
+			= per_cpu_ptr(adapter->rx_recycle, smp_processor_id());
+
+		if (skb_queue_len(rx_recycle) < 16 &&
+		    skb_recycle_check(buffer_info->skb, igb_rx_bufsize(adapter)))
+			__skb_queue_head(rx_recycle, buffer_info->skb);
+		else
+		    dev_kfree_skb_any(buffer_info->skb);
+
 		buffer_info->skb = NULL;
 	}
 	buffer_info->time_stamp = 0;
@@ -4014,18 +4041,18 @@  static void igb_alloc_rx_buffers_adv(str
 		}
 
 		if (!buffer_info->skb) {
-			int bufsz;
-
-			if (adapter->rx_ps_hdr_size)
-				bufsz = adapter->rx_ps_hdr_size;
-			else
-				bufsz = adapter->rx_buffer_len;
-			bufsz += NET_IP_ALIGN;
-			skb = netdev_alloc_skb(netdev, bufsz);
+			unsigned int bufsz = igb_rx_bufsize(adapter);
+			struct sk_buff_head *rx_recycle
+				= per_cpu_ptr(adapter->rx_recycle,
+					      smp_processor_id());
 
+			skb = __skb_dequeue(rx_recycle);
 			if (!skb) {
-				adapter->alloc_rx_buff_failed++;
-				goto no_buffers;
+				skb = netdev_alloc_skb(netdev, bufsz);
+				if (!skb) {
+					adapter->alloc_rx_buff_failed++;
+					goto no_buffers;
+				}
 			}
 
 			/* Make buffer alignment 2 beyond a 16 byte boundary