
net: fix a race in gro_cell_poll()

Message ID 1355178723.27891.85.camel@edumazet-glaptop
State Accepted, archived
Delegated to: David Miller

Commit Message

Eric Dumazet Dec. 10, 2012, 10:32 p.m. UTC
From: Eric Dumazet <edumazet@google.com>

Dmitry Kravkov reported packet drops for GRE packets since GRO support
was added.

There is a race in gro_cell_poll() because we call napi_complete()
without any synchronization with a concurrent gro_cells_receive().

Once the bug was triggered, we queued packets but did not schedule a NAPI
poll.

We can fix this issue using the spinlock protecting the napi_skbs queue,
as we have to hold it to perform the skb dequeue anyway.

As we open-code skb_dequeue(), we no longer need to mask IRQs, as both
producer and consumer run in BH context.

Bug added in commit c9e6bc644e (net: add gro_cells infrastructure)

Reported-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---

David: I could reproduce the bug Dmitry reported, and have
  verified this patch fixes the issue.

 include/net/gro_cells.h |   14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)
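
For context, here is how the two code paths looked before the patch, reconstructed from the removed ("-") lines and the unchanged context of the diff below; the numbered comments marking the racy window are editorial annotations, not part of the original source:

/* Pre-patch producer, runs in BH context */
static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
{
	unsigned long flags;
	struct gro_cell *cell = gcells->cells;

	/* ... fallback and overflow checks omitted ... */

	spin_lock_irqsave(&cell->napi_skbs.lock, flags);

	__skb_queue_tail(&cell->napi_skbs, skb);
	if (skb_queue_len(&cell->napi_skbs) == 1)
		napi_schedule(&cell->napi);	/* (2) lost wakeup: NAPI_STATE_SCHED
						 * is still set, so this does nothing */

	spin_unlock_irqrestore(&cell->napi_skbs.lock, flags);
}

/* Pre-patch consumer (NAPI poll handler) */
static inline int gro_cell_poll(struct napi_struct *napi, int budget)
{
	struct gro_cell *cell = container_of(napi, struct gro_cell, napi);
	struct sk_buff *skb;
	int work_done = 0;

	while (work_done < budget) {
		skb = skb_dequeue(&cell->napi_skbs);	/* (1) finds the queue empty
							 * and drops the lock */
		if (!skb)
			break;

		napi_gro_receive(napi, skb);
		work_done++;
	}

	if (work_done < budget)
		napi_complete(napi);	/* (3) clears NAPI_STATE_SCHED; the skb
					 * queued at (2) is now stranded */
	return work_done;
}

With the interleaving (1) -> (2) -> (3), the queue is non-empty but NAPI is no longer scheduled, and later packets do not help because skb_queue_len() is no longer 1: the queue only grows until packets are dropped, which matches Dmitry's report. The patch closes the window by holding napi_skbs.lock across both the dequeue and the napi_complete() decision.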




Comments

Dmitry Kravkov Dec. 10, 2012, 10:46 p.m. UTC | #1

My scenario is working now.
Thanks, Eric.

Tested-by: Dmitry Kravkov <dmitry@broadcom.com>
David Miller Dec. 11, 2012, 5:50 p.m. UTC | #2
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 10 Dec 2012 14:32:03 -0800

Applied and queued up for -stable, thanks.

Patch

diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h
index 4fd8a4b..e5062c9 100644
--- a/include/net/gro_cells.h
+++ b/include/net/gro_cells.h
@@ -17,7 +17,6 @@  struct gro_cells {
 
 static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
 {
-	unsigned long flags;
 	struct gro_cell *cell = gcells->cells;
 	struct net_device *dev = skb->dev;
 
@@ -35,32 +34,37 @@  static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *s
 		return;
 	}
 
-	spin_lock_irqsave(&cell->napi_skbs.lock, flags);
+	/* We run in BH context */
+	spin_lock(&cell->napi_skbs.lock);
 
 	__skb_queue_tail(&cell->napi_skbs, skb);
 	if (skb_queue_len(&cell->napi_skbs) == 1)
 		napi_schedule(&cell->napi);
 
-	spin_unlock_irqrestore(&cell->napi_skbs.lock, flags);
+	spin_unlock(&cell->napi_skbs.lock);
 }
 
+/* called under BH context */
 static inline int gro_cell_poll(struct napi_struct *napi, int budget)
 {
 	struct gro_cell *cell = container_of(napi, struct gro_cell, napi);
 	struct sk_buff *skb;
 	int work_done = 0;
 
+	spin_lock(&cell->napi_skbs.lock);
 	while (work_done < budget) {
-		skb = skb_dequeue(&cell->napi_skbs);
+		skb = __skb_dequeue(&cell->napi_skbs);
 		if (!skb)
 			break;
-
+		spin_unlock(&cell->napi_skbs.lock);
 		napi_gro_receive(napi, skb);
 		work_done++;
+		spin_lock(&cell->napi_skbs.lock);
 	}
 
 	if (work_done < budget)
 		napi_complete(napi);
+	spin_unlock(&cell->napi_skbs.lock);
 	return work_done;
 }
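
To make the fixed pattern easy to experiment with outside the kernel, here is a minimal userspace sketch, assuming POSIX threads; enqueue(), poller() and the scheduled flag are illustrative stand-ins for gro_cells_receive(), gro_cell_poll() and NAPI_STATE_SCHED, and none of this is kernel code:

/*
 * Userspace analogue of the fixed consumer/producer pattern.
 * Build with:  cc -O2 -pthread sketch.c -o sketch
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define NITEMS 100000

struct item {
	struct item *next;
	int v;
};

static struct item *head, *tail;	/* stand-in for cell->napi_skbs */
static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t qcond = PTHREAD_COND_INITIALIZER;
static bool scheduled;			/* stand-in for NAPI_STATE_SCHED */
static int consumed;

/* Producer: queue an item and "schedule" the poller only on the
 * empty -> non-empty transition, like gro_cells_receive().
 */
static void enqueue(int v)
{
	struct item *it = malloc(sizeof(*it));

	it->v = v;
	it->next = NULL;
	pthread_mutex_lock(&qlock);
	if (tail)
		tail->next = it;
	else
		head = it;
	tail = it;
	if (!scheduled) {
		scheduled = true;
		pthread_cond_signal(&qcond);
	}
	pthread_mutex_unlock(&qlock);
}

/* Consumer: the decisive point is that the lock is held across both the
 * final empty-queue check and the clearing of "scheduled", exactly the
 * window the patch closes around napi_complete().
 */
static void *poller(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&qlock);
	while (consumed < NITEMS) {
		while (!scheduled)
			pthread_cond_wait(&qcond, &qlock);
		while (head) {
			struct item *it = head;

			head = it->next;
			if (!head)
				tail = NULL;
			/* "process" outside the lock, like napi_gro_receive() */
			pthread_mutex_unlock(&qlock);
			free(it);
			consumed++;
			pthread_mutex_lock(&qlock);
		}
		scheduled = false;	/* still under qlock: no lost wakeup */
	}
	pthread_mutex_unlock(&qlock);
	return NULL;
}

int main(void)
{
	pthread_t t;
	int i;

	pthread_create(&t, NULL, poller, NULL);
	for (i = 0; i < NITEMS; i++)
		enqueue(i);
	pthread_join(t, NULL);
	printf("consumed %d items\n", consumed);
	return 0;
}

If the scheduled = false line is moved outside the locked region, mirroring the pre-patch placement of napi_complete(), this sketch can strand items in the queue and block forever waiting for a wakeup that never comes, which is the same lost-wakeup race the patch fixes.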