diff mbox

[1/2] bnx2: allocate with GFP_KERNEL flag on RX path init

Message ID 20100715142537.12504.60051.send-patch@dhcp-lab-109.englab.brq.redhat.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Stanislaw Gruszka July 15, 2010, 2:25 p.m. UTC
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
 drivers/net/bnx2.c |   17 +++++++++--------
 1 files changed, 9 insertions(+), 8 deletions(-)

Comments

Michael Chan July 15, 2010, 2:48 p.m. UTC | #1
Stanislaw Gruszka wrote:

> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
> ---
> @@ -2974,7 +2974,7 @@ bnx2_rx_skb(struct bnx2 *bp, struct
> bnx2_rx_ring_info *rxr, struct sk_buff *skb,
>       int err;
>       u16 prod = ring_idx & 0xffff;
>
> -     err = bnx2_alloc_rx_skb(bp, rxr, prod);
> +     err = bnx2_alloc_rx_skb(bp, rxr, prod, GFP_KERNEL);

This should be GFP_ATOMIC since it is called from NAPI softirq
context.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mitchell Erblich July 15, 2010, 6:57 p.m. UTC | #2
On Jul 15, 2010, at 7:25 AM, Stanislaw Gruszka wrote:

> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
> ---
> drivers/net/bnx2.c |   17 +++++++++--------
> 1 files changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
> index a203f39..6de4cb7 100644
> --- a/drivers/net/bnx2.c
> +++ b/drivers/net/bnx2.c
> @@ -2664,13 +2664,13 @@ bnx2_set_mac_addr(struct bnx2 *bp, u8 *mac_addr, u32 pos)
> }
> 
> static inline int
> -bnx2_alloc_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
> +bnx2_alloc_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gfp_t gfp)
> {
> 	dma_addr_t mapping;
> 	struct sw_pg *rx_pg = &rxr->rx_pg_ring[index];
> 	struct rx_bd *rxbd =
> 		&rxr->rx_pg_desc_ring[RX_RING(index)][RX_IDX(index)];
> -	struct page *page = alloc_page(GFP_ATOMIC);
> +	struct page *page = alloc_page(gfp);
> 
> 	if (!page)
> 		return -ENOMEM;
> @@ -2705,7 +2705,7 @@ bnx2_free_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
> }
> 
> static inline int
> -bnx2_alloc_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
> +bnx2_alloc_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gfp_t gfp)
> {
> 	struct sk_buff *skb;
> 	struct sw_bd *rx_buf = &rxr->rx_buf_ring[index];
> @@ -2713,7 +2713,7 @@ bnx2_alloc_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
> 	struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(index)][RX_IDX(index)];
> 	unsigned long align;
> 
> -	skb = netdev_alloc_skb(bp->dev, bp->rx_buf_size);
> +	skb = __netdev_alloc_skb(bp->dev, bp->rx_buf_size, gfp);
> 	if (skb == NULL) {
> 		return -ENOMEM;
> 	}
> @@ -2974,7 +2974,7 @@ bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
> 	int err;
> 	u16 prod = ring_idx & 0xffff;
> 
> -	err = bnx2_alloc_rx_skb(bp, rxr, prod);
> +	err = bnx2_alloc_rx_skb(bp, rxr, prod, GFP_KERNEL);
> 	if (unlikely(err)) {
> 		bnx2_reuse_rx_skb(bp, rxr, skb, (u16) (ring_idx >> 16), prod);
> 		if (hdr_len) {
> @@ -3039,7 +3039,8 @@ bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
> 			rx_pg->page = NULL;
> 
> 			err = bnx2_alloc_rx_page(bp, rxr,
> -						 RX_PG_RING_IDX(pg_prod));
> +						 RX_PG_RING_IDX(pg_prod),
> +						 GFP_ATOMIC);

Why not GFP_NOWAIT here?
It would not dip into the last reserved pages of memory,
and it would still remove the possible sleep associated with GFP_KERNEL.

Mitchell Erblich


> 			if (unlikely(err)) {
> 				rxr->rx_pg_cons = pg_cons;
> 				rxr->rx_pg_prod = pg_prod;
> @@ -5179,7 +5180,7 @@ bnx2_init_rx_ring(struct bnx2 *bp, int ring_num)
> 
> 	ring_prod = prod = rxr->rx_pg_prod;
> 	for (i = 0; i < bp->rx_pg_ring_size; i++) {
> -		if (bnx2_alloc_rx_page(bp, rxr, ring_prod) < 0) {
> +		if (bnx2_alloc_rx_page(bp, rxr, ring_prod, GFP_KERNEL) < 0) {
> 			netdev_warn(bp->dev, "init'ed rx page ring %d with %d/%d pages only\n",
> 				    ring_num, i, bp->rx_pg_ring_size);
> 			break;
> @@ -5191,7 +5192,7 @@ bnx2_init_rx_ring(struct bnx2 *bp, int ring_num)
> 
> 	ring_prod = prod = rxr->rx_prod;
> 	for (i = 0; i < bp->rx_ring_size; i++) {
> -		if (bnx2_alloc_rx_skb(bp, rxr, ring_prod) < 0) {
> +		if (bnx2_alloc_rx_skb(bp, rxr, ring_prod, GFP_KERNEL) < 0) {
> 			netdev_warn(bp->dev, "init'ed rx ring %d with %d/%d skbs only\n",
> 				    ring_num, i, bp->rx_ring_size);
> 			break;
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller July 16, 2010, 3:25 a.m. UTC | #3
From: "Michael Chan" <mchan@broadcom.com>
Date: Thu, 15 Jul 2010 07:48:40 -0700

> Stanislaw Gruszka wrote:
> 
>> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
>> ---
>> @@ -2974,7 +2974,7 @@ bnx2_rx_skb(struct bnx2 *bp, struct
>> bnx2_rx_ring_info *rxr, struct sk_buff *skb,
>>       int err;
>>       u16 prod = ring_idx & 0xffff;
>>
>> -     err = bnx2_alloc_rx_skb(bp, rxr, prod);
>> +     err = bnx2_alloc_rx_skb(bp, rxr, prod, GFP_KERNEL);
> 
> This should be GFP_ATOMIC since it is called from NAPI softirq
> context.

This fatal issue gives me doubts about whether this patch was even
tested at all.

Immediately the kernel memory allocator should have issued a warning
due to this GFP_KERNEL allocation in a non-sleep'able context.

Stanislaw, how did you test this patch?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mitchell Erblich July 16, 2010, 5:57 a.m. UTC | #4
On Jul 15, 2010, at 8:25 PM, David Miller wrote:

> From: "Michael Chan" <mchan@broadcom.com>
> Date: Thu, 15 Jul 2010 07:48:40 -0700
> 
>> Stanislaw Gruszka wrote:
>> 
>>> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
>>> ---
>>> @@ -2974,7 +2974,7 @@ bnx2_rx_skb(struct bnx2 *bp, struct
>>> bnx2_rx_ring_info *rxr, struct sk_buff *skb,
>>>      int err;
>>>      u16 prod = ring_idx & 0xffff;
>>> 
>>> -     err = bnx2_alloc_rx_skb(bp, rxr, prod);
>>> +     err = bnx2_alloc_rx_skb(bp, rxr, prod, GFP_KERNEL);
>> 
>> This should be GFP_ATOMIC since it is called from NAPI softirq
>> context.
> 
> This fatal issue gives me doubts about whether this patch was even
> tested at all.
> 
> Immediately the kernel memory allocator should have issued a warning
> due to this GFP_KERNEL allocation in a non-sleep'able context.
> 
> Stanislaw, how did you test this patch?
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Group,

		Why NOT GFP_NOWAIT? Unlike GFP_ATOMIC, it won't use the
		last reserved pages.

		GFP_ATOMIC, IMO, SHOULD be used in the paths that clean
		and free pages.

Mitchell Erblich


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stanislaw Gruszka July 16, 2010, 7:13 a.m. UTC | #5
On Thu, 15 Jul 2010 20:25:37 -0700 (PDT)
David Miller <davem@davemloft.net> wrote:

> > This should be GFP_ATOMIC since it is called from NAPI softirq
> > context.
> 
> This fatal issue gives me doubts about whether this patch was even
> tested at all.
> 
> Immediately the kernel memory allocator should have issued a warning
> due to this GFP_KERNEL allocation in a non-sleep'able context.
> 
> Stanislaw, how did you test this patch?

I ran the net-next-2.6 kernel with the patches on a machine with a bnx2
device, but I compiled the kernel with CONFIG_DEBUG_KOBJECT and the whole
dmesg was filled with messages like:

kobject: 'block' (ffff8801663122c0): kobject_add_internal: parent: '2:2:1:0', set: '(null)'
kobject: 'sdc' (ffff8801642ca070): kobject_add_internal: parent: 'block', set: 'devices'
kobject: 'sdc' (ffff8801642ca070): kobject_uevent_env

so I missed the warning, grr...

Stanislaw
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stanislaw Gruszka July 16, 2010, 7:30 a.m. UTC | #6
On Thu, 15 Jul 2010 11:57:59 -0700
Mitchell Erblich <erblichs@earthlink.net> wrote:

> > @@ -3039,7 +3039,8 @@ bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
> > 			rx_pg->page = NULL;
> > 
> > 			err = bnx2_alloc_rx_page(bp, rxr,
> > -						 RX_PG_RING_IDX(pg_prod));
> > +						 RX_PG_RING_IDX(pg_prod),
> > +						 GFP_ATOMIC);
> 
> Why not GFP_NOWAIT here?
> It would not dip into the last reserved pages of memory,
> and it would still remove the possible sleep associated with GFP_KERNEL.

There is no GFP_NOWAIT usage in any network driver. I'm not sure whether
this flag is intended for driver usage. Anyway, I cannot judge whether a
GFP_ATOMIC -> GFP_NOWAIT conversion is a good or a bad idea; I think you
should ask the mm guys about that.

Stanislaw
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index a203f39..6de4cb7 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -2664,13 +2664,13 @@  bnx2_set_mac_addr(struct bnx2 *bp, u8 *mac_addr, u32 pos)
 }
 
 static inline int
-bnx2_alloc_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
+bnx2_alloc_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gfp_t gfp)
 {
 	dma_addr_t mapping;
 	struct sw_pg *rx_pg = &rxr->rx_pg_ring[index];
 	struct rx_bd *rxbd =
 		&rxr->rx_pg_desc_ring[RX_RING(index)][RX_IDX(index)];
-	struct page *page = alloc_page(GFP_ATOMIC);
+	struct page *page = alloc_page(gfp);
 
 	if (!page)
 		return -ENOMEM;
@@ -2705,7 +2705,7 @@  bnx2_free_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
 }
 
 static inline int
-bnx2_alloc_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
+bnx2_alloc_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gfp_t gfp)
 {
 	struct sk_buff *skb;
 	struct sw_bd *rx_buf = &rxr->rx_buf_ring[index];
@@ -2713,7 +2713,7 @@  bnx2_alloc_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
 	struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(index)][RX_IDX(index)];
 	unsigned long align;
 
-	skb = netdev_alloc_skb(bp->dev, bp->rx_buf_size);
+	skb = __netdev_alloc_skb(bp->dev, bp->rx_buf_size, gfp);
 	if (skb == NULL) {
 		return -ENOMEM;
 	}
@@ -2974,7 +2974,7 @@  bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
 	int err;
 	u16 prod = ring_idx & 0xffff;
 
-	err = bnx2_alloc_rx_skb(bp, rxr, prod);
+	err = bnx2_alloc_rx_skb(bp, rxr, prod, GFP_KERNEL);
 	if (unlikely(err)) {
 		bnx2_reuse_rx_skb(bp, rxr, skb, (u16) (ring_idx >> 16), prod);
 		if (hdr_len) {
@@ -3039,7 +3039,8 @@  bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
 			rx_pg->page = NULL;
 
 			err = bnx2_alloc_rx_page(bp, rxr,
-						 RX_PG_RING_IDX(pg_prod));
+						 RX_PG_RING_IDX(pg_prod),
+						 GFP_ATOMIC);
 			if (unlikely(err)) {
 				rxr->rx_pg_cons = pg_cons;
 				rxr->rx_pg_prod = pg_prod;
@@ -5179,7 +5180,7 @@  bnx2_init_rx_ring(struct bnx2 *bp, int ring_num)
 
 	ring_prod = prod = rxr->rx_pg_prod;
 	for (i = 0; i < bp->rx_pg_ring_size; i++) {
-		if (bnx2_alloc_rx_page(bp, rxr, ring_prod) < 0) {
+		if (bnx2_alloc_rx_page(bp, rxr, ring_prod, GFP_KERNEL) < 0) {
 			netdev_warn(bp->dev, "init'ed rx page ring %d with %d/%d pages only\n",
 				    ring_num, i, bp->rx_pg_ring_size);
 			break;
@@ -5191,7 +5192,7 @@  bnx2_init_rx_ring(struct bnx2 *bp, int ring_num)
 
 	ring_prod = prod = rxr->rx_prod;
 	for (i = 0; i < bp->rx_ring_size; i++) {
-		if (bnx2_alloc_rx_skb(bp, rxr, ring_prod) < 0) {
+		if (bnx2_alloc_rx_skb(bp, rxr, ring_prod, GFP_KERNEL) < 0) {
 			netdev_warn(bp->dev, "init'ed rx ring %d with %d/%d skbs only\n",
 				    ring_num, i, bp->rx_ring_size);
 			break;