Patchwork [1/2] ucc_geth: Move freeing of TX packets to NAPI context.

login
register
mail settings
Submitter Joakim Tjernlund
Date March 26, 2009, 12:54 p.m.
Message ID <1238072077-27044-1-git-send-email-Joakim.Tjernlund@transmode.se>
Download mbox | patch
Permalink /patch/25149/
State Not Applicable
Headers show

Comments

Joakim Tjernlund - March 26, 2009, 12:54 p.m.
Also set NAPI weight to 64 as this is a common value.
This will make the system a lot more responsive while
ping flooding the ucc_geth ethernet interface.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
---
 drivers/net/ucc_geth.c |   32 ++++++++++++--------------------
 drivers/net/ucc_geth.h |    1 -
 2 files changed, 12 insertions(+), 21 deletions(-)
Eric Dumazet - March 26, 2009, 2:15 p.m.
Joakim Tjernlund a écrit :
> Also set NAPI weight to 64 as this is a common value.
> This will make the system alot more responsive while
> ping flooding the ucc_geth ethernet interaface.
> 
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> ---
>  drivers/net/ucc_geth.c |   32 ++++++++++++--------------------
>  drivers/net/ucc_geth.h |    1 -
>  2 files changed, 12 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
> index 097aed8..7fc91aa 100644
> --- a/drivers/net/ucc_geth.c
> +++ b/drivers/net/ucc_geth.c
> @@ -3214,7 +3214,7 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
>  		dev->stats.tx_packets++;
>  
>  		/* Free the sk buffer associated with this TxBD */
> -		dev_kfree_skb_irq(ugeth->
> +		dev_kfree_skb(ugeth->
>  				  tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]]);
>  		ugeth->tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]] = NULL;
>  		ugeth->skb_dirtytx[txQ] =
> @@ -3248,9 +3248,16 @@ static int ucc_geth_poll(struct napi_struct *napi, int budget)

>  	for (i = 0; i < ug_info->numQueuesRx; i++)
>  		howmany += ucc_geth_rx(ugeth, i, budget - howmany);

Not related to your patch, but seeing above code, I can understand
you might have a problem in flood situation : Only first queue(s) are
depleted, and last one cannot be because howmany >= budget.

This driver might have to remember last queue it handled at previous
call ucc_geth_poll(), so that all rxqueues have same probability to be scanned.

j = ug->lastslot;
for (i = 0; i < ug_info->numQueuesRx; i++) {
	if (++j >= ug_info->numQueuesRx)
		j = 0;
	howmany += ucc_geth_rx(ugeth, j, budget - howmany);
	if (howmany >= budget)
		break;
}
ug->lastslot = j;


>  
> +	/* Tx event processing */
> +	spin_lock(&ugeth->lock);
> +	for (i = 0; i < ug_info->numQueuesTx; i++) {
> +		ucc_geth_tx(ugeth->dev, i);
> +	}
> +	spin_unlock(&ugeth->lock);
> +
>  	if (howmany < budget) {
>  		netif_rx_complete(napi);
> -		setbits32(ugeth->uccf->p_uccm, UCCE_RX_EVENTS);
> +		setbits32(ugeth->uccf->p_uccm, UCCE_RX_EVENTS | UCCE_TX_EVENTS);
>  	}
>  
>  	return howmany;
> @@ -3264,8 +3271,6 @@ static irqreturn_t ucc_geth_irq_handler(int irq, void *info)
>  	struct ucc_geth_info *ug_info;
>  	register u32 ucce;
>  	register u32 uccm;
> -	register u32 tx_mask;
> -	u8 i;
>  
>  	ugeth_vdbg("%s: IN", __func__);
>  
> @@ -3279,27 +3284,14 @@ static irqreturn_t ucc_geth_irq_handler(int irq, void *info)
>  	out_be32(uccf->p_ucce, ucce);
>  
>  	/* check for receive events that require processing */
> -	if (ucce & UCCE_RX_EVENTS) {
> +	if (ucce & (UCCE_RX_EVENTS | UCCE_TX_EVENTS)) {
>  		if (netif_rx_schedule_prep(&ugeth->napi)) {
> -			uccm &= ~UCCE_RX_EVENTS;
> +			uccm &= ~(UCCE_RX_EVENTS | UCCE_TX_EVENTS);
>  			out_be32(uccf->p_uccm, uccm);
>  			__netif_rx_schedule(&ugeth->napi);
>  		}
>  	}
>  
> -	/* Tx event processing */
> -	if (ucce & UCCE_TX_EVENTS) {
> -		spin_lock(&ugeth->lock);
> -		tx_mask = UCC_GETH_UCCE_TXB0;
> -		for (i = 0; i < ug_info->numQueuesTx; i++) {
> -			if (ucce & tx_mask)
> -				ucc_geth_tx(dev, i);
> -			ucce &= ~tx_mask;
> -			tx_mask <<= 1;
> -		}
> -		spin_unlock(&ugeth->lock);
> -	}
> -
>  	/* Errors and other events */
>  	if (ucce & UCCE_OTHER) {
>  		if (ucce & UCC_GETH_UCCE_BSY)
> @@ -3733,7 +3725,7 @@ static int ucc_geth_probe(struct of_device* ofdev, const struct of_device_id *ma
>  	dev->netdev_ops = &ucc_geth_netdev_ops;
>  	dev->watchdog_timeo = TX_TIMEOUT;
>  	INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work);
> -	netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, UCC_GETH_DEV_WEIGHT);
> +	netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 64);
>  	dev->mtu = 1500;
>  
>  	ugeth->msg_enable = netif_msg_init(debug.msg_enable, UGETH_MSG_DEFAULT);
> diff --git a/drivers/net/ucc_geth.h b/drivers/net/ucc_geth.h
> index 44218b8..50bad53 100644
> --- a/drivers/net/ucc_geth.h
> +++ b/drivers/net/ucc_geth.h
> @@ -843,7 +843,6 @@ struct ucc_geth_hardware_statistics {
>  /* Driver definitions */
>  #define TX_BD_RING_LEN                          0x10
>  #define RX_BD_RING_LEN                          0x10
> -#define UCC_GETH_DEV_WEIGHT                     TX_BD_RING_LEN
>  
>  #define TX_RING_MOD_MASK(size)                  (size-1)
>  #define RX_RING_MOD_MASK(size)                  (size-1)
Joakim Tjernlund - March 26, 2009, 4:55 p.m.
Eric Dumazet <dada1@cosmosbay.com> wrote on 26/03/2009 15:15:25:
> 
> Joakim Tjernlund a écrit :
> > Also set NAPI weight to 64 as this is a common value.
> > This will make the system alot more responsive while
> > ping flooding the ucc_geth ethernet interaface.
> > 
> > Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> > ---
> >  drivers/net/ucc_geth.c |   32 ++++++++++++--------------------
> >  drivers/net/ucc_geth.h |    1 -
> >  2 files changed, 12 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
> > index 097aed8..7fc91aa 100644
> > --- a/drivers/net/ucc_geth.c
> > +++ b/drivers/net/ucc_geth.c
> > @@ -3214,7 +3214,7 @@ static int ucc_geth_tx(struct net_device *dev, 
u8 txQ)
> >        dev->stats.tx_packets++;
> > 
> >        /* Free the sk buffer associated with this TxBD */
> > -      dev_kfree_skb_irq(ugeth->
> > +      dev_kfree_skb(ugeth->
> >                tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]]);
> >        ugeth->tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]] = NULL;
> >        ugeth->skb_dirtytx[txQ] =
> > @@ -3248,9 +3248,16 @@ static int ucc_geth_poll(struct napi_struct 
*napi, int budget)
> 
> >     for (i = 0; i < ug_info->numQueuesRx; i++)
> >        howmany += ucc_geth_rx(ugeth, i, budget - howmany);
> 
> Not related to your patch, but seeing above code, I can understand
> you might have a problem in flood situation : Only first queue(s) are
> depleted, and last one cannot be because howmany >= budget.
> 
> This driver might have to remember last queue it handled at previous
> call ucc_geth_poll(), so that all rxqueues have same probability to be 
scanned.
> 
> j = ug->lastslot;
> for (i = 0; ug_info->numQueuesRx; i++) {
>    if (++j >= ug_info->numQueuesRx)
>       j = 0;
>    howmany += ucc_geth_rx(ugeth, j, budget - howmany);
>    if (howmany >= budget)
>       break;
> }
> ug->lastslot = j;

yes, or scan the RX queues in prio order. However ATM there is only
one queue so it won't be a problem until one extends the driver with
several queues.

Thanks,
        Jocke
Yang Li - March 27, 2009, 10:50 a.m.
On Thu, Mar 26, 2009 at 8:54 PM, Joakim Tjernlund
<Joakim.Tjernlund@transmode.se> wrote:
> Also set NAPI weight to 64 as this is a common value.
> This will make the system alot more responsive while
> ping flooding the ucc_geth ethernet interaface.
>
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> ---
>  drivers/net/ucc_geth.c |   32 ++++++++++++--------------------
>  drivers/net/ucc_geth.h |    1 -
>  2 files changed, 12 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
> index 097aed8..7fc91aa 100644
> --- a/drivers/net/ucc_geth.c
> +++ b/drivers/net/ucc_geth.c
> @@ -3214,7 +3214,7 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
>                dev->stats.tx_packets++;
>
>                /* Free the sk buffer associated with this TxBD */
> -               dev_kfree_skb_irq(ugeth->
> +               dev_kfree_skb(ugeth->
>                                  tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]]);
>                ugeth->tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]] = NULL;
>                ugeth->skb_dirtytx[txQ] =
> @@ -3248,9 +3248,16 @@ static int ucc_geth_poll(struct napi_struct *napi, int budget)
>        for (i = 0; i < ug_info->numQueuesRx; i++)
>                howmany += ucc_geth_rx(ugeth, i, budget - howmany);
>
> +       /* Tx event processing */
> +       spin_lock(&ugeth->lock);
> +       for (i = 0; i < ug_info->numQueuesTx; i++) {
> +               ucc_geth_tx(ugeth->dev, i);
> +       }
> +       spin_unlock(&ugeth->lock);
> +
>        if (howmany < budget) {
>                netif_rx_complete(napi);
> -               setbits32(ugeth->uccf->p_uccm, UCCE_RX_EVENTS);
> +               setbits32(ugeth->uccf->p_uccm, UCCE_RX_EVENTS | UCCE_TX_EVENTS);
>        }
>
>        return howmany;
> @@ -3264,8 +3271,6 @@ static irqreturn_t ucc_geth_irq_handler(int irq, void *info)
>        struct ucc_geth_info *ug_info;
>        register u32 ucce;
>        register u32 uccm;
> -       register u32 tx_mask;
> -       u8 i;
>
>        ugeth_vdbg("%s: IN", __func__);
>
> @@ -3279,27 +3284,14 @@ static irqreturn_t ucc_geth_irq_handler(int irq, void *info)
>        out_be32(uccf->p_ucce, ucce);
>
>        /* check for receive events that require processing */
> -       if (ucce & UCCE_RX_EVENTS) {
> +       if (ucce & (UCCE_RX_EVENTS | UCCE_TX_EVENTS)) {
>                if (netif_rx_schedule_prep(&ugeth->napi)) {
> -                       uccm &= ~UCCE_RX_EVENTS;
> +                       uccm &= ~(UCCE_RX_EVENTS | UCCE_TX_EVENTS);
>                        out_be32(uccf->p_uccm, uccm);
>                        __netif_rx_schedule(&ugeth->napi);
>                }
>        }
>
> -       /* Tx event processing */
> -       if (ucce & UCCE_TX_EVENTS) {
> -               spin_lock(&ugeth->lock);
> -               tx_mask = UCC_GETH_UCCE_TXB0;
> -               for (i = 0; i < ug_info->numQueuesTx; i++) {
> -                       if (ucce & tx_mask)
> -                               ucc_geth_tx(dev, i);
> -                       ucce &= ~tx_mask;
> -                       tx_mask <<= 1;
> -               }
> -               spin_unlock(&ugeth->lock);
> -       }
> -
>        /* Errors and other events */
>        if (ucce & UCCE_OTHER) {
>                if (ucce & UCC_GETH_UCCE_BSY)
> @@ -3733,7 +3725,7 @@ static int ucc_geth_probe(struct of_device* ofdev, const struct of_device_id *ma
>        dev->netdev_ops = &ucc_geth_netdev_ops;
>        dev->watchdog_timeo = TX_TIMEOUT;
>        INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work);
> -       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, UCC_GETH_DEV_WEIGHT);
> +       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 64);

It doesn't make sense to have larger napi budget than the size of RX
BD ring.  You can't have more BDs than RX_BD_RING_LEN in backlog for
napi_poll to process.  Increase the RX_BD_RING_LEN if you want to
increase UCC_GETH_DEV_WEIGHT.  However please also provide the
performance comparison for this kind of change.  Thanks

- Leo
Joakim Tjernlund - March 27, 2009, 11:52 a.m.
pku.leo@gmail.com wrote on 27/03/2009 11:50:09:
> 
> On Thu, Mar 26, 2009 at 8:54 PM, Joakim Tjernlund
> <Joakim.Tjernlund@transmode.se> wrote:
> > Also set NAPI weight to 64 as this is a common value.
> > This will make the system alot more responsive while
> > ping flooding the ucc_geth ethernet interaface.
> >
> > Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> > ---
> >        /* Errors and other events */
> >        if (ucce & UCCE_OTHER) {
> >                if (ucce & UCC_GETH_UCCE_BSY)
> > @@ -3733,7 +3725,7 @@ static int ucc_geth_probe(struct of_device* 
ofdev, const struct of_device_id *ma
> >        dev->netdev_ops = &ucc_geth_netdev_ops;
> >        dev->watchdog_timeo = TX_TIMEOUT;
> >        INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work);
> > -       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 
UCC_GETH_DEV_WEIGHT);
> > +       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 64);
> 
> It doesn't make sense to have larger napi budget than the size of RX
> BD ring.  You can't have more BDs than RX_BD_RING_LEN in backlog for
> napi_poll to process.  Increase the RX_BD_RING_LEN if you want to
> increase UCC_GETH_DEV_WEIGHT.  However please also provide the
> performance comparison for this kind of change.  Thanks

Bring it up with David Miller. After my initial attempt to just increase
the weight somewhat, he requested that I hardcode it to 64. Just read the
whole thread.
If I don't increase the weight somewhat, ping -f -l 3 almost halts the board.
Logging in takes forever. These are my "performance numbers".

weight theory:
Before the driver gets to the end of a full BD ring, new packets arrive, so that
even if the BD ring is only 16 entries, the driver wants to process 17 or more
packets.
David Miller - March 27, 2009, 9:55 p.m.
From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Date: Fri, 27 Mar 2009 12:52:41 +0100

> pku.leo@gmail.com wrote on 27/03/2009 11:50:09:
> > It doesn't make sense to have larger napi budget than the size of RX
> > BD ring.  You can't have more BDs than RX_BD_RING_LEN in backlog for
> > napi_poll to process.  Increase the RX_BD_RING_LEN if you want to
> > increase UCC_GETH_DEV_WEIGHT.  However please also provide the
> > performance comparison for this kind of change.  Thanks
> 
> Bring it up with David Miller.

Right I told him to make this very change.

Pretty soon the drivers won't be able to select this value
and it will default to 64 everywhere anyway.

"performance comparison"?  He can't do that unless he tests
performance with two active NAPI devices on the same exact
cpu as the weight value is for fairness between devices.

So you don't tweak "weight" to make one device perform better,
you tweak it to eliminate unfairness between multiple devices.
And the only way to achieve that is to use similar values
across all devices, perhaps link-speed rated which we can't
do just yet, and that's why I told him to use 64 just like
every other driver basically does.
Yang Li - March 30, 2009, 7:36 a.m.
On Fri, Mar 27, 2009 at 7:52 PM, Joakim Tjernlund
<Joakim.Tjernlund@transmode.se> wrote:
> pku.leo@gmail.com wrote on 27/03/2009 11:50:09:
>>
>> On Thu, Mar 26, 2009 at 8:54 PM, Joakim Tjernlund
>> <Joakim.Tjernlund@transmode.se> wrote:
>> > Also set NAPI weight to 64 as this is a common value.
>> > This will make the system alot more responsive while
>> > ping flooding the ucc_geth ethernet interaface.
>> >
>> > Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
>> > ---
>> >        /* Errors and other events */
>> >        if (ucce & UCCE_OTHER) {
>> >                if (ucce & UCC_GETH_UCCE_BSY)
>> > @@ -3733,7 +3725,7 @@ static int ucc_geth_probe(struct of_device*
> ofdev, const struct of_device_id *ma
>> >        dev->netdev_ops = &ucc_geth_netdev_ops;
>> >        dev->watchdog_timeo = TX_TIMEOUT;
>> >        INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work);
>> > -       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll,
> UCC_GETH_DEV_WEIGHT);
>> > +       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 64);
>>
>> It doesn't make sense to have larger napi budget than the size of RX
>> BD ring.  You can't have more BDs than RX_BD_RING_LEN in backlog for
>> napi_poll to process.  Increase the RX_BD_RING_LEN if you want to
>> increase UCC_GETH_DEV_WEIGHT.  However please also provide the
>> performance comparison for this kind of change.  Thanks
>
> Bring it up with David Miller. After my initial attempt to just increase
> weight somewhat, he requested that I hardcoded it to 64. Just read the
> whole thread.
> If I don't increase weight somewhat, ping -f -l 3 almost halts the board.
> Logging
> in takes forever. These are my "performance numbers".

Faster response time is surely good.  But it might also mean CPU is
not fully loaded.  IMHO, throughput is a more important factor for
network devices.  When you try to optimize the driver, please also
consider the throughput change.  Thanks.

- Leo
Joakim Tjernlund - March 30, 2009, 7:48 a.m.
pku.leo@gmail.com wrote on 30/03/2009 09:36:21:
> 
> On Fri, Mar 27, 2009 at 7:52 PM, Joakim Tjernlund
> <Joakim.Tjernlund@transmode.se> wrote:
> > pku.leo@gmail.com wrote on 27/03/2009 11:50:09:
> >>
> >> On Thu, Mar 26, 2009 at 8:54 PM, Joakim Tjernlund
> >> <Joakim.Tjernlund@transmode.se> wrote:
> >> > Also set NAPI weight to 64 as this is a common value.
> >> > This will make the system alot more responsive while
> >> > ping flooding the ucc_geth ethernet interaface.
> >> >
> >> > Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
> >> > ---
> >> >        /* Errors and other events */
> >> >        if (ucce & UCCE_OTHER) {
> >> >                if (ucce & UCC_GETH_UCCE_BSY)
> >> > @@ -3733,7 +3725,7 @@ static int ucc_geth_probe(struct of_device*
> > ofdev, const struct of_device_id *ma
> >> >        dev->netdev_ops = &ucc_geth_netdev_ops;
> >> >        dev->watchdog_timeo = TX_TIMEOUT;
> >> >        INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work);
> >> > -       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll,
> > UCC_GETH_DEV_WEIGHT);
> >> > +       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 64);
> >>
> >> It doesn't make sense to have larger napi budget than the size of RX
> >> BD ring.  You can't have more BDs than RX_BD_RING_LEN in backlog for
> >> napi_poll to process.  Increase the RX_BD_RING_LEN if you want to
> >> increase UCC_GETH_DEV_WEIGHT.  However please also provide the
> >> performance comparison for this kind of change.  Thanks
> >
> > Bring it up with David Miller. After my initial attempt to just 
increase
> > weight somewhat, he requested that I hardcoded it to 64. Just read the
> > whole thread.
> > If I don't increase weight somewhat, ping -f -l 3 almost halts the 
board.
> > Logging
> > in takes forever. These are my "performance numbers".
> 
> Faster response time is surely good.  But it might also mean CPU is
> not fully loaded.  IMHO, throughput is a more important factor for
> network devices.  When you try to optimize the driver, please also
> consider the throughput change.  Thanks.

This particular change isn't about performance, it is about not
"bricking" the board during heavy traffic. Next step is to optimize
the driver.

 Jocke
Yang Li - March 30, 2009, 7:57 a.m.
On Mon, Mar 30, 2009 at 3:48 PM, Joakim Tjernlund
<Joakim.Tjernlund@transmode.se> wrote:
> pku.leo@gmail.com wrote on 30/03/2009 09:36:21:
>>
>> On Fri, Mar 27, 2009 at 7:52 PM, Joakim Tjernlund
>> <Joakim.Tjernlund@transmode.se> wrote:
>> > pku.leo@gmail.com wrote on 27/03/2009 11:50:09:
>> >>
>> >> On Thu, Mar 26, 2009 at 8:54 PM, Joakim Tjernlund
>> >> <Joakim.Tjernlund@transmode.se> wrote:
>> >> > Also set NAPI weight to 64 as this is a common value.
>> >> > This will make the system alot more responsive while
>> >> > ping flooding the ucc_geth ethernet interaface.
>> >> >
>> >> > Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
>> >> > ---
>> >> >        /* Errors and other events */
>> >> >        if (ucce & UCCE_OTHER) {
>> >> >                if (ucce & UCC_GETH_UCCE_BSY)
>> >> > @@ -3733,7 +3725,7 @@ static int ucc_geth_probe(struct of_device*
>> > ofdev, const struct of_device_id *ma
>> >> >        dev->netdev_ops = &ucc_geth_netdev_ops;
>> >> >        dev->watchdog_timeo = TX_TIMEOUT;
>> >> >        INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work);
>> >> > -       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll,
>> > UCC_GETH_DEV_WEIGHT);
>> >> > +       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 64);
>> >>
>> >> It doesn't make sense to have larger napi budget than the size of RX
>> >> BD ring.  You can't have more BDs than RX_BD_RING_LEN in backlog for
>> >> napi_poll to process.  Increase the RX_BD_RING_LEN if you want to
>> >> increase UCC_GETH_DEV_WEIGHT.  However please also provide the
>> >> performance comparison for this kind of change.  Thanks
>> >
>> > Bring it up with David Miller. After my initial attempt to just
> increase
>> > weight somewhat, he requested that I hardcoded it to 64. Just read the
>> > whole thread.
>> > If I don't increase weight somewhat, ping -f -l 3 almost halts the
> board.
>> > Logging
>> > in takes forever. These are my "performance numbers".
>>
>> Faster response time is surely good.  But it might also mean CPU is
>> not fully loaded.  IMHO, throughput is a more important factor for
>> network devices.  When you try to optimize the driver, please also
>> consider the throughput change.  Thanks.
>
> This particular change isn't about performance, it is about not
> "bricking" the board during heavy traffic. Next step is to optimize
> the driver.

Sure.  I mean for other changes like tx NAPI, ring size tweak and tx logic.

- Leo

Patch

diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index 097aed8..7fc91aa 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -3214,7 +3214,7 @@  static int ucc_geth_tx(struct net_device *dev, u8 txQ)
 		dev->stats.tx_packets++;
 
 		/* Free the sk buffer associated with this TxBD */
-		dev_kfree_skb_irq(ugeth->
+		dev_kfree_skb(ugeth->
 				  tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]]);
 		ugeth->tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]] = NULL;
 		ugeth->skb_dirtytx[txQ] =
@@ -3248,9 +3248,16 @@  static int ucc_geth_poll(struct napi_struct *napi, int budget)
 	for (i = 0; i < ug_info->numQueuesRx; i++)
 		howmany += ucc_geth_rx(ugeth, i, budget - howmany);
 
+	/* Tx event processing */
+	spin_lock(&ugeth->lock);
+	for (i = 0; i < ug_info->numQueuesTx; i++) {
+		ucc_geth_tx(ugeth->dev, i);
+	}
+	spin_unlock(&ugeth->lock);
+
 	if (howmany < budget) {
 		netif_rx_complete(napi);
-		setbits32(ugeth->uccf->p_uccm, UCCE_RX_EVENTS);
+		setbits32(ugeth->uccf->p_uccm, UCCE_RX_EVENTS | UCCE_TX_EVENTS);
 	}
 
 	return howmany;
@@ -3264,8 +3271,6 @@  static irqreturn_t ucc_geth_irq_handler(int irq, void *info)
 	struct ucc_geth_info *ug_info;
 	register u32 ucce;
 	register u32 uccm;
-	register u32 tx_mask;
-	u8 i;
 
 	ugeth_vdbg("%s: IN", __func__);
 
@@ -3279,27 +3284,14 @@  static irqreturn_t ucc_geth_irq_handler(int irq, void *info)
 	out_be32(uccf->p_ucce, ucce);
 
 	/* check for receive events that require processing */
-	if (ucce & UCCE_RX_EVENTS) {
+	if (ucce & (UCCE_RX_EVENTS | UCCE_TX_EVENTS)) {
 		if (netif_rx_schedule_prep(&ugeth->napi)) {
-			uccm &= ~UCCE_RX_EVENTS;
+			uccm &= ~(UCCE_RX_EVENTS | UCCE_TX_EVENTS);
 			out_be32(uccf->p_uccm, uccm);
 			__netif_rx_schedule(&ugeth->napi);
 		}
 	}
 
-	/* Tx event processing */
-	if (ucce & UCCE_TX_EVENTS) {
-		spin_lock(&ugeth->lock);
-		tx_mask = UCC_GETH_UCCE_TXB0;
-		for (i = 0; i < ug_info->numQueuesTx; i++) {
-			if (ucce & tx_mask)
-				ucc_geth_tx(dev, i);
-			ucce &= ~tx_mask;
-			tx_mask <<= 1;
-		}
-		spin_unlock(&ugeth->lock);
-	}
-
 	/* Errors and other events */
 	if (ucce & UCCE_OTHER) {
 		if (ucce & UCC_GETH_UCCE_BSY)
@@ -3733,7 +3725,7 @@  static int ucc_geth_probe(struct of_device* ofdev, const struct of_device_id *ma
 	dev->netdev_ops = &ucc_geth_netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 	INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work);
-	netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, UCC_GETH_DEV_WEIGHT);
+	netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 64);
 	dev->mtu = 1500;
 
 	ugeth->msg_enable = netif_msg_init(debug.msg_enable, UGETH_MSG_DEFAULT);
diff --git a/drivers/net/ucc_geth.h b/drivers/net/ucc_geth.h
index 44218b8..50bad53 100644
--- a/drivers/net/ucc_geth.h
+++ b/drivers/net/ucc_geth.h
@@ -843,7 +843,6 @@  struct ucc_geth_hardware_statistics {
 /* Driver definitions */
 #define TX_BD_RING_LEN                          0x10
 #define RX_BD_RING_LEN                          0x10
-#define UCC_GETH_DEV_WEIGHT                     TX_BD_RING_LEN
 
 #define TX_RING_MOD_MASK(size)                  (size-1)
 #define RX_RING_MOD_MASK(size)                  (size-1)