diff mbox

forcedeth: Stay in NAPI as long as there's work

Message ID alpine.DEB.1.00.1004272332350.30235@pokey.mtv.corp.google.com
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Tom Herbert April 28, 2010, 6:36 a.m. UTC
Add loop in NAPI poll routine to keep processing RX and TX as long as
there is more work to do.  This is similar to what tg3 and some other
drivers do.

This modification seems improves performance (maximum pps).  Running
500 instances of netperf TCP_RR test with one byte sizes on between
two sixteen core AMD machines (RPS enabled) gives:

Before patch: 186715 tps
With patch: 400949 tps

Signed-off-by: Tom Herbert <therbert@google.com>
---
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Joe Perches April 28, 2010, 5:54 p.m. UTC | #1
On Tue, 2010-04-27 at 23:36 -0700, Tom Herbert wrote:
> Add loop in NAPI poll routine to keep processing RX and TX as long as
> there is more work to do.  This is similar to what tg3 and some other
> drivers do.
> Signed-off-by: Tom Herbert <therbert@google.com>
> ---
> diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
> index a1c0e7b..1e4de7b 100644
> --- a/drivers/net/forcedeth.c
> +++ b/drivers/net/forcedeth.c
> @@ -3736,6 +3736,23 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data)
>  }
>  
>  #ifdef CONFIG_FORCEDETH_NAPI
> +static inline int nv_has_work(struct fe_priv *np)
> +{
> +	if (nv_optimized(np)) {
> +		return (
> +		    ((np->get_rx.ex != np->put_rx.ex) &&
> +		     !(le32_to_cpu(np->get_rx.ex->flaglen) & NV_RX2_AVAIL)) ||
> +		    ((np->get_tx.ex != np->put_tx.ex) &&
> +		     !(le32_to_cpu(np->get_tx.ex->flaglen) & NV_TX_VALID)));
> +	} else {
> +		return (
> +		    ((np->get_rx.orig != np->put_rx.orig) &&
> +		     !(le32_to_cpu(np->get_rx.orig->flaglen) & NV_RX_AVAIL)) ||
> +		    ((np->get_tx.orig != np->put_tx.orig) &&
> +		     !(le32_to_cpu(np->get_tx.orig->flaglen) & NV_TX_VALID)));
> +	}
> +}

It might be better to test the comparisons using
a cpu_to_le32 of the constants.

static inline int nv_has_work(struct fe_priv *np)
{
	if (nv_optimized(np))
		return ((np->get_rx.ex != np->put_rx.ex) &&
			!(np->get_rx.ex->flaglen & cpu_to_le32(NV_RX2_AVAIL))) ||
		       ((np->get_tx.ex != np->put_tx.ex) &&
			!(np->get_tx.ex->flaglen & cpu_to_le32(NV_TX_VALID)));

	return ((np->get_rx.orig != np->put_rx.orig) &&
		!(np->get_rx.orig->flaglen & cpu_to_le32(NV_RX_AVAIL))) ||
	       ((np->get_tx.orig != np->put_tx.orig) &&
		!(np->get_tx.orig->flaglen & cpu_to_le32(NV_TX_VALID)));
}


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tom Herbert April 28, 2010, 6:07 p.m. UTC | #2
>
> It might be better to test the comparisons using
> a cpu_to_le32 of the constants.
>
Yes.  Probably should also be changed in nv_tx_done{_optimized} and
nv_rx_process{_optimized}

> static inline int nv_has_work(struct fe_priv *np)
> {
>        if (nv_optimized(np))
>                return ((np->get_rx.ex != np->put_rx.ex) &&
>                        !(np->get_rx.ex->flaglen & cpu_to_le32(NV_RX2_AVAIL))) ||
>                       ((np->get_tx.ex != np->put_tx.ex) &&
>                        !(np->get_tx.ex->flaglen & cpu_to_le32(NV_TX_VALID)));
>
>        return ((np->get_rx.orig != np->put_rx.orig) &&
>                !(np->get_rx.orig->flaglen & cpu_to_le32(NV_RX_AVAIL))) ||
>               ((np->get_tx.orig != np->put_tx.orig) &&
>                !(np->get_tx.orig->flaglen & cpu_to_le32(NV_TX_VALID)));
> }
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
stephen hemminger April 28, 2010, 6:13 p.m. UTC | #3
On Wed, 28 Apr 2010 10:54:11 -0700
Joe Perches <joe@perches.com> wrote:

> On Tue, 2010-04-27 at 23:36 -0700, Tom Herbert wrote:
> > Add loop in NAPI poll routine to keep processing RX and TX as long as
> > there is more work to do.  This is similar to what tg3 and some other
> > drivers do.
> > Signed-off-by: Tom Herbert <therbert@google.com>
> > ---
> > diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
> > index a1c0e7b..1e4de7b 100644
> > --- a/drivers/net/forcedeth.c
> > +++ b/drivers/net/forcedeth.c
> > @@ -3736,6 +3736,23 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data)
> >  }
> >  
> >  #ifdef CONFIG_FORCEDETH_NAPI
> > +static inline int nv_has_work(struct fe_priv *np)
> > +{
> > +	if (nv_optimized(np)) {
> > +		return (
> > +		    ((np->get_rx.ex != np->put_rx.ex) &&
> > +		     !(le32_to_cpu(np->get_rx.ex->flaglen) & NV_RX2_AVAIL)) ||
> > +		    ((np->get_tx.ex != np->put_tx.ex) &&
> > +		     !(le32_to_cpu(np->get_tx.ex->flaglen) & NV_TX_VALID)));
> > +	} else {
> > +		return (
> > +		    ((np->get_rx.orig != np->put_rx.orig) &&
> > +		     !(le32_to_cpu(np->get_rx.orig->flaglen) & NV_RX_AVAIL)) ||
> > +		    ((np->get_tx.orig != np->put_tx.orig) &&
> > +		     !(le32_to_cpu(np->get_tx.orig->flaglen) & NV_TX_VALID)));
> > +	}
> > +}

Why than adding another check step, why not just keep going until
rx_done returns 0?

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tom Herbert April 28, 2010, 6:18 p.m. UTC | #4
On Wed, Apr 28, 2010 at 11:07 AM, Tom Herbert <therbert@google.com> wrote:
>>
>> It might be better to test the comparisons using
>> a cpu_to_le32 of the constants.
>>
> Yes.  Probably should also be changed in nv_tx_done{_optimized} and
> nv_rx_process{_optimized}
>
Scratch that.  flags are checked all over the place in those other functions.

>> static inline int nv_has_work(struct fe_priv *np)
>> {
>>        if (nv_optimized(np))
>>                return ((np->get_rx.ex != np->put_rx.ex) &&
>>                        !(np->get_rx.ex->flaglen & cpu_to_le32(NV_RX2_AVAIL))) ||
>>                       ((np->get_tx.ex != np->put_tx.ex) &&
>>                        !(np->get_tx.ex->flaglen & cpu_to_le32(NV_TX_VALID)));
>>
>>        return ((np->get_rx.orig != np->put_rx.orig) &&
>>                !(np->get_rx.orig->flaglen & cpu_to_le32(NV_RX_AVAIL))) ||
>>               ((np->get_tx.orig != np->put_tx.orig) &&
>>                !(np->get_tx.orig->flaglen & cpu_to_le32(NV_TX_VALID)));
>> }
>>
>>
>>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index a1c0e7b..1e4de7b 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -3736,6 +3736,23 @@  static irqreturn_t nv_nic_irq_tx(int foo, void *data)
 }
 
 #ifdef CONFIG_FORCEDETH_NAPI
+static inline int nv_has_work(struct fe_priv *np)
+{
+	if (nv_optimized(np)) {
+		return (
+		    ((np->get_rx.ex != np->put_rx.ex) &&
+		     !(le32_to_cpu(np->get_rx.ex->flaglen) & NV_RX2_AVAIL)) ||
+		    ((np->get_tx.ex != np->put_tx.ex) &&
+		     !(le32_to_cpu(np->get_tx.ex->flaglen) & NV_TX_VALID)));
+	} else {
+		return (
+		    ((np->get_rx.orig != np->put_rx.orig) &&
+		     !(le32_to_cpu(np->get_rx.orig->flaglen) & NV_RX_AVAIL)) ||
+		    ((np->get_tx.orig != np->put_tx.orig) &&
+		     !(le32_to_cpu(np->get_tx.orig->flaglen) & NV_TX_VALID)));
+	}
+}
+
 static int nv_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct fe_priv *np = container_of(napi, struct fe_priv, napi);
@@ -3743,30 +3760,33 @@  static int nv_napi_poll(struct napi_struct *napi, int budget)
 	u8 __iomem *base = get_hwbase(dev);
 	unsigned long flags;
 	int retcode;
-	int tx_work, rx_work;
+	int tx_work = 0, rx_work = 0;
 
-	if (!nv_optimized(np)) {
-		spin_lock_irqsave(&np->lock, flags);
-		tx_work = nv_tx_done(dev, np->tx_ring_size);
-		spin_unlock_irqrestore(&np->lock, flags);
+	do {
+		if (!nv_optimized(np)) {
+			spin_lock_irqsave(&np->lock, flags);
+			tx_work += nv_tx_done(dev, np->tx_ring_size);
+			spin_unlock_irqrestore(&np->lock, flags);
 
-		rx_work = nv_rx_process(dev, budget);
-		retcode = nv_alloc_rx(dev);
-	} else {
-		spin_lock_irqsave(&np->lock, flags);
-		tx_work = nv_tx_done_optimized(dev, np->tx_ring_size);
-		spin_unlock_irqrestore(&np->lock, flags);
+			rx_work += nv_rx_process(dev, budget);
+			retcode = nv_alloc_rx(dev);
+		} else {
+			spin_lock_irqsave(&np->lock, flags);
+			tx_work += nv_tx_done_optimized(dev, np->tx_ring_size);
+			spin_unlock_irqrestore(&np->lock, flags);
 
-		rx_work = nv_rx_process_optimized(dev, budget);
-		retcode = nv_alloc_rx_optimized(dev);
-	}
+			rx_work += nv_rx_process_optimized(dev, budget);
+			retcode = nv_alloc_rx_optimized(dev);
+		}
 
-	if (retcode) {
-		spin_lock_irqsave(&np->lock, flags);
-		if (!np->in_shutdown)
-			mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
-		spin_unlock_irqrestore(&np->lock, flags);
-	}
+		if (unlikely(retcode)) {
+			spin_lock_irqsave(&np->lock, flags);
+			if (!np->in_shutdown)
+				mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
+			spin_unlock_irqrestore(&np->lock, flags);
+			break;
+		}
+	} while (rx_work < budget && nv_has_work(np));
 
 	nv_change_interrupt_mode(dev, tx_work + rx_work);