Patchwork [2/2] netdev/fec: fix performance impact from mdio poll operation

login
register
mail settings
Submitter Bryan Wu
Date April 8, 2010, 6:44 a.m.
Message ID <1270709099-9303-2-git-send-email-bryan.wu@canonical.com>
Download mbox | patch
Permalink /patch/49698/
State Accepted
Delegated to: Andy Whitcroft
Headers show

Comments

Bryan Wu - April 8, 2010, 6:44 a.m.
BugLink: http://bugs.launchpad.net/bugs/546649
BugLink: http://bugs.launchpad.net/bugs/457878

After introducing phylib supporting, users experienced performace drop. That is
because of the mdio polling operation of phylib. Use msleep to replace the busy
waiting cpu_relax() and remove the warning message.

Signed-off-by: Bryan Wu <bryan.wu@canonical.com>
---
 drivers/net/fec.c |   45 +++++++++++++++++++++------------------------
 1 files changed, 21 insertions(+), 24 deletions(-)
Chase Douglas - April 8, 2010, 8:38 p.m.
On Thu, Apr 8, 2010 at 2:44 AM, Bryan Wu <bryan.wu@canonical.com> wrote:
> BugLink: http://bugs.launchpad.net/bugs/546649
> BugLink: http://bugs.launchpad.net/bugs/457878
>
> After introducing phylib supporting, users experienced performace drop. That is
> because of the mdio polling operation of phylib. Use msleep to replace the busy
> waiting cpu_relax() and remove the warning message.

Is this performance drop in terms of overall system performance
because of the cpu_relax loop, or network/driver performance? I'm
assuming the former, but I want to make sure.

I was going to ask whether this was safe due to the potential for
msleep being run in an interrupt context. However, I found this
comment which calls the read() function that is reassuring:

http://lxr.linux.no/linux+v2.6.33/drivers/net/phy/mdio_bus.c#L201

Although things look ok to me, I don't have enough insight into this
driver to be able to give an Ack.

-- Chase

> Signed-off-by: Bryan Wu <bryan.wu@canonical.com>
> ---
>  drivers/net/fec.c |   45 +++++++++++++++++++++------------------------
>  1 files changed, 21 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/net/fec.c b/drivers/net/fec.c
> index 53240d3..2280373 100644
> --- a/drivers/net/fec.c
> +++ b/drivers/net/fec.c
> @@ -216,7 +216,7 @@ static void fec_stop(struct net_device *dev);
>  #define FEC_MMFR_TA            (2 << 16)
>  #define FEC_MMFR_DATA(v)       (v & 0xffff)
>
> -#define FEC_MII_TIMEOUT                10000
> +#define FEC_MII_TIMEOUT                10
>
>  /* Transmitter timeout */
>  #define TX_TIMEOUT (2 * HZ)
> @@ -624,13 +624,29 @@ spin_unlock:
>  /*
>  * NOTE: a MII transaction is during around 25 us, so polling it...
>  */
> -static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
> +static int fec_enet_mdio_poll(struct fec_enet_private *fep)
>  {
> -       struct fec_enet_private *fep = bus->priv;
>        int timeout = FEC_MII_TIMEOUT;
>
>        fep->mii_timeout = 0;
>
> +       /* wait for end of transfer */
> +       while (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_MII)) {
> +               msleep(1);
> +               if (timeout-- < 0) {
> +                       fep->mii_timeout = 1;
> +                       break;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
> +{
> +       struct fec_enet_private *fep = bus->priv;
> +
> +
>        /* clear MII end of transfer bit*/
>        writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
>
> @@ -639,15 +655,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>                FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
>                FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
>
> -       /* wait for end of transfer */
> -       while (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_MII)) {
> -               cpu_relax();
> -               if (timeout-- < 0) {
> -                       fep->mii_timeout = 1;
> -                       printk(KERN_ERR "FEC: MDIO read timeout\n");
> -                       return -ETIMEDOUT;
> -               }
> -       }
> +       fec_enet_mdio_poll(fep);
>
>        /* return value */
>        return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA));
> @@ -657,9 +665,6 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
>                           u16 value)
>  {
>        struct fec_enet_private *fep = bus->priv;
> -       int timeout = FEC_MII_TIMEOUT;
> -
> -       fep->mii_timeout = 0;
>
>        /* clear MII end of transfer bit*/
>        writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
> @@ -670,15 +675,7 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
>                FEC_MMFR_TA | FEC_MMFR_DATA(value),
>                fep->hwp + FEC_MII_DATA);
>
> -       /* wait for end of transfer */
> -       while (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_MII)) {
> -               cpu_relax();
> -               if (timeout-- < 0) {
> -                       fep->mii_timeout = 1;
> -                       printk(KERN_ERR "FEC: MDIO write timeout\n");
> -                       return -ETIMEDOUT;
> -               }
> -       }
> +       fec_enet_mdio_poll(fep);
>
>        return 0;
>  }
> --
> 1.7.0.1
Bryan Wu - April 9, 2010, 2:19 a.m.
Chase Douglas wrote:
> On Thu, Apr 8, 2010 at 2:44 AM, Bryan Wu <bryan.wu@canonical.com> wrote:
>   
>> BugLink: http://bugs.launchpad.net/bugs/546649
>> BugLink: http://bugs.launchpad.net/bugs/457878
>>
>> After introducing phylib supporting, users experienced performace drop. That is
>> because of the mdio polling operation of phylib. Use msleep to replace the busy
>> waiting cpu_relax() and remove the warning message.
>>     
>
> Is this performance drop in terms of overall system performance
> because of the cpu_relax loop, or network/driver performance? I'm
> assuming the former, but I want to make sure.
>
>   
When we transfer some large file over the ethernet, the transfer speed 
will drop from 6MB/s to 3MB/s in my testing environment. And we also 
experienced some slow system response during the transfer.
So this cpu_relax() and timeout=10000 cause the system in busy wait for 
a mdio transaction finishing flag. It will cause the networking and 
system performance bad.

> I was going to ask whether this was safe due to the potential for
> msleep being run in an interrupt context. However, I found this
> comment which calls the read() function that is reassuring:
>
> http://lxr.linux.no/linux+v2.6.33/drivers/net/phy/mdio_bus.c#L201
>
>   

Yeah, that's right. mdio_{read|write} cannot be called in interrupt 
context. So msleep is pretty safe here. I found other netdev drivers 
also use this method to polling.

> Although things look ok to me, I don't have enough insight into this
> driver to be able to give an Ack.
>
>
>   
Thanks,
-Bryan
Andy Whitcroft - April 13, 2010, 7:09 p.m.
On Thu, Apr 08, 2010 at 02:44:59PM +0800, Bryan Wu wrote:
> BugLink: http://bugs.launchpad.net/bugs/546649
> BugLink: http://bugs.launchpad.net/bugs/457878
> 
> After introducing phylib supporting, users experienced performace drop. That is
> because of the mdio polling operation of phylib. Use msleep to replace the busy
> waiting cpu_relax() and remove the warning message.
> 
> Signed-off-by: Bryan Wu <bryan.wu@canonical.com>
> ---
>  drivers/net/fec.c |   45 +++++++++++++++++++++------------------------
>  1 files changed, 21 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/net/fec.c b/drivers/net/fec.c
> index 53240d3..2280373 100644
> --- a/drivers/net/fec.c
> +++ b/drivers/net/fec.c
> @@ -216,7 +216,7 @@ static void fec_stop(struct net_device *dev);
>  #define FEC_MMFR_TA		(2 << 16)
>  #define FEC_MMFR_DATA(v)	(v & 0xffff)
>  
> -#define FEC_MII_TIMEOUT		10000
> +#define FEC_MII_TIMEOUT		10
>  
>  /* Transmitter timeout */
>  #define TX_TIMEOUT (2 * HZ)
> @@ -624,13 +624,29 @@ spin_unlock:
>  /*
>   * NOTE: a MII transaction is during around 25 us, so polling it...
>   */
> -static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
> +static int fec_enet_mdio_poll(struct fec_enet_private *fep)
>  {
> -	struct fec_enet_private *fep = bus->priv;
>  	int timeout = FEC_MII_TIMEOUT;
>  
>  	fep->mii_timeout = 0;
>  
> +	/* wait for end of transfer */
> +	while (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_MII)) {
> +		msleep(1);
> +		if (timeout-- < 0) {
> +			fep->mii_timeout = 1;
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
> +{
> +	struct fec_enet_private *fep = bus->priv;
> +
> +
>  	/* clear MII end of transfer bit*/
>  	writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
>  
> @@ -639,15 +655,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>  		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
>  		FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
>  
> -	/* wait for end of transfer */
> -	while (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_MII)) {
> -		cpu_relax();
> -		if (timeout-- < 0) {
> -			fep->mii_timeout = 1;
> -			printk(KERN_ERR "FEC: MDIO read timeout\n");
> -			return -ETIMEDOUT;
> -		}
> -	}
> +	fec_enet_mdio_poll(fep);

You have pulled out this timeout loop, but in the replacement there is
not indication that it has timed-out.  In the old code we would have
returned -ETIMEDOUT and not touched MII_DATA (below).  In the new we
will timeout, mark mii_timeout but not return an indication of that, not
would we note it here, and so we would drop through and touch MII_DATA.
Is this an intentional change of semantics?

>  
>  	/* return value */
>  	return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA));
> @@ -657,9 +665,6 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
>  			   u16 value)
>  {
>  	struct fec_enet_private *fep = bus->priv;
> -	int timeout = FEC_MII_TIMEOUT;
> -
> -	fep->mii_timeout = 0;
>  
>  	/* clear MII end of transfer bit*/
>  	writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
> @@ -670,15 +675,7 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
>  		FEC_MMFR_TA | FEC_MMFR_DATA(value),
>  		fep->hwp + FEC_MII_DATA);
>  
> -	/* wait for end of transfer */
> -	while (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_MII)) {
> -		cpu_relax();
> -		if (timeout-- < 0) {
> -			fep->mii_timeout = 1;
> -			printk(KERN_ERR "FEC: MDIO write timeout\n");
> -			return -ETIMEDOUT;
> -		}
> -	}
> +	fec_enet_mdio_poll(fep);
>  
>  	return 0;
>  }

-apw
Bryan Wu - April 13, 2010, 9:37 p.m.
On 04/13/2010 12:09 PM, Andy Whitcroft wrote:
> On Thu, Apr 08, 2010 at 02:44:59PM +0800, Bryan Wu wrote:
>> BugLink: http://bugs.launchpad.net/bugs/546649
>> BugLink: http://bugs.launchpad.net/bugs/457878
>>
>> After introducing phylib supporting, users experienced performace drop. That is
>> because of the mdio polling operation of phylib. Use msleep to replace the busy
>> waiting cpu_relax() and remove the warning message.
>>
>> Signed-off-by: Bryan Wu<bryan.wu@canonical.com>
>> ---
>>   drivers/net/fec.c |   45 +++++++++++++++++++++------------------------
>>   1 files changed, 21 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/net/fec.c b/drivers/net/fec.c
>> index 53240d3..2280373 100644
>> --- a/drivers/net/fec.c
>> +++ b/drivers/net/fec.c
>> @@ -216,7 +216,7 @@ static void fec_stop(struct net_device *dev);
>>   #define FEC_MMFR_TA		(2<<  16)
>>   #define FEC_MMFR_DATA(v)	(v&  0xffff)
>>
>> -#define FEC_MII_TIMEOUT		10000
>> +#define FEC_MII_TIMEOUT		10
>>
>>   /* Transmitter timeout */
>>   #define TX_TIMEOUT (2 * HZ)
>> @@ -624,13 +624,29 @@ spin_unlock:
>>   /*
>>    * NOTE: a MII transaction is during around 25 us, so polling it...
>>    */
>> -static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>> +static int fec_enet_mdio_poll(struct fec_enet_private *fep)
>>   {
>> -	struct fec_enet_private *fep = bus->priv;
>>   	int timeout = FEC_MII_TIMEOUT;
>>
>>   	fep->mii_timeout = 0;
>>
>> +	/* wait for end of transfer */
>> +	while (!(readl(fep->hwp + FEC_IEVENT)&  FEC_ENET_MII)) {
>> +		msleep(1);
>> +		if (timeout--<  0) {
>> +			fep->mii_timeout = 1;
>> +			break;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>> +{
>> +	struct fec_enet_private *fep = bus->priv;
>> +
>> +
>>   	/* clear MII end of transfer bit*/
>>   	writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
>>
>> @@ -639,15 +655,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>>   		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
>>   		FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
>>
>> -	/* wait for end of transfer */
>> -	while (!(readl(fep->hwp + FEC_IEVENT)&  FEC_ENET_MII)) {
>> -		cpu_relax();
>> -		if (timeout--<  0) {
>> -			fep->mii_timeout = 1;
>> -			printk(KERN_ERR "FEC: MDIO read timeout\n");
>> -			return -ETIMEDOUT;
>> -		}
>> -	}
>> +	fec_enet_mdio_poll(fep);
>
> You have pulled out this timeout loop, but in the replacement there is
> not indication that it has timed-out.  In the old code we would have
> returned -ETIMEDOUT and not touched MII_DATA (below).  In the new we
> will timeout, mark mii_timeout but not return an indication of that, not
> would we note it here, and so we would drop through and touch MII_DATA.
> Is this an intentional change of semantics?
>

Since the timeout thing is not an error just a warning, the caller in phylib 
timer will call this function shortly and normally ignore the negative return 
value. I just looked up this busy timeout things in drivers/net/.

drivers/net/davinci_emac.c:
it uses endless loop:
/* Wait until mdio is ready for next command */
#define MDIO_WAIT_FOR_USER_ACCESS\
                 while ((emac_mdio_read((MDIO_USERACCESS(0))) &\
                         MDIO_USERACCESS_GO) != 0)


drivers/net/au1000_eth.c
it returns -1 when timeout

drivers/net/arm/at91_ether.c
it is the same as us, don't return any thing if timeout.

So actually, I am not sure about the return value requirement for this function. 
And I found several timeout warning when we do large file transfer. The hardware 
is not perfect.

But I will try to check the return value of the poll function, and let you guys 
know the result.

Thanks,
-Bryan
Eric Miao - April 13, 2010, 10:57 p.m.
On Wed, Apr 14, 2010 at 5:37 AM, Bryan Wu <bryan.wu@canonical.com> wrote:
> On 04/13/2010 12:09 PM, Andy Whitcroft wrote:
>> On Thu, Apr 08, 2010 at 02:44:59PM +0800, Bryan Wu wrote:
>>> BugLink: http://bugs.launchpad.net/bugs/546649
>>> BugLink: http://bugs.launchpad.net/bugs/457878
>>>
>>> After introducing phylib supporting, users experienced performace drop. That is
>>> because of the mdio polling operation of phylib. Use msleep to replace the busy
>>> waiting cpu_relax() and remove the warning message.
>>>
>>> Signed-off-by: Bryan Wu<bryan.wu@canonical.com>
>>> ---
>>>   drivers/net/fec.c |   45 +++++++++++++++++++++------------------------
>>>   1 files changed, 21 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/drivers/net/fec.c b/drivers/net/fec.c
>>> index 53240d3..2280373 100644
>>> --- a/drivers/net/fec.c
>>> +++ b/drivers/net/fec.c
>>> @@ -216,7 +216,7 @@ static void fec_stop(struct net_device *dev);
>>>   #define FEC_MMFR_TA                (2<<  16)
>>>   #define FEC_MMFR_DATA(v)   (v&  0xffff)
>>>
>>> -#define FEC_MII_TIMEOUT             10000
>>> +#define FEC_MII_TIMEOUT             10
>>>
>>>   /* Transmitter timeout */
>>>   #define TX_TIMEOUT (2 * HZ)
>>> @@ -624,13 +624,29 @@ spin_unlock:
>>>   /*
>>>    * NOTE: a MII transaction is during around 25 us, so polling it...
>>>    */
>>> -static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>>> +static int fec_enet_mdio_poll(struct fec_enet_private *fep)
>>>   {
>>> -    struct fec_enet_private *fep = bus->priv;
>>>      int timeout = FEC_MII_TIMEOUT;
>>>
>>>      fep->mii_timeout = 0;
>>>
>>> +    /* wait for end of transfer */
>>> +    while (!(readl(fep->hwp + FEC_IEVENT)&  FEC_ENET_MII)) {
>>> +            msleep(1);
>>> +            if (timeout--<  0) {
>>> +                    fep->mii_timeout = 1;
>>> +                    break;
>>> +            }
>>> +    }
>>> +

This is ugly, I'd suggest wait_event_timeout(), though the response
time will be worse instead.

msleep(1) is almost equivalent on a system with HZ == 100.

>>> +    return 0;
>>> +}
>>> +
>>> +static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>>> +{
>>> +    struct fec_enet_private *fep = bus->priv;
>>> +
>>> +
>>>      /* clear MII end of transfer bit*/
>>>      writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
>>>
>>> @@ -639,15 +655,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>>>              FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
>>>              FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
>>>
>>> -    /* wait for end of transfer */
>>> -    while (!(readl(fep->hwp + FEC_IEVENT)&  FEC_ENET_MII)) {
>>> -            cpu_relax();
>>> -            if (timeout--<  0) {
>>> -                    fep->mii_timeout = 1;
>>> -                    printk(KERN_ERR "FEC: MDIO read timeout\n");
>>> -                    return -ETIMEDOUT;
>>> -            }
>>> -    }

I think polling will instead improve the performance (by waiting less once
the event happens), why is it causing performance drop instead?

>>> +    fec_enet_mdio_poll(fep);
>>
>> You have pulled out this timeout loop, but in the replacement there is
>> not indication that it has timed-out.  In the old code we would have
>> returned -ETIMEDOUT and not touched MII_DATA (below).  In the new we
>> will timeout, mark mii_timeout but not return an indication of that, not
>> would we note it here, and so we would drop through and touch MII_DATA.
>> Is this an intentional change of semantics?
>>
>
> Since the timeout thing is not an error just a warning, the caller in phylib
> timer will call this function shortly and normally ignore the negative return
> value. I just looked up this busy timeout things in drivers/net/.
>

returning -1 is always a bad idea, -1 means -EPERM, which is strongly
undesirable. -ETIMEDOUT, I'd say, is a lot better in any sense.

> drivers/net/davinci_emac.c:
> it uses endless loop:
> /* Wait until mdio is ready for next command */
> #define MDIO_WAIT_FOR_USER_ACCESS\
>                 while ((emac_mdio_read((MDIO_USERACCESS(0))) &\
>                         MDIO_USERACCESS_GO) != 0)
>
>
> drivers/net/au1000_eth.c
> it returns -1 when timeout
>
> drivers/net/arm/at91_ether.c
> it is the same as us, don't return any thing if timeout.
>
> So actually, I am not sure about the return value requirement for this function.
> And I found several timeout warning when we do large file transfer. The hardware
> is not perfect.
>
> But I will try to check the return value of the poll function, and let you guys
> know the result.
>
> Thanks,
> -Bryan
>
> --
> kernel-team mailing list
> kernel-team@lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
>
Eric Miao - April 13, 2010, 10:58 p.m.
On Wed, Apr 14, 2010 at 6:57 AM, Eric Miao <eric.miao@canonical.com> wrote:
> On Wed, Apr 14, 2010 at 5:37 AM, Bryan Wu <bryan.wu@canonical.com> wrote:
>> On 04/13/2010 12:09 PM, Andy Whitcroft wrote:
>>> On Thu, Apr 08, 2010 at 02:44:59PM +0800, Bryan Wu wrote:
>>>> BugLink: http://bugs.launchpad.net/bugs/546649
>>>> BugLink: http://bugs.launchpad.net/bugs/457878
>>>>
>>>> After introducing phylib supporting, users experienced performace drop. That is
>>>> because of the mdio polling operation of phylib. Use msleep to replace the busy
>>>> waiting cpu_relax() and remove the warning message.
>>>>
>>>> Signed-off-by: Bryan Wu<bryan.wu@canonical.com>
>>>> ---
>>>>   drivers/net/fec.c |   45 +++++++++++++++++++++------------------------
>>>>   1 files changed, 21 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/drivers/net/fec.c b/drivers/net/fec.c
>>>> index 53240d3..2280373 100644
>>>> --- a/drivers/net/fec.c
>>>> +++ b/drivers/net/fec.c
>>>> @@ -216,7 +216,7 @@ static void fec_stop(struct net_device *dev);
>>>>   #define FEC_MMFR_TA                (2<<  16)
>>>>   #define FEC_MMFR_DATA(v)   (v&  0xffff)
>>>>
>>>> -#define FEC_MII_TIMEOUT             10000
>>>> +#define FEC_MII_TIMEOUT             10
>>>>
>>>>   /* Transmitter timeout */
>>>>   #define TX_TIMEOUT (2 * HZ)
>>>> @@ -624,13 +624,29 @@ spin_unlock:
>>>>   /*
>>>>    * NOTE: a MII transaction is during around 25 us, so polling it...
>>>>    */
>>>> -static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>>>> +static int fec_enet_mdio_poll(struct fec_enet_private *fep)
>>>>   {
>>>> -    struct fec_enet_private *fep = bus->priv;
>>>>      int timeout = FEC_MII_TIMEOUT;
>>>>
>>>>      fep->mii_timeout = 0;
>>>>
>>>> +    /* wait for end of transfer */
>>>> +    while (!(readl(fep->hwp + FEC_IEVENT)&  FEC_ENET_MII)) {
>>>> +            msleep(1);
>>>> +            if (timeout--<  0) {
>>>> +                    fep->mii_timeout = 1;
>>>> +                    break;
>>>> +            }
>>>> +    }
>>>> +
>
> This is ugly, I'd suggest wait_event_timeout(), though the response
> time will be worse instead.
>
> msleep(1) is almost equivalent on a system with HZ == 100.
>

Typo, sorry. I mean:

msleep(1) is almost equivalent to msleep(10) on a system with HZ == 100. Won't
be too much response improvement.
Bryan Wu - April 13, 2010, 11:25 p.m.
On 04/13/2010 03:57 PM, Eric Miao wrote:
> On Wed, Apr 14, 2010 at 5:37 AM, Bryan Wu<bryan.wu@canonical.com>  wrote:
>> On 04/13/2010 12:09 PM, Andy Whitcroft wrote:
>>> On Thu, Apr 08, 2010 at 02:44:59PM +0800, Bryan Wu wrote:
>>>> BugLink: http://bugs.launchpad.net/bugs/546649
>>>> BugLink: http://bugs.launchpad.net/bugs/457878
>>>>
>>>> After introducing phylib supporting, users experienced performace drop. That is
>>>> because of the mdio polling operation of phylib. Use msleep to replace the busy
>>>> waiting cpu_relax() and remove the warning message.
>>>>
>>>> Signed-off-by: Bryan Wu<bryan.wu@canonical.com>
>>>> ---
>>>>    drivers/net/fec.c |   45 +++++++++++++++++++++------------------------
>>>>    1 files changed, 21 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/drivers/net/fec.c b/drivers/net/fec.c
>>>> index 53240d3..2280373 100644
>>>> --- a/drivers/net/fec.c
>>>> +++ b/drivers/net/fec.c
>>>> @@ -216,7 +216,7 @@ static void fec_stop(struct net_device *dev);
>>>>    #define FEC_MMFR_TA                (2<<    16)
>>>>    #define FEC_MMFR_DATA(v)   (v&    0xffff)
>>>>
>>>> -#define FEC_MII_TIMEOUT             10000
>>>> +#define FEC_MII_TIMEOUT             10
>>>>
>>>>    /* Transmitter timeout */
>>>>    #define TX_TIMEOUT (2 * HZ)
>>>> @@ -624,13 +624,29 @@ spin_unlock:
>>>>    /*
>>>>     * NOTE: a MII transaction is during around 25 us, so polling it...
>>>>     */
>>>> -static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>>>> +static int fec_enet_mdio_poll(struct fec_enet_private *fep)
>>>>    {
>>>> -    struct fec_enet_private *fep = bus->priv;
>>>>       int timeout = FEC_MII_TIMEOUT;
>>>>
>>>>       fep->mii_timeout = 0;
>>>>
>>>> +    /* wait for end of transfer */
>>>> +    while (!(readl(fep->hwp + FEC_IEVENT)&    FEC_ENET_MII)) {
>>>> +            msleep(1);
>>>> +            if (timeout--<    0) {
>>>> +                    fep->mii_timeout = 1;
>>>> +                    break;
>>>> +            }
>>>> +    }
>>>> +
>
> This is ugly, I'd suggest wait_event_timeout(), though the response
> time will be worse instead.
>

Oh, I don't think it is ugly, since you can find numbers of such code in polling 
functions (not only mdio, but also SPI, I2C etc). Sometime we have to wait for a 
flag. If we busy wait such as cpu_relax() which is barrier() on ARM, it will 
cause the system response and transfer performance drop in our Babbage board. 
The drop is not because the busy polling is because the hardware is not perfect, 
especially when we transfer large files. The problem might be the timeout trying 
number 100000 is too large, busy loop in the read_mdio function is too long when 
the hardware is not perfect.

So if the mdio polling timeout, the phylib timer will try read_mdio shortly. 
That's a polling method to monitor the ethernet phy link status change. 
Generally, this kind of timeout is an acceptable situation, we will recover it 
when we restart read_mdio again.

> msleep(1) is almost equivalent on a system with HZ == 100.
>

Actually, I don't wanna a very high resolution time delay, I just wanna sleep a 
very short time and check again the flag. There is no usleep(), msleep(1) is 
friendly here.

>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>>>> +{
>>>> +    struct fec_enet_private *fep = bus->priv;
>>>> +
>>>> +
>>>>       /* clear MII end of transfer bit*/
>>>>       writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
>>>>
>>>> @@ -639,15 +655,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>>>>               FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
>>>>               FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
>>>>
>>>> -    /* wait for end of transfer */
>>>> -    while (!(readl(fep->hwp + FEC_IEVENT)&    FEC_ENET_MII)) {
>>>> -            cpu_relax();
>>>> -            if (timeout--<    0) {
>>>> -                    fep->mii_timeout = 1;
>>>> -                    printk(KERN_ERR "FEC: MDIO read timeout\n");
>>>> -                    return -ETIMEDOUT;
>>>> -            }
>>>> -    }
>
> I think polling will instead improve the performance (by waiting less once
> the event happens), why is it causing performance drop instead?
>

It should be, but if we wait for a flag too long especially the hardware is not 
perfect, it will cause the performance drop. That is we observed in babbage board.


>>>> +    fec_enet_mdio_poll(fep);
>>>
>>> You have pulled out this timeout loop, but in the replacement there is
>>> not indication that it has timed-out.  In the old code we would have
>>> returned -ETIMEDOUT and not touched MII_DATA (below).  In the new we
>>> will timeout, mark mii_timeout but not return an indication of that, not
>>> would we note it here, and so we would drop through and touch MII_DATA.
>>> Is this an intentional change of semantics?
>>>
>>
>> Since the timeout thing is not an error just a warning, the caller in phylib
>> timer will call this function shortly and normally ignore the negative return
>> value. I just looked up this busy timeout things in drivers/net/.
>>
>
> returning -1 is always a bad idea, -1 means -EPERM, which is strongly
> undesirable. -ETIMEDOUT, I'd say, is a lot better in any sense.
>

-ETIMEDOUT is the best if we return some thing when it is timeout.

-Bryan

>> drivers/net/davinci_emac.c:
>> it uses endless loop:
>> /* Wait until mdio is ready for next command */
>> #define MDIO_WAIT_FOR_USER_ACCESS\
>>                  while ((emac_mdio_read((MDIO_USERACCESS(0)))&\
>>                          MDIO_USERACCESS_GO) != 0)
>>
>>
>> drivers/net/au1000_eth.c
>> it returns -1 when timeout
>>
>> drivers/net/arm/at91_ether.c
>> it is the same as us, don't return any thing if timeout.
>>
>> So actually, I am not sure about the return value requirement for this function.
>> And I found several timeout warning when we do large file transfer. The hardware
>> is not perfect.
>>
>> But I will try to check the return value of the poll function, and let you guys
>> know the result.
>>
>> Thanks,
>> -Bryan
>>
Bryan Wu - April 13, 2010, 11:58 p.m.
On 04/13/2010 04:25 PM, Bryan Wu wrote:
> On 04/13/2010 03:57 PM, Eric Miao wrote:
>> On Wed, Apr 14, 2010 at 5:37 AM, Bryan Wu<bryan.wu@canonical.com>   wrote:
>>> On 04/13/2010 12:09 PM, Andy Whitcroft wrote:
>>>> On Thu, Apr 08, 2010 at 02:44:59PM +0800, Bryan Wu wrote:
>>>>> BugLink: http://bugs.launchpad.net/bugs/546649
>>>>> BugLink: http://bugs.launchpad.net/bugs/457878
>>>>>
>>>>> After introducing phylib supporting, users experienced performace drop. That is
>>>>> because of the mdio polling operation of phylib. Use msleep to replace the busy
>>>>> waiting cpu_relax() and remove the warning message.
>>>>>
>>>>> Signed-off-by: Bryan Wu<bryan.wu@canonical.com>
>>>>> ---
>>>>>     drivers/net/fec.c |   45 +++++++++++++++++++++------------------------
>>>>>     1 files changed, 21 insertions(+), 24 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/fec.c b/drivers/net/fec.c
>>>>> index 53240d3..2280373 100644
>>>>> --- a/drivers/net/fec.c
>>>>> +++ b/drivers/net/fec.c
>>>>> @@ -216,7 +216,7 @@ static void fec_stop(struct net_device *dev);
>>>>>     #define FEC_MMFR_TA                (2<<     16)
>>>>>     #define FEC_MMFR_DATA(v)   (v&     0xffff)
>>>>>
>>>>> -#define FEC_MII_TIMEOUT             10000
>>>>> +#define FEC_MII_TIMEOUT             10
>>>>>
>>>>>     /* Transmitter timeout */
>>>>>     #define TX_TIMEOUT (2 * HZ)
>>>>> @@ -624,13 +624,29 @@ spin_unlock:
>>>>>     /*
>>>>>      * NOTE: a MII transaction is during around 25 us, so polling it...
>>>>>      */
>>>>> -static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>>>>> +static int fec_enet_mdio_poll(struct fec_enet_private *fep)
>>>>>     {
>>>>> -    struct fec_enet_private *fep = bus->priv;
>>>>>        int timeout = FEC_MII_TIMEOUT;
>>>>>
>>>>>        fep->mii_timeout = 0;
>>>>>
>>>>> +    /* wait for end of transfer */
>>>>> +    while (!(readl(fep->hwp + FEC_IEVENT)&     FEC_ENET_MII)) {
>>>>> +            msleep(1);
>>>>> +            if (timeout--<     0) {
>>>>> +                    fep->mii_timeout = 1;
>>>>> +                    break;
>>>>> +            }
>>>>> +    }
>>>>> +
>>
>> This is ugly, I'd suggest wait_event_timeout(), though the response
>> time will be worse instead.
>>
>
> Oh, I don't think it is ugly, since you can find numbers of such code in polling
> functions (not only mdio, but also SPI, I2C etc). Sometime we have to wait for a
> flag. If we busy wait such as cpu_relax() which is barrier() on ARM, it will
> cause the system response and transfer performance drop in our Babbage board.
> The drop is not because the busy polling is because the hardware is not perfect,
> especially when we transfer large files. The problem might be the timeout trying
> number 100000 is too large, busy loop in the read_mdio function is too long when
> the hardware is not perfect.
>
> So if the mdio polling timeout, the phylib timer will try read_mdio shortly.
> That's a polling method to monitor the ethernet phy link status change.
> Generally, this kind of timeout is an acceptable situation, we will recover it
> when we restart read_mdio again.
>
>> msleep(1) is almost equivalent on a system with HZ == 100.
>>
>
> Actually, I don't wanna a very high resolution time delay, I just wanna sleep a
> very short time and check again the flag. There is no usleep(), msleep(1) is
> friendly here.
>
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>>>>> +{
>>>>> +    struct fec_enet_private *fep = bus->priv;
>>>>> +
>>>>> +
>>>>>        /* clear MII end of transfer bit*/
>>>>>        writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
>>>>>
>>>>> @@ -639,15 +655,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>>>>>                FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
>>>>>                FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
>>>>>
>>>>> -    /* wait for end of transfer */
>>>>> -    while (!(readl(fep->hwp + FEC_IEVENT)&     FEC_ENET_MII)) {
>>>>> -            cpu_relax();
>>>>> -            if (timeout--<     0) {
>>>>> -                    fep->mii_timeout = 1;
>>>>> -                    printk(KERN_ERR "FEC: MDIO read timeout\n");
>>>>> -                    return -ETIMEDOUT;
>>>>> -            }
>>>>> -    }
>>
>> I think polling will instead improve the performance (by waiting less once
>> the event happens), why is it causing performance drop instead?
>>
>
> It should be, but if we wait for a flag too long especially the hardware is not
> perfect, it will cause the performance drop. That is we observed in babbage board.
>
>
>>>>> +    fec_enet_mdio_poll(fep);
>>>>
>>>> You have pulled out this timeout loop, but in the replacement there is
>>>> not indication that it has timed-out.  In the old code we would have
>>>> returned -ETIMEDOUT and not touched MII_DATA (below).  In the new we
>>>> will timeout, mark mii_timeout but not return an indication of that, not
>>>> would we note it here, and so we would drop through and touch MII_DATA.
>>>> Is this an intentional change of semantics?
>>>>
>>>
>>> Since the timeout thing is not an error just a warning, the caller in phylib
>>> timer will call this function shortly and normally ignore the negative return
>>> value. I just looked up this busy timeout things in drivers/net/.
>>>
>>
>> returning -1 is always a bad idea, -1 means -EPERM, which is strongly
>> undesirable. -ETIMEDOUT, I'd say, is a lot better in any sense.
>>
>
> -ETIMEDOUT is the best if we return some thing when it is timeout.
>
> -Bryan
>
>>> drivers/net/davinci_emac.c:
>>> it uses endless loop:
>>> /* Wait until mdio is ready for next command */
>>> #define MDIO_WAIT_FOR_USER_ACCESS\
>>>                   while ((emac_mdio_read((MDIO_USERACCESS(0)))&\
>>>                           MDIO_USERACCESS_GO) != 0)
>>>
>>>
>>> drivers/net/au1000_eth.c
>>> it returns -1 when timeout
>>>
>>> drivers/net/arm/at91_ether.c
>>> it is the same as us, don't return any thing if timeout.
>>>
>>> So actually, I am not sure about the return value requirement for this function.
>>> And I found several timeout warning when we do large file transfer. The hardware
>>> is not perfect.
>>>
>>> But I will try to check the return value of the poll function, and let you guys
>>> know the result.
>>>
>>> Thanks,
>>> -Bryan
>>>
>

I modified the patch to return -ETIMEDOUT when we meet a timeout during mdio 
polling. After the testing from Tobin Davis, it will cause a dramatically 
performance drop. The -ETIMEDOUT will cause the phylib set the PHY status as 
HALT. It will take a while to recover the PHY status.

I will ping upstream about this performance drop things. But since we got 
customers are using this in Karmic, is it possible to apply this patch and let 
them get the good performance firstly. If we need update this patch later, I 
will patch it again.

-Bryan

Patch

diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 53240d3..2280373 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -216,7 +216,7 @@  static void fec_stop(struct net_device *dev);
 #define FEC_MMFR_TA		(2 << 16)
 #define FEC_MMFR_DATA(v)	(v & 0xffff)
 
-#define FEC_MII_TIMEOUT		10000
+#define FEC_MII_TIMEOUT		10
 
 /* Transmitter timeout */
 #define TX_TIMEOUT (2 * HZ)
@@ -624,13 +624,29 @@  spin_unlock:
 /*
  * NOTE: a MII transaction is during around 25 us, so polling it...
  */
-static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
+static int fec_enet_mdio_poll(struct fec_enet_private *fep)
 {
-	struct fec_enet_private *fep = bus->priv;
 	int timeout = FEC_MII_TIMEOUT;
 
 	fep->mii_timeout = 0;
 
+	/* wait for end of transfer */
+	while (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_MII)) {
+		msleep(1);
+		if (timeout-- < 0) {
+			fep->mii_timeout = 1;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
+{
+	struct fec_enet_private *fep = bus->priv;
+
+
 	/* clear MII end of transfer bit*/
 	writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
 
@@ -639,15 +655,7 @@  static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
 		FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
 
-	/* wait for end of transfer */
-	while (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_MII)) {
-		cpu_relax();
-		if (timeout-- < 0) {
-			fep->mii_timeout = 1;
-			printk(KERN_ERR "FEC: MDIO read timeout\n");
-			return -ETIMEDOUT;
-		}
-	}
+	fec_enet_mdio_poll(fep);
 
 	/* return value */
 	return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA));
@@ -657,9 +665,6 @@  static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 			   u16 value)
 {
 	struct fec_enet_private *fep = bus->priv;
-	int timeout = FEC_MII_TIMEOUT;
-
-	fep->mii_timeout = 0;
 
 	/* clear MII end of transfer bit*/
 	writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
@@ -670,15 +675,7 @@  static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 		FEC_MMFR_TA | FEC_MMFR_DATA(value),
 		fep->hwp + FEC_MII_DATA);
 
-	/* wait for end of transfer */
-	while (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_MII)) {
-		cpu_relax();
-		if (timeout-- < 0) {
-			fep->mii_timeout = 1;
-			printk(KERN_ERR "FEC: MDIO write timeout\n");
-			return -ETIMEDOUT;
-		}
-	}
+	fec_enet_mdio_poll(fep);
 
 	return 0;
 }