diff mbox

of_mdio: Fix broken PHY IRQ in case of probe deferral

Message ID 1495112345-24795-1-git-send-email-geert+renesas@glider.be
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Geert Uytterhoeven May 18, 2017, 12:59 p.m. UTC
If an Ethernet PHY is initialized before the interrupt controller it is
connected to, a message like the following is printed:

    irq: no irq domain found for /interrupt-controller@e61c0000 !

However, the actual error is ignored, leading to a non-functional (-1)
PHY interrupt later:

    Micrel KSZ8041RNLI ee700000.ethernet-ffffffff:01: attached PHY driver [Micrel KSZ8041RNLI] (mii_bus:phy_addr=ee700000.ethernet-ffffffff:01, irq=-1)

Depending on whether the PHY driver will fall back to polling, Ethernet
may or may not work.

To fix this:
  1. Switch of_mdiobus_register_phy() from irq_of_parse_and_map() to
     of_irq_get().
     Unlike the former, the latter returns -EPROBE_DEFER if the
     interrupt controller is not yet available, so this condition can be
     detected.
     Other errors are handled the same as before, i.e. use the passed
     mdio->irq[addr] as interrupt.
  2. Propagate and handle errors from of_mdiobus_register_phy() and
     of_mdiobus_register_device().

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
Seen on r8a7791/koelsch when using the new CPG/MSSR clock driver.
I assume it always happened on RZ/G1 in mainline.
---
 drivers/of/of_mdio.c | 39 +++++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 12 deletions(-)

Comments

David Miller May 18, 2017, 3:21 p.m. UTC | #1
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 18 May 2017 14:59:05 +0200

> If an Ethernet PHY is initialized before the interrupt controller it is
> connected to, a message like the following is printed:
> 
>     irq: no irq domain found for /interrupt-controller@e61c0000 !
> 
> However, the actual error is ignored, leading to a non-functional (-1)
> PHY interrupt later:
> 
>     Micrel KSZ8041RNLI ee700000.ethernet-ffffffff:01: attached PHY driver [Micrel KSZ8041RNLI] (mii_bus:phy_addr=ee700000.ethernet-ffffffff:01, irq=-1)
> 
> Depending on whether the PHY driver will fall back to polling, Ethernet
> may or may not work.
> 
> To fix this:
>   1. Switch of_mdiobus_register_phy() from irq_of_parse_and_map() to
>      of_irq_get().
>      Unlike the former, the latter returns -EPROBE_DEFER if the
>      interrupt controller is not yet available, so this condition can be
>      detected.
>      Other errors are handled the same as before, i.e. use the passed
>      mdio->irq[addr] as interrupt.
>   2. Propagate and handle errors from of_mdiobus_register_phy() and
>      of_mdiobus_register_device().
> 
> Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>

Florian or someone similarly knowledgable, please review.
Andrew Lunn May 18, 2017, 4:09 p.m. UTC | #2
On Thu, May 18, 2017 at 02:59:05PM +0200, Geert Uytterhoeven wrote:
> If an Ethernet PHY is initialized before the interrupt controller it is
> connected to, a message like the following is printed:
> 
>     irq: no irq domain found for /interrupt-controller@e61c0000 !
> 
> However, the actual error is ignored, leading to a non-functional (-1)
> PHY interrupt later:
> 
>     Micrel KSZ8041RNLI ee700000.ethernet-ffffffff:01: attached PHY driver [Micrel KSZ8041RNLI] (mii_bus:phy_addr=ee700000.ethernet-ffffffff:01, irq=-1)
> 
> Depending on whether the PHY driver will fall back to polling, Ethernet
> may or may not work.
> 
> To fix this:
>   1. Switch of_mdiobus_register_phy() from irq_of_parse_and_map() to
>      of_irq_get().
>      Unlike the former, the latter returns -EPROBE_DEFER if the
>      interrupt controller is not yet available, so this condition can be
>      detected.
>      Other errors are handled the same as before, i.e. use the passed
>      mdio->irq[addr] as interrupt.
>   2. Propagate and handle errors from of_mdiobus_register_phy() and
>      of_mdiobus_register_device().
> 
> Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
> ---
> Seen on r8a7791/koelsch when using the new CPG/MSSR clock driver.
> I assume it always happened on RZ/G1 in mainline.
> ---
>  drivers/of/of_mdio.c | 39 +++++++++++++++++++++++++++------------
>  1 file changed, 27 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
> index 7e4c80f9b6cda0d3..f9ac2893f56184be 100644
> --- a/drivers/of/of_mdio.c
> +++ b/drivers/of/of_mdio.c
> @@ -44,7 +44,7 @@ static int of_get_phy_id(struct device_node *device, u32 *phy_id)
>  	return -EINVAL;
>  }
>  
> -static void of_mdiobus_register_phy(struct mii_bus *mdio,
> +static int of_mdiobus_register_phy(struct mii_bus *mdio,
>  				    struct device_node *child, u32 addr)
>  {
>  	struct phy_device *phy;
> @@ -60,9 +60,13 @@ static void of_mdiobus_register_phy(struct mii_bus *mdio,
>  	else
>  		phy = get_phy_device(mdio, addr, is_c45);
>  	if (IS_ERR(phy))
> -		return;
> +		return PTR_ERR(phy);
>  
> -	rc = irq_of_parse_and_map(child, 0);
> +	rc = of_irq_get(child, 0);
> +	if (rc == -EPROBE_DEFER) {
> +		phy_device_free(phy);
> +		return rc;
> +	}

Maybe this should be consistent. All other places there is an error,
you return it. Here however, you only return the error if it is
EPROBE_DEFER.

	Andrew


>  	if (rc > 0) {
>  		phy->irq = rc;
>  		mdio->irq[addr] = rc;
> @@ -84,22 +88,23 @@ static void of_mdiobus_register_phy(struct mii_bus *mdio,
>  	if (rc) {
>  		phy_device_free(phy);
>  		of_node_put(child);
> -		return;
> +		return rc;
>  	}
>  
>  	dev_dbg(&mdio->dev, "registered phy %s at address %i\n",
>  		child->name, addr);
> +	return 0;
>  }
>
Geert Uytterhoeven May 18, 2017, 4:13 p.m. UTC | #3
Hi Andrew,

On Thu, May 18, 2017 at 6:09 PM, Andrew Lunn <andrew@lunn.ch> wrote:
> On Thu, May 18, 2017 at 02:59:05PM +0200, Geert Uytterhoeven wrote:
>> If an Ethernet PHY is initialized before the interrupt controller it is
>> connected to, a message like the following is printed:
>>
>>     irq: no irq domain found for /interrupt-controller@e61c0000 !
>>
>> However, the actual error is ignored, leading to a non-functional (-1)
>> PHY interrupt later:
>>
>>     Micrel KSZ8041RNLI ee700000.ethernet-ffffffff:01: attached PHY driver [Micrel KSZ8041RNLI] (mii_bus:phy_addr=ee700000.ethernet-ffffffff:01, irq=-1)
>>
>> Depending on whether the PHY driver will fall back to polling, Ethernet
>> may or may not work.
>>
>> To fix this:
>>   1. Switch of_mdiobus_register_phy() from irq_of_parse_and_map() to
>>      of_irq_get().
>>      Unlike the former, the latter returns -EPROBE_DEFER if the
>>      interrupt controller is not yet available, so this condition can be
>>      detected.
>>      Other errors are handled the same as before, i.e. use the passed
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>>      mdio->irq[addr] as interrupt.
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>>   2. Propagate and handle errors from of_mdiobus_register_phy() and
>>      of_mdiobus_register_device().
>>
>> Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
>> ---
>> Seen on r8a7791/koelsch when using the new CPG/MSSR clock driver.
>> I assume it always happened on RZ/G1 in mainline.
>> ---
>>  drivers/of/of_mdio.c | 39 +++++++++++++++++++++++++++------------
>>  1 file changed, 27 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
>> index 7e4c80f9b6cda0d3..f9ac2893f56184be 100644
>> --- a/drivers/of/of_mdio.c
>> +++ b/drivers/of/of_mdio.c
>> @@ -44,7 +44,7 @@ static int of_get_phy_id(struct device_node *device, u32 *phy_id)
>>       return -EINVAL;
>>  }
>>
>> -static void of_mdiobus_register_phy(struct mii_bus *mdio,
>> +static int of_mdiobus_register_phy(struct mii_bus *mdio,
>>                                   struct device_node *child, u32 addr)
>>  {
>>       struct phy_device *phy;
>> @@ -60,9 +60,13 @@ static void of_mdiobus_register_phy(struct mii_bus *mdio,
>>       else
>>               phy = get_phy_device(mdio, addr, is_c45);
>>       if (IS_ERR(phy))
>> -             return;
>> +             return PTR_ERR(phy);
>>
>> -     rc = irq_of_parse_and_map(child, 0);
>> +     rc = of_irq_get(child, 0);
>> +     if (rc == -EPROBE_DEFER) {
>> +             phy_device_free(phy);
>> +             return rc;
>> +     }
>
> Maybe this should be consistent. All other places there is an error,
> you return it. Here however, you only return the error if it is
> EPROBE_DEFER.

That's because of the "else" branch in the code below:

        if (rc > 0) {
                phy->irq = rc;
                mdio->irq[addr] = rc;
        } else {
                phy->irq = mdio->irq[addr];
        }

cfr. the marked part of the patch description.
I didn't want to change that behavior, as it's not clear to me why it's handled
that way.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Andrew Lunn May 18, 2017, 4:33 p.m. UTC | #4
> >>               phy = get_phy_device(mdio, addr, is_c45);
> >>       if (IS_ERR(phy))
> >> -             return;
> >> +             return PTR_ERR(phy);
> >>
> >> -     rc = irq_of_parse_and_map(child, 0);
> >> +     rc = of_irq_get(child, 0);
> >> +     if (rc == -EPROBE_DEFER) {
> >> +             phy_device_free(phy);
> >> +             return rc;
> >> +     }
> >
> > Maybe this should be consistent. All other places there is an error,
> > you return it. Here however, you only return the error if it is
> > EPROBE_DEFER.
> 
> That's because of the "else" branch in the code below:
> 
>         if (rc > 0) {
>                 phy->irq = rc;
>                 mdio->irq[addr] = rc;
>         } else {
>                 phy->irq = mdio->irq[addr];
>         }
> 
> cfr. the marked part of the patch description.
> I didn't want to change that behavior, as it's not clear to me why it's handled
> that way.

So there seems to be 3 conditions that need handling:

1) of_irq_get() gives us an interrupt number.
2) of_irq_get() indicates there is no irq in the device tree.
3) of_irq_get() indicates a real error

1) We have.

2) We should fall back to using the mdio busses irq for the
device. There are a couple of mdio drivers which do this, e.g.
stmicro/stmmac/stmmac_mdio.c. mdiobus_alloc() ensures it is set to
PHY_POLL, so if the driver does not set it, we poll.

3) This is new. We have two choices. Ignore the error and poll. Or
return the error. Historically we have ignored the error. But should
we? I would probably return the error, now that we can. But...

Florian?

	Andrew
Geert Uytterhoeven May 18, 2017, 5:38 p.m. UTC | #5
Hi Andrew,

On Thu, May 18, 2017 at 6:33 PM, Andrew Lunn <andrew@lunn.ch> wrote:
>> >>               phy = get_phy_device(mdio, addr, is_c45);
>> >>       if (IS_ERR(phy))
>> >> -             return;
>> >> +             return PTR_ERR(phy);
>> >>
>> >> -     rc = irq_of_parse_and_map(child, 0);
>> >> +     rc = of_irq_get(child, 0);
>> >> +     if (rc == -EPROBE_DEFER) {
>> >> +             phy_device_free(phy);
>> >> +             return rc;
>> >> +     }
>> >
>> > Maybe this should be consistent. All other places there is an error,
>> > you return it. Here however, you only return the error if it is
>> > EPROBE_DEFER.
>>
>> That's because of the "else" branch in the code below:
>>
>>         if (rc > 0) {
>>                 phy->irq = rc;
>>                 mdio->irq[addr] = rc;
>>         } else {
>>                 phy->irq = mdio->irq[addr];
>>         }
>>
>> cfr. the marked part of the patch description.
>> I didn't want to change that behavior, as it's not clear to me why it's handled
>> that way.
>
> So there seems to be 3 conditions that need handling:
>
> 1) of_irq_get() gives us an interrupt number.
> 2) of_irq_get() indicates there is no irq in the device tree.
> 3) of_irq_get() indicates a real error
>
> 1) We have.
>
> 2) We should fall back to using the mdio busses irq for the
> device. There are a couple of mdio drivers which do this, e.g.
> stmicro/stmmac/stmmac_mdio.c. mdiobus_alloc() ensures it is set to
> PHY_POLL, so if the driver does not set it, we poll.
>
> 3) This is new. We have two choices. Ignore the error and poll. Or
> return the error. Historically we have ignored the error. But should
> we? I would probably return the error, now that we can. But...

The issue itself isn't new, though.
I reported it in "of_mdiobus_register_phy() and deferred probe"
(https://lkml.org/lkml/2015/10/22/377), and posted a workaround in "[PATCH v2]
irqchip/renesas-irqc: Postpone driver initialization"
(https://lkml.org/lkml/2016/11/8/794).

Due to the fallback to polling, so far it was easier to complain when
someone broke polling, than to fix the real problem ;-)

But when I saw Thomas' patch[*] for of_irq_to_resource(), the time was ripe
to tackle the root cause.

[*] https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git/commit/?h=dt/next&id=7a4228bbff769ebf449981a4248616db9f0cffec

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Florian Fainelli May 18, 2017, 6:25 p.m. UTC | #6
On 05/18/2017 05:59 AM, Geert Uytterhoeven wrote:
> If an Ethernet PHY is initialized before the interrupt controller it is
> connected to, a message like the following is printed:
> 
>     irq: no irq domain found for /interrupt-controller@e61c0000 !
> 
> However, the actual error is ignored, leading to a non-functional (-1)
> PHY interrupt later:
> 
>     Micrel KSZ8041RNLI ee700000.ethernet-ffffffff:01: attached PHY driver [Micrel KSZ8041RNLI] (mii_bus:phy_addr=ee700000.ethernet-ffffffff:01, irq=-1)
> 
> Depending on whether the PHY driver will fall back to polling, Ethernet
> may or may not work.
> 
> To fix this:
>   1. Switch of_mdiobus_register_phy() from irq_of_parse_and_map() to
>      of_irq_get().
>      Unlike the former, the latter returns -EPROBE_DEFER if the
>      interrupt controller is not yet available, so this condition can be
>      detected.
>      Other errors are handled the same as before, i.e. use the passed
>      mdio->irq[addr] as interrupt.
>   2. Propagate and handle errors from of_mdiobus_register_phy() and
>      of_mdiobus_register_device().

This most certainly works fine in the simple case where you have one PHY
hanging off the MDIO bus, now what happens if you have several?

Presumably, the first PHY that returns EPROBE_DEFER will make the entire
bus registration return EPROB_DEFER as well, and so on, and so forth,
but I am not sure if we will be properly unwinding the successful
registration of PHYs that either don't have an interrupt, or did not
return EPROBE_DEFER.

It should be possible to mimic this behavior by using the fixed PHY, and
possibly the dsa_loop.c driver which would create 4 ports, expecting 4
fixed PHYs to be present.

> 
> Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
> ---
> Seen on r8a7791/koelsch when using the new CPG/MSSR clock driver.
> I assume it always happened on RZ/G1 in mainline.
> ---
>  drivers/of/of_mdio.c | 39 +++++++++++++++++++++++++++------------
>  1 file changed, 27 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
> index 7e4c80f9b6cda0d3..f9ac2893f56184be 100644
> --- a/drivers/of/of_mdio.c
> +++ b/drivers/of/of_mdio.c
> @@ -44,7 +44,7 @@ static int of_get_phy_id(struct device_node *device, u32 *phy_id)
>  	return -EINVAL;
>  }
>  
> -static void of_mdiobus_register_phy(struct mii_bus *mdio,
> +static int of_mdiobus_register_phy(struct mii_bus *mdio,
>  				    struct device_node *child, u32 addr)
>  {
>  	struct phy_device *phy;
> @@ -60,9 +60,13 @@ static void of_mdiobus_register_phy(struct mii_bus *mdio,
>  	else
>  		phy = get_phy_device(mdio, addr, is_c45);
>  	if (IS_ERR(phy))
> -		return;
> +		return PTR_ERR(phy);
>  
> -	rc = irq_of_parse_and_map(child, 0);
> +	rc = of_irq_get(child, 0);
> +	if (rc == -EPROBE_DEFER) {
> +		phy_device_free(phy);
> +		return rc;
> +	}
>  	if (rc > 0) {
>  		phy->irq = rc;
>  		mdio->irq[addr] = rc;
> @@ -84,22 +88,23 @@ static void of_mdiobus_register_phy(struct mii_bus *mdio,
>  	if (rc) {
>  		phy_device_free(phy);
>  		of_node_put(child);
> -		return;
> +		return rc;
>  	}
>  
>  	dev_dbg(&mdio->dev, "registered phy %s at address %i\n",
>  		child->name, addr);
> +	return 0;
>  }
>  
> -static void of_mdiobus_register_device(struct mii_bus *mdio,
> -				       struct device_node *child, u32 addr)
> +static int of_mdiobus_register_device(struct mii_bus *mdio,
> +				      struct device_node *child, u32 addr)
>  {
>  	struct mdio_device *mdiodev;
>  	int rc;
>  
>  	mdiodev = mdio_device_create(mdio, addr);
>  	if (IS_ERR(mdiodev))
> -		return;
> +		return PTR_ERR(mdiodev);
>  
>  	/* Associate the OF node with the device structure so it
>  	 * can be looked up later.
> @@ -112,11 +117,12 @@ static void of_mdiobus_register_device(struct mii_bus *mdio,
>  	if (rc) {
>  		mdio_device_free(mdiodev);
>  		of_node_put(child);
> -		return;
> +		return rc;
>  	}
>  
>  	dev_dbg(&mdio->dev, "registered mdio device %s at address %i\n",
>  		child->name, addr);
> +	return 0;
>  }
>  
>  int of_mdio_parse_addr(struct device *dev, const struct device_node *np)
> @@ -242,9 +248,11 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
>  		}
>  
>  		if (of_mdiobus_child_is_phy(child))
> -			of_mdiobus_register_phy(mdio, child, addr);
> +			rc = of_mdiobus_register_phy(mdio, child, addr);
>  		else
> -			of_mdiobus_register_device(mdio, child, addr);
> +			rc = of_mdiobus_register_device(mdio, child, addr);
> +		if (rc)
> +			goto unregister;
>  	}
>  
>  	if (!scanphys)
> @@ -265,12 +273,19 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
>  			dev_info(&mdio->dev, "scan phy %s at address %i\n",
>  				 child->name, addr);
>  
> -			if (of_mdiobus_child_is_phy(child))
> -				of_mdiobus_register_phy(mdio, child, addr);
> +			if (of_mdiobus_child_is_phy(child)) {
> +				rc = of_mdiobus_register_phy(mdio, child, addr);
> +				if (rc)
> +					goto unregister;
> +			}
>  		}
>  	}
>  
>  	return 0;
> +
> +unregister:
> +	mdiobus_unregister(mdio);
> +	return rc;
>  }
>  EXPORT_SYMBOL(of_mdiobus_register);
>  
>
Geert Uytterhoeven May 18, 2017, 6:48 p.m. UTC | #7
Hi Florian,

On Thu, May 18, 2017 at 8:25 PM, Florian Fainelli <f.fainelli@gmail.com> wrote:
> On 05/18/2017 05:59 AM, Geert Uytterhoeven wrote:
>> If an Ethernet PHY is initialized before the interrupt controller it is
>> connected to, a message like the following is printed:
>>
>>     irq: no irq domain found for /interrupt-controller@e61c0000 !
>>
>> However, the actual error is ignored, leading to a non-functional (-1)
>> PHY interrupt later:
>>
>>     Micrel KSZ8041RNLI ee700000.ethernet-ffffffff:01: attached PHY driver [Micrel KSZ8041RNLI] (mii_bus:phy_addr=ee700000.ethernet-ffffffff:01, irq=-1)
>>
>> Depending on whether the PHY driver will fall back to polling, Ethernet
>> may or may not work.
>>
>> To fix this:
>>   1. Switch of_mdiobus_register_phy() from irq_of_parse_and_map() to
>>      of_irq_get().
>>      Unlike the former, the latter returns -EPROBE_DEFER if the
>>      interrupt controller is not yet available, so this condition can be
>>      detected.
>>      Other errors are handled the same as before, i.e. use the passed
>>      mdio->irq[addr] as interrupt.
>>   2. Propagate and handle errors from of_mdiobus_register_phy() and
>>      of_mdiobus_register_device().
>
> This most certainly works fine in the simple case where you have one PHY
> hanging off the MDIO bus, now what happens if you have several?
>
> Presumably, the first PHY that returns EPROBE_DEFER will make the entire
> bus registration return EPROB_DEFER as well, and so on, and so forth,
> but I am not sure if we will be properly unwinding the successful
> registration of PHYs that either don't have an interrupt, or did not
> return EPROBE_DEFER.
>
> It should be possible to mimic this behavior by using the fixed PHY, and
> possibly the dsa_loop.c driver which would create 4 ports, expecting 4
> fixed PHYs to be present.

mdiobus_unregister(), called from of_mdiobus_register() on failure,
should do the unwinding, right?

And when the driver is reprobed, all PHYs are reprobed, until they all
succeed.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Andrew Lunn May 18, 2017, 7:34 p.m. UTC | #8
> > This most certainly works fine in the simple case where you have one PHY
> > hanging off the MDIO bus, now what happens if you have several?
> >
> > Presumably, the first PHY that returns EPROBE_DEFER will make the entire
> > bus registration return EPROB_DEFER as well, and so on, and so forth,
> > but I am not sure if we will be properly unwinding the successful
> > registration of PHYs that either don't have an interrupt, or did not
> > return EPROBE_DEFER.
> >
> > It should be possible to mimic this behavior by using the fixed PHY, and
> > possibly the dsa_loop.c driver which would create 4 ports, expecting 4
> > fixed PHYs to be present.
> 
> mdiobus_unregister(), called from of_mdiobus_register() on failure,
> should do the unwinding, right?
> 
> And when the driver is reprobed, all PHYs are reprobed, until they all
> succeed.

That is the theory. I looked at that while reviewing the patch. But
this has probably not been tested in anger. It would be good to test
this properly, with not just the first PHY returning -EPROBE_DEFER, to
really test the unwind.

    Andrew
Geert Uytterhoeven May 18, 2017, 8:36 p.m. UTC | #9
Hi Andrew,

On Thu, May 18, 2017 at 9:34 PM, Andrew Lunn <andrew@lunn.ch> wrote:
>> > This most certainly works fine in the simple case where you have one PHY
>> > hanging off the MDIO bus, now what happens if you have several?
>> >
>> > Presumably, the first PHY that returns EPROBE_DEFER will make the entire
>> > bus registration return EPROB_DEFER as well, and so on, and so forth,
>> > but I am not sure if we will be properly unwinding the successful
>> > registration of PHYs that either don't have an interrupt, or did not
>> > return EPROBE_DEFER.
>> >
>> > It should be possible to mimic this behavior by using the fixed PHY, and
>> > possibly the dsa_loop.c driver which would create 4 ports, expecting 4
>> > fixed PHYs to be present.
>>
>> mdiobus_unregister(), called from of_mdiobus_register() on failure,
>> should do the unwinding, right?
>>
>> And when the driver is reprobed, all PHYs are reprobed, until they all
>> succeed.
>
> That is the theory. I looked at that while reviewing the patch. But
> this has probably not been tested in anger. It would be good to test
> this properly, with not just the first PHY returning -EPROBE_DEFER, to
> really test the unwind.

Unfortunately I don't have a board with multiple PHYs, so I cannot test
that case.

Does unbinding/rebinding a network driver with multiple PHYs currently
work? Or module unload/reload?

That should exercise a similar code path.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Florian Fainelli May 18, 2017, 10:21 p.m. UTC | #10
On 05/18/2017 01:36 PM, Geert Uytterhoeven wrote:
> Hi Andrew,
> 
> On Thu, May 18, 2017 at 9:34 PM, Andrew Lunn <andrew@lunn.ch> wrote:
>>>> This most certainly works fine in the simple case where you have one PHY
>>>> hanging off the MDIO bus, now what happens if you have several?
>>>>
>>>> Presumably, the first PHY that returns EPROBE_DEFER will make the entire
>>>> bus registration return EPROB_DEFER as well, and so on, and so forth,
>>>> but I am not sure if we will be properly unwinding the successful
>>>> registration of PHYs that either don't have an interrupt, or did not
>>>> return EPROBE_DEFER.
>>>>
>>>> It should be possible to mimic this behavior by using the fixed PHY, and
>>>> possibly the dsa_loop.c driver which would create 4 ports, expecting 4
>>>> fixed PHYs to be present.
>>>
>>> mdiobus_unregister(), called from of_mdiobus_register() on failure,
>>> should do the unwinding, right?
>>>
>>> And when the driver is reprobed, all PHYs are reprobed, until they all
>>> succeed.
>>
>> That is the theory. I looked at that while reviewing the patch. But
>> this has probably not been tested in anger. It would be good to test
>> this properly, with not just the first PHY returning -EPROBE_DEFER, to
>> really test the unwind.
> 
> Unfortunately I don't have a board with multiple PHYs, so I cannot test
> that case.
> 
> Does unbinding/rebinding a network driver with multiple PHYs currently
> work? Or module unload/reload?

Usually there is a strict 1:1 mapping between a network device (not
driver) and a PHY device, switch drivers however, would have multiple
PHYs (one per port, aka net_deice).

NB: binding and unbinding of PHYs is pretty broken at the moment though,
because there is a complete disconnect between what the Ethernet MAC
expects, and the state in which the PHY is. I had some patches to fix
that, but this turned out to be playing whack-a-mole which I typically
suck at.
Geert Uytterhoeven May 23, 2017, 9:36 a.m. UTC | #11
Hi Florian,

On Fri, May 19, 2017 at 12:21 AM, Florian Fainelli <f.fainelli@gmail.com> wrote:
> On 05/18/2017 01:36 PM, Geert Uytterhoeven wrote:
>> On Thu, May 18, 2017 at 9:34 PM, Andrew Lunn <andrew@lunn.ch> wrote:
>>>>> This most certainly works fine in the simple case where you have one PHY
>>>>> hanging off the MDIO bus, now what happens if you have several?
>>>>>
>>>>> Presumably, the first PHY that returns EPROBE_DEFER will make the entire
>>>>> bus registration return EPROB_DEFER as well, and so on, and so forth,
>>>>> but I am not sure if we will be properly unwinding the successful
>>>>> registration of PHYs that either don't have an interrupt, or did not
>>>>> return EPROBE_DEFER.
>>>>>
>>>>> It should be possible to mimic this behavior by using the fixed PHY, and
>>>>> possibly the dsa_loop.c driver which would create 4 ports, expecting 4
>>>>> fixed PHYs to be present.
>>>>
>>>> mdiobus_unregister(), called from of_mdiobus_register() on failure,
>>>> should do the unwinding, right?
>>>>
>>>> And when the driver is reprobed, all PHYs are reprobed, until they all
>>>> succeed.
>>>
>>> That is the theory. I looked at that while reviewing the patch. But
>>> this has probably not been tested in anger. It would be good to test
>>> this properly, with not just the first PHY returning -EPROBE_DEFER, to
>>> really test the unwind.
>>
>> Unfortunately I don't have a board with multiple PHYs, so I cannot test
>> that case.

I tried adding a few dummy PHYs in DT, but that didn't work.

So how can we proceed?

I think the only way my patch can cause issues is because some systems
may rely on EPROBE_DEFER errors being ignored.

>> Does unbinding/rebinding a network driver with multiple PHYs currently
>> work? Or module unload/reload?
>
> Usually there is a strict 1:1 mapping between a network device (not
> driver) and a PHY device, switch drivers however, would have multiple
> PHYs (one per port, aka net_deice).
>
> NB: binding and unbinding of PHYs is pretty broken at the moment though,
> because there is a complete disconnect between what the Ethernet MAC
> expects, and the state in which the PHY is. I had some patches to fix
> that, but this turned out to be playing whack-a-mole which I typically
> suck at.

I didn't mean unbinding the PHY, but the network device.
Don't you have the same issue with the state of PHYs as left by the bootloader?

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Geert Uytterhoeven June 6, 2017, 9:43 a.m. UTC | #12
Hi Florian,

On Tue, May 23, 2017 at 11:36 AM, Geert Uytterhoeven
<geert@linux-m68k.org> wrote:
> On Fri, May 19, 2017 at 12:21 AM, Florian Fainelli <f.fainelli@gmail.com> wrote:
>> On 05/18/2017 01:36 PM, Geert Uytterhoeven wrote:
>>> On Thu, May 18, 2017 at 9:34 PM, Andrew Lunn <andrew@lunn.ch> wrote:
>>>>>> This most certainly works fine in the simple case where you have one PHY
>>>>>> hanging off the MDIO bus, now what happens if you have several?
>>>>>>
>>>>>> Presumably, the first PHY that returns EPROBE_DEFER will make the entire
>>>>>> bus registration return EPROB_DEFER as well, and so on, and so forth,
>>>>>> but I am not sure if we will be properly unwinding the successful
>>>>>> registration of PHYs that either don't have an interrupt, or did not
>>>>>> return EPROBE_DEFER.
>>>>>>
>>>>>> It should be possible to mimic this behavior by using the fixed PHY, and
>>>>>> possibly the dsa_loop.c driver which would create 4 ports, expecting 4
>>>>>> fixed PHYs to be present.
>>>>>
>>>>> mdiobus_unregister(), called from of_mdiobus_register() on failure,
>>>>> should do the unwinding, right?
>>>>>
>>>>> And when the driver is reprobed, all PHYs are reprobed, until they all
>>>>> succeed.
>>>>
>>>> That is the theory. I looked at that while reviewing the patch. But
>>>> this has probably not been tested in anger. It would be good to test
>>>> this properly, with not just the first PHY returning -EPROBE_DEFER, to
>>>> really test the unwind.
>>>
>>> Unfortunately I don't have a board with multiple PHYs, so I cannot test
>>> that case.
>
> I tried adding a few dummy PHYs in DT, but that didn't work.
>
> So how can we proceed?
>
> I think the only way my patch can cause issues is because some systems
> may rely on EPROBE_DEFER errors being ignored.
>
>>> Does unbinding/rebinding a network driver with multiple PHYs currently
>>> work? Or module unload/reload?
>>
>> Usually there is a strict 1:1 mapping between a network device (not
>> driver) and a PHY device, switch drivers however, would have multiple
>> PHYs (one per port, aka net_deice).
>>
>> NB: binding and unbinding of PHYs is pretty broken at the moment though,
>> because there is a complete disconnect between what the Ethernet MAC
>> expects, and the state in which the PHY is. I had some patches to fix
>> that, but this turned out to be playing whack-a-mole which I typically
>> suck at.
>
> I didn't mean unbinding the PHY, but the network device.
> Don't you have the same issue with the state of PHYs as left by the bootloader?

Anyone who can test the behavior on an Ethernet device with multiple PHYs,
e.g. by faking an -EPROBE_DEFER somewhere in the middle?

I'd like to get this issue fixed in v4.13, to avoid a regression when migrating
several systems to a new and better clock driver in v4.14, which will trigger
EPROBE_DEFER.

Thanks!

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Geert Uytterhoeven July 2, 2017, 8:37 p.m. UTC | #13
On Tue, Jun 6, 2017 at 11:43 AM, Geert Uytterhoeven
<geert@linux-m68k.org> wrote:
> On Tue, May 23, 2017 at 11:36 AM, Geert Uytterhoeven
> <geert@linux-m68k.org> wrote:
>> On Fri, May 19, 2017 at 12:21 AM, Florian Fainelli <f.fainelli@gmail.com> wrote:
>>> On 05/18/2017 01:36 PM, Geert Uytterhoeven wrote:
>>>> On Thu, May 18, 2017 at 9:34 PM, Andrew Lunn <andrew@lunn.ch> wrote:
>>>>>>> This most certainly works fine in the simple case where you have one PHY
>>>>>>> hanging off the MDIO bus, now what happens if you have several?
>>>>>>>
>>>>>>> Presumably, the first PHY that returns EPROBE_DEFER will make the entire
>>>>>>> bus registration return EPROB_DEFER as well, and so on, and so forth,
>>>>>>> but I am not sure if we will be properly unwinding the successful
>>>>>>> registration of PHYs that either don't have an interrupt, or did not
>>>>>>> return EPROBE_DEFER.
>>>>>>>
>>>>>>> It should be possible to mimic this behavior by using the fixed PHY, and
>>>>>>> possibly the dsa_loop.c driver which would create 4 ports, expecting 4
>>>>>>> fixed PHYs to be present.
>>>>>>
>>>>>> mdiobus_unregister(), called from of_mdiobus_register() on failure,
>>>>>> should do the unwinding, right?
>>>>>>
>>>>>> And when the driver is reprobed, all PHYs are reprobed, until they all
>>>>>> succeed.
>>>>>
>>>>> That is the theory. I looked at that while reviewing the patch. But
>>>>> this has probably not been tested in anger. It would be good to test
>>>>> this properly, with not just the first PHY returning -EPROBE_DEFER, to
>>>>> really test the unwind.
>>>>
>>>> Unfortunately I don't have a board with multiple PHYs, so I cannot test
>>>> that case.
>>
>> I tried adding a few dummy PHYs in DT, but that didn't work.
>>
>> So how can we proceed?
>>
>> I think the only way my patch can cause issues is because some systems
>> may rely on EPROBE_DEFER errors being ignored.
>>
>>>> Does unbinding/rebinding a network driver with multiple PHYs currently
>>>> work? Or module unload/reload?
>>>
>>> Usually there is a strict 1:1 mapping between a network device (not
>>> driver) and a PHY device, switch drivers however, would have multiple
>>> PHYs (one per port, aka net_deice).
>>>
>>> NB: binding and unbinding of PHYs is pretty broken at the moment though,
>>> because there is a complete disconnect between what the Ethernet MAC
>>> expects, and the state in which the PHY is. I had some patches to fix
>>> that, but this turned out to be playing whack-a-mole which I typically
>>> suck at.
>>
>> I didn't mean unbinding the PHY, but the network device.
>> Don't you have the same issue with the state of PHYs as left by the bootloader?
>
> Anyone who can test the behavior on an Ethernet device with multiple PHYs,
> e.g. by faking an -EPROBE_DEFER somewhere in the middle?
>
> I'd like to get this issue fixed in v4.13, to avoid a regression when migrating
> several systems to a new and better clock driver in v4.14, which will trigger
> EPROBE_DEFER.

Ping?

This patch fixes a real issue.

Thanks!

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Florian Fainelli July 9, 2017, 4:49 p.m. UTC | #14
On 07/02/2017 01:37 PM, Geert Uytterhoeven wrote:
> On Tue, Jun 6, 2017 at 11:43 AM, Geert Uytterhoeven
> <geert@linux-m68k.org> wrote:
>> On Tue, May 23, 2017 at 11:36 AM, Geert Uytterhoeven
>> <geert@linux-m68k.org> wrote:
>>> On Fri, May 19, 2017 at 12:21 AM, Florian Fainelli <f.fainelli@gmail.com> wrote:
>>>> On 05/18/2017 01:36 PM, Geert Uytterhoeven wrote:
>>>>> On Thu, May 18, 2017 at 9:34 PM, Andrew Lunn <andrew@lunn.ch> wrote:
>>>>>>>> This most certainly works fine in the simple case where you have one PHY
>>>>>>>> hanging off the MDIO bus, now what happens if you have several?
>>>>>>>>
>>>>>>>> Presumably, the first PHY that returns EPROBE_DEFER will make the entire
>>>>>>>> bus registration return EPROB_DEFER as well, and so on, and so forth,
>>>>>>>> but I am not sure if we will be properly unwinding the successful
>>>>>>>> registration of PHYs that either don't have an interrupt, or did not
>>>>>>>> return EPROBE_DEFER.
>>>>>>>>
>>>>>>>> It should be possible to mimic this behavior by using the fixed PHY, and
>>>>>>>> possibly the dsa_loop.c driver which would create 4 ports, expecting 4
>>>>>>>> fixed PHYs to be present.
>>>>>>>
>>>>>>> mdiobus_unregister(), called from of_mdiobus_register() on failure,
>>>>>>> should do the unwinding, right?
>>>>>>>
>>>>>>> And when the driver is reprobed, all PHYs are reprobed, until they all
>>>>>>> succeed.
>>>>>>
>>>>>> That is the theory. I looked at that while reviewing the patch. But
>>>>>> this has probably not been tested in anger. It would be good to test
>>>>>> this properly, with not just the first PHY returning -EPROBE_DEFER, to
>>>>>> really test the unwind.
>>>>>
>>>>> Unfortunately I don't have a board with multiple PHYs, so I cannot test
>>>>> that case.
>>>
>>> I tried adding a few dummy PHYs in DT, but that didn't work.
>>>
>>> So how can we proceed?
>>>
>>> I think the only way my patch can cause issues is because some systems
>>> may rely on EPROBE_DEFER errors being ignored.
>>>
>>>>> Does unbinding/rebinding a network driver with multiple PHYs currently
>>>>> work? Or module unload/reload?
>>>>
>>>> Usually there is a strict 1:1 mapping between a network device (not
>>>> driver) and a PHY device, switch drivers however, would have multiple
>>>> PHYs (one per port, aka net_deice).
>>>>
>>>> NB: binding and unbinding of PHYs is pretty broken at the moment though,
>>>> because there is a complete disconnect between what the Ethernet MAC
>>>> expects, and the state in which the PHY is. I had some patches to fix
>>>> that, but this turned out to be playing whack-a-mole which I typically
>>>> suck at.
>>>
>>> I didn't mean unbinding the PHY, but the network device.
>>> Don't you have the same issue with the state of PHYs as left by the bootloader?
>>
>> Anyone who can test the behavior on an Ethernet device with multiple PHYs,
>> e.g. by faking an -EPROBE_DEFER somewhere in the middle?
>>
>> I'd like to get this issue fixed in v4.13, to avoid a regression when migrating
>> several systems to a new and better clock driver in v4.14, which will trigger
>> EPROBE_DEFER.
> 
> Ping?
> 
> This patch fixes a real issue.

It sure does fix a real issue, but I am really concerned about the
inability to test this patch in a configuration where we have multiple
PHY(s) or MDIO device(s) hanging off the same MDIO bus and one of those
requesting an EPROBE_DEFER.

I currently don't have a setup where I could exercise this, Andrew, do you?
Andrew Lunn July 9, 2017, 5:28 p.m. UTC | #15
> It sure does fix a real issue, but I am really concerned about the
> inability to test this patch in a configuration where we have multiple
> PHY(s) or MDIO device(s) hanging off the same MDIO bus and one of those
> requesting an EPROBE_DEFER.
> 
> I currently don't have a setup where I could exercise this, Andrew, do you?

Hi Florian

What i do have, is a switch with some built in copper Marvell PHYs and
external SFF modules which use fixed link. I can probably hack the
fixed-link driver to return EPROBE_DEFER a few times.

	   Andrew
Geert Uytterhoeven July 9, 2017, 7:38 p.m. UTC | #16
Hi Florian, Andrew,

On Sun, Jul 9, 2017 at 7:28 PM, Andrew Lunn <andrew@lunn.ch> wrote:
>> It sure does fix a real issue, but I am really concerned about the
>> inability to test this patch in a configuration where we have multiple
>> PHY(s) or MDIO device(s) hanging off the same MDIO bus and one of those
>> requesting an EPROBE_DEFER.

If that case happens now, it silently falls back to polling, hiding the
problem.

Actually it means there may be users that rely on this broken behavior,
that start to fail when their interrupts are suddenly handled :-(

>> I currently don't have a setup where I could exercise this, Andrew, do you?
>
> What i do have, is a switch with some built in copper Marvell PHYs and
> external SFF modules which use fixed link. I can probably hack the
> fixed-link driver to return EPROBE_DEFER a few times.

That would be great, thanks!

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
diff mbox

Patch

diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 7e4c80f9b6cda0d3..f9ac2893f56184be 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -44,7 +44,7 @@  static int of_get_phy_id(struct device_node *device, u32 *phy_id)
 	return -EINVAL;
 }
 
-static void of_mdiobus_register_phy(struct mii_bus *mdio,
+static int of_mdiobus_register_phy(struct mii_bus *mdio,
 				    struct device_node *child, u32 addr)
 {
 	struct phy_device *phy;
@@ -60,9 +60,13 @@  static void of_mdiobus_register_phy(struct mii_bus *mdio,
 	else
 		phy = get_phy_device(mdio, addr, is_c45);
 	if (IS_ERR(phy))
-		return;
+		return PTR_ERR(phy);
 
-	rc = irq_of_parse_and_map(child, 0);
+	rc = of_irq_get(child, 0);
+	if (rc == -EPROBE_DEFER) {
+		phy_device_free(phy);
+		return rc;
+	}
 	if (rc > 0) {
 		phy->irq = rc;
 		mdio->irq[addr] = rc;
@@ -84,22 +88,23 @@  static void of_mdiobus_register_phy(struct mii_bus *mdio,
 	if (rc) {
 		phy_device_free(phy);
 		of_node_put(child);
-		return;
+		return rc;
 	}
 
 	dev_dbg(&mdio->dev, "registered phy %s at address %i\n",
 		child->name, addr);
+	return 0;
 }
 
-static void of_mdiobus_register_device(struct mii_bus *mdio,
-				       struct device_node *child, u32 addr)
+static int of_mdiobus_register_device(struct mii_bus *mdio,
+				      struct device_node *child, u32 addr)
 {
 	struct mdio_device *mdiodev;
 	int rc;
 
 	mdiodev = mdio_device_create(mdio, addr);
 	if (IS_ERR(mdiodev))
-		return;
+		return PTR_ERR(mdiodev);
 
 	/* Associate the OF node with the device structure so it
 	 * can be looked up later.
@@ -112,11 +117,12 @@  static void of_mdiobus_register_device(struct mii_bus *mdio,
 	if (rc) {
 		mdio_device_free(mdiodev);
 		of_node_put(child);
-		return;
+		return rc;
 	}
 
 	dev_dbg(&mdio->dev, "registered mdio device %s at address %i\n",
 		child->name, addr);
+	return 0;
 }
 
 int of_mdio_parse_addr(struct device *dev, const struct device_node *np)
@@ -242,9 +248,11 @@  int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 		}
 
 		if (of_mdiobus_child_is_phy(child))
-			of_mdiobus_register_phy(mdio, child, addr);
+			rc = of_mdiobus_register_phy(mdio, child, addr);
 		else
-			of_mdiobus_register_device(mdio, child, addr);
+			rc = of_mdiobus_register_device(mdio, child, addr);
+		if (rc)
+			goto unregister;
 	}
 
 	if (!scanphys)
@@ -265,12 +273,19 @@  int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 			dev_info(&mdio->dev, "scan phy %s at address %i\n",
 				 child->name, addr);
 
-			if (of_mdiobus_child_is_phy(child))
-				of_mdiobus_register_phy(mdio, child, addr);
+			if (of_mdiobus_child_is_phy(child)) {
+				rc = of_mdiobus_register_phy(mdio, child, addr);
+				if (rc)
+					goto unregister;
+			}
 		}
 	}
 
 	return 0;
+
+unregister:
+	mdiobus_unregister(mdio);
+	return rc;
 }
 EXPORT_SYMBOL(of_mdiobus_register);