diff mbox

[05/12] phylib: Allow reading and writing a mii bus from atomic context.

Message ID 1b8219e7d7993d0e8204b94e959477e3007534ce.1276615626.git.richard.cochran@omicron.at
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Richard Cochran June 15, 2010, 4:08 p.m. UTC
In order to support hardware time stamping from a PHY, it is necessary to
read from the PHY while running in_interrupt(). This patch allows a mii
bus to operate in an atomic context. An mii_bus driver may declare itself
capable for this mode. Drivers which do not do this will remain with the
default that bus operations may sleep.

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
---
 drivers/net/phy/mdio_bus.c |   35 ++++++++++++++++++++++++++++-------
 include/linux/phy.h        |   13 +++++++++++--
 2 files changed, 39 insertions(+), 9 deletions(-)

Comments

Grant Likely June 15, 2010, 4:43 p.m. UTC | #1
On Tue, Jun 15, 2010 at 10:08 AM, Richard Cochran
<richardcochran@gmail.com> wrote:
> In order to support hardware time stamping from a PHY, it is necessary to
> read from the PHY while running in_interrupt(). This patch allows a mii
> bus to operate in an atomic context. An mii_bus driver may declare itself
> capable for this mode. Drivers which do not do this will remain with the
> default that bus operations may sleep.
>
> Signed-off-by: Richard Cochran <richard.cochran@omicron.at>

Last I checked, the MDIO bus is very slow.  Is this really a good
idea?  How much latency does MDIO access have on the hardware you are
working with?

I also don't like the idea of taking a spin lock during MDIO
operations, and the dual locking mode in the core code.

I'd rather see separate atomic context hooks that doesn't obtain the
mutex under the assumption that the caller has already done so.  If
the bus doesn't support atomic access, then it won't implement the
hooks and the core code should return an error.

I've cc'd Thomas Gleixner.  He might have a better idea about how to
implement what you're trying to do.

g.

> ---
>  drivers/net/phy/mdio_bus.c |   35 ++++++++++++++++++++++++++++-------
>  include/linux/phy.h        |   13 +++++++++++--
>  2 files changed, 39 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
> index 6a6b819..441be7d 100644
> --- a/drivers/net/phy/mdio_bus.c
> +++ b/drivers/net/phy/mdio_bus.c
> @@ -36,6 +36,22 @@
>  #include <asm/irq.h>
>  #include <asm/uaccess.h>
>
> +static inline void mdiobus_lock(struct mii_bus *bus)
> +{
> +       if (MDIOBUS_SLEEPS_RW == bus->locktype)
> +               mutex_lock(&bus->mdio_lock.m);
> +       else
> +               spin_lock(&bus->mdio_lock.s);
> +}
> +
> +static inline void mdiobus_unlock(struct mii_bus *bus)
> +{
> +       if (MDIOBUS_SLEEPS_RW == bus->locktype)
> +               mutex_unlock(&bus->mdio_lock.m);
> +       else
> +               spin_unlock(&bus->mdio_lock.s);
> +}
> +
>  /**
>  * mdiobus_alloc - allocate a mii_bus structure
>  *
> @@ -107,7 +123,10 @@ int mdiobus_register(struct mii_bus *bus)
>                return -EINVAL;
>        }
>
> -       mutex_init(&bus->mdio_lock);
> +       if (MDIOBUS_SLEEPS_RW == bus->locktype)
> +               mutex_init(&bus->mdio_lock.m);
> +       else
> +               spin_lock_init(&bus->mdio_lock.s);
>
>        if (bus->reset)
>                bus->reset(bus);
> @@ -212,11 +231,12 @@ int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum)
>  {
>        int retval;
>
> -       BUG_ON(in_interrupt());
> +       if (MDIOBUS_SLEEPS_RW == bus->locktype)
> +               BUG_ON(in_interrupt());
>
> -       mutex_lock(&bus->mdio_lock);
> +       mdiobus_lock(bus);
>        retval = bus->read(bus, addr, regnum);
> -       mutex_unlock(&bus->mdio_lock);
> +       mdiobus_unlock(bus);
>
>        return retval;
>  }
> @@ -237,11 +257,12 @@ int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val)
>  {
>        int err;
>
> -       BUG_ON(in_interrupt());
> +       if (MDIOBUS_SLEEPS_RW == bus->locktype)
> +               BUG_ON(in_interrupt());
>
> -       mutex_lock(&bus->mdio_lock);
> +       mdiobus_lock(bus);
>        err = bus->write(bus, addr, regnum, val);
> -       mutex_unlock(&bus->mdio_lock);
> +       mdiobus_unlock(bus);
>
>        return err;
>  }
> diff --git a/include/linux/phy.h b/include/linux/phy.h
> index 7a8caac..352b030 100644
> --- a/include/linux/phy.h
> +++ b/include/linux/phy.h
> @@ -98,11 +98,20 @@ struct mii_bus {
>        int (*write)(struct mii_bus *bus, int phy_id, int regnum, u16 val);
>        int (*reset)(struct mii_bus *bus);
>
> +       /* Indicates whether bus may be used from an atomic context. */
> +       enum {
> +               MDIOBUS_SLEEPS_RW,
> +               MDIOBUS_ATOMIC_RW
> +       } locktype;
> +
>        /*
> -        * A lock to ensure that only one thing can read/write
> +        * A lock or mutex to ensure that only one thing can read/write
>         * the MDIO bus at a time
>         */
> -       struct mutex mdio_lock;
> +       union {
> +               struct mutex m;
> +               spinlock_t s;
> +       } mdio_lock;
>
>        struct device *parent;
>        enum {
> --
> 1.6.3.3
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
Richard Cochran June 15, 2010, 5:08 p.m. UTC | #2
On Tue, Jun 15, 2010 at 10:43:08AM -0600, Grant Likely wrote:
> On Tue, Jun 15, 2010 at 10:08 AM, Richard Cochran
> <richardcochran@gmail.com> wrote:
> > In order to support hardware time stamping from a PHY, it is necessary to
> > read from the PHY while running in_interrupt(). This patch allows a mii
> > bus to operate in an atomic context. An mii_bus driver may declare itself
> > capable for this mode. Drivers which do not do this will remain with the
> > default that bus operations may sleep.
> >
> > Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
> 
> Last I checked, the MDIO bus is very slow.  Is this really a good
> idea?  How much latency does MDIO access have on the hardware you are
> working with?

Yes, MDIO access is slow, and it can vary (eg bit banging
implementations). It clear that getting PHY timestamps is costly, but
for applications that want PTP synchronization, one is willing to pay
the price.

> I also don't like the idea of taking a spin lock during MDIO
> operations, and the dual locking mode in the core code.

Originally, the phylib used a spinlock for this. It was replaced with
a mutex in 35b5f6b1a82b5c586e0b24c711dc6ba944e88ef1 in order to
accommodate mdio busses that may need to sleep. So, keeping the option
to use a spinlock is similar to the previous implementation.

> I'd rather see separate atomic context hooks that doesn't obtain the
> mutex under the assumption that the caller has already done so.  If
> the bus doesn't support atomic access, then it won't implement the
> hooks and the core code should return an error.
> 
> I've cc'd Thomas Gleixner.  He might have a better idea about how to
> implement what you're trying to do.

Okay, I'm open to suggestions...

Thanks,

Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Grant Likely June 15, 2010, 6:29 p.m. UTC | #3
On Tue, Jun 15, 2010 at 11:08 AM, Richard Cochran
<richardcochran@gmail.com> wrote:
> On Tue, Jun 15, 2010 at 10:43:08AM -0600, Grant Likely wrote:
>> On Tue, Jun 15, 2010 at 10:08 AM, Richard Cochran
>> <richardcochran@gmail.com> wrote:
>> > In order to support hardware time stamping from a PHY, it is necessary to
>> > read from the PHY while running in_interrupt(). This patch allows a mii
>> > bus to operate in an atomic context. An mii_bus driver may declare itself
>> > capable for this mode. Drivers which do not do this will remain with the
>> > default that bus operations may sleep.
>> >
>> > Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
>>
>> Last I checked, the MDIO bus is very slow.  Is this really a good
>> idea?  How much latency does MDIO access have on the hardware you are
>> working with?
>
> Yes, MDIO access is slow, and it can vary (eg bit banging
> implementations). It clear that getting PHY timestamps is costly, but
> for applications that want PTP synchronization, one is willing to pay
> the price.
>
>> I also don't like the idea of taking a spin lock during MDIO
>> operations, and the dual locking mode in the core code.
>
> Originally, the phylib used a spinlock for this. It was replaced with
> a mutex in 35b5f6b1a82b5c586e0b24c711dc6ba944e88ef1 in order to
> accommodate mdio busses that may need to sleep. So, keeping the option
> to use a spinlock is similar to the previous implementation.

That's right, and I fully agree with that change.  To me, going back
to allowing spin locks is a regression because it adds a new source of
scheduling latency.  Using a mutex forces users to take into account
the slow nature of MDIO access.  For existing callers, this isn't a
problem because they already are designed for this characteristic.  A
new user which depends on atomic access should use a different API
which doesn't take the lock with the understanding that it is may
return a failure if it doesn't support it or if it cannot perform the
operation atomically.

That still leaves the troubling MDIO induced latency issue.

g.
Richard Cochran June 16, 2010, 6:20 a.m. UTC | #4
> That's right, and I fully agree with that change.  To me, going back
> to allowing spin locks is a regression because it adds a new source of
> scheduling latency.

I think that the change was not about reducing scheduling
latency. Rather, the idea was simply to allow mdio bus drivers that
sleep. Here is the change log message:

    commit 35b5f6b1a82b5c586e0b24c711dc6ba944e88ef1

    PHYLIB: Locking fixes for PHY I/O potentially sleeping
    
    PHY read/write functions can potentially sleep (e.g., a PHY accessed
    via I2C).  The following changes were made to account for this:
    
        * Change spin locks to mutex locks
        * Add a BUG_ON() to phy_read() phy_write() to warn against
          calling them from an interrupt context.
        * Use work queue for PHY state machine handling since
          it can potentially sleep
        * Change phydev lock from spinlock to mutex

The fundamental issue is this: Fro the SO_TIMESTAMPING API, receive
timestamps must appear in a control message along with the packet
data. Only the MAC driver (or the PHY driver) knows how to get the
timestamp. The stack calls the MAC driver via its napi poll
function. During the call, the driver must provide the skb with Rx
timestamp.

The only reasonable way to do this is to have the driver fetch the
timestamp durng the napi poll function. For MAC drivers with fast
register access, the performance penalty is small. For PHY drivers
with must go via the MDIO bus, the performance penalty is obviously
larger, and the user must be willing to live with it.

You might suggest the alternate that the driver would defer the
netif_receive_skb() callback until a work queue completes, providing
the Rx timestamp. The driver would then call netif_receive_skb() at
some later time.

However, there are a number of problems with this idea:

1. It is really icky for the drivers to be creating new skb queues for
   this purpose. MAC drivers would have to maintain such queues on
   behalf of the PHY drivers, but only when the PHYs support
   timestamping. Yuck.

2. There is a (soft) real time constraint on the delivery of the PTP
   packets to the user space application. Basicly, delays and jitter
   in the time to receive the packet negatively affect the clock servo
   loop.

3. It cannot work for many kinds of PTP timestamping hardware. Some of
   hardware only timestamps PTP packets. That means that not every
   received packet will have a timestamp. Such hardware provides some
   key data from the packet (like PTP UUID and sequence number) with
   the timestamp. Software must match this information to a particular
   packet. In order to defer a skb, the driver must first obtain the
   timestamp information. This is a catch-22.

Having said all that, I am still open to suggestions...

Richard
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 6a6b819..441be7d 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -36,6 +36,22 @@ 
 #include <asm/irq.h>
 #include <asm/uaccess.h>
 
+static inline void mdiobus_lock(struct mii_bus *bus)
+{
+	if (MDIOBUS_SLEEPS_RW == bus->locktype)
+		mutex_lock(&bus->mdio_lock.m);
+	else
+		spin_lock(&bus->mdio_lock.s);
+}
+
+static inline void mdiobus_unlock(struct mii_bus *bus)
+{
+	if (MDIOBUS_SLEEPS_RW == bus->locktype)
+		mutex_unlock(&bus->mdio_lock.m);
+	else
+		spin_unlock(&bus->mdio_lock.s);
+}
+
 /**
  * mdiobus_alloc - allocate a mii_bus structure
  *
@@ -107,7 +123,10 @@  int mdiobus_register(struct mii_bus *bus)
 		return -EINVAL;
 	}
 
-	mutex_init(&bus->mdio_lock);
+	if (MDIOBUS_SLEEPS_RW == bus->locktype)
+		mutex_init(&bus->mdio_lock.m);
+	else
+		spin_lock_init(&bus->mdio_lock.s);
 
 	if (bus->reset)
 		bus->reset(bus);
@@ -212,11 +231,12 @@  int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum)
 {
 	int retval;
 
-	BUG_ON(in_interrupt());
+	if (MDIOBUS_SLEEPS_RW == bus->locktype)
+		BUG_ON(in_interrupt());
 
-	mutex_lock(&bus->mdio_lock);
+	mdiobus_lock(bus);
 	retval = bus->read(bus, addr, regnum);
-	mutex_unlock(&bus->mdio_lock);
+	mdiobus_unlock(bus);
 
 	return retval;
 }
@@ -237,11 +257,12 @@  int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val)
 {
 	int err;
 
-	BUG_ON(in_interrupt());
+	if (MDIOBUS_SLEEPS_RW == bus->locktype)
+		BUG_ON(in_interrupt());
 
-	mutex_lock(&bus->mdio_lock);
+	mdiobus_lock(bus);
 	err = bus->write(bus, addr, regnum, val);
-	mutex_unlock(&bus->mdio_lock);
+	mdiobus_unlock(bus);
 
 	return err;
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 7a8caac..352b030 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -98,11 +98,20 @@  struct mii_bus {
 	int (*write)(struct mii_bus *bus, int phy_id, int regnum, u16 val);
 	int (*reset)(struct mii_bus *bus);
 
+	/* Indicates whether bus may be used from an atomic context. */
+	enum {
+		MDIOBUS_SLEEPS_RW,
+		MDIOBUS_ATOMIC_RW
+	} locktype;
+
 	/*
-	 * A lock to ensure that only one thing can read/write
+	 * A lock or mutex to ensure that only one thing can read/write
 	 * the MDIO bus at a time
 	 */
-	struct mutex mdio_lock;
+	union {
+		struct mutex m;
+		spinlock_t s;
+	} mdio_lock;
 
 	struct device *parent;
 	enum {