diff mbox series

[v2,net] net: fec: fix hardware time stamping by external devices

Message ID 20200711120842.2631-1-sorganov@gmail.com
State Changes Requested
Delegated to: David Miller
Headers show
Series [v2,net] net: fec: fix hardware time stamping by external devices | expand

Commit Message

Sergey Organov July 11, 2020, 12:08 p.m. UTC
Fix support for external PTP-aware devices such as DSA or PTP PHY:

Make sure we never time stamp tx packets when hardware time stamping
is disabled.

Check for PTP PHY being in use and then pass ioctls related to time
stamping of Ethernet packets to the PTP PHY rather than handle them
ourselves. In addition, disable our own hardware time stamping in this
case.

Fixes: 6605b73 ("FEC: Add time stamping code and a PTP hardware clock")
Signed-off-by: Sergey Organov <sorganov@gmail.com>
---

v2:
  - Extracted from larger patch series
  - Description/comments updated according to discussions
  - Added Fixes: tag

 drivers/net/ethernet/freescale/fec.h      |  1 +
 drivers/net/ethernet/freescale/fec_main.c | 23 +++++++++++++++++------
 drivers/net/ethernet/freescale/fec_ptp.c  | 12 ++++++++++++
 3 files changed, 30 insertions(+), 6 deletions(-)

Comments

Vladimir Oltean July 11, 2020, 11:19 p.m. UTC | #1
Hi Sergey,

On Sat, Jul 11, 2020 at 03:08:42PM +0300, Sergey Organov wrote:
> Fix support for external PTP-aware devices such as DSA or PTP PHY:
> 
> Make sure we never time stamp tx packets when hardware time stamping
> is disabled.
> 
> Check for PTP PHY being in use and then pass ioctls related to time
> stamping of Ethernet packets to the PTP PHY rather than handle them
> ourselves. In addition, disable our own hardware time stamping in this
> case.
> 
> Fixes: 6605b73 ("FEC: Add time stamping code and a PTP hardware clock")

Please use a 12-character sha1sum. Try to use the "pretty" format
specifier I gave you in the original thread, it saves you from counting,
and also from people complaining once it gets merged:

https://www.google.com/search?q=stephen+rothwell+%22fixes+tag+needs+some+work%22

> Signed-off-by: Sergey Organov <sorganov@gmail.com>
> ---
> 
> v2:
>   - Extracted from larger patch series
>   - Description/comments updated according to discussions
>   - Added Fixes: tag
> 
>  drivers/net/ethernet/freescale/fec.h      |  1 +
>  drivers/net/ethernet/freescale/fec_main.c | 23 +++++++++++++++++------
>  drivers/net/ethernet/freescale/fec_ptp.c  | 12 ++++++++++++
>  3 files changed, 30 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
> index d8d76da..832a217 100644
> --- a/drivers/net/ethernet/freescale/fec.h
> +++ b/drivers/net/ethernet/freescale/fec.h
> @@ -590,6 +590,7 @@ struct fec_enet_private {
>  void fec_ptp_init(struct platform_device *pdev, int irq_idx);
>  void fec_ptp_stop(struct platform_device *pdev);
>  void fec_ptp_start_cyclecounter(struct net_device *ndev);
> +void fec_ptp_disable_hwts(struct net_device *ndev);
>  int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr);
>  int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr);
>  
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index 3982285..cc7fbfc 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -1294,8 +1294,13 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
>  			ndev->stats.tx_bytes += skb->len;
>  		}
>  
> -		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
> -			fep->bufdesc_ex) {
> +		/* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who
> +		 * are to time stamp the packet, so we still need to check time
> +		 * stamping enabled flag.
> +		 */
> +		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
> +			     fep->hwts_tx_en) &&
> +		    fep->bufdesc_ex) {
>  			struct skb_shared_hwtstamps shhwtstamps;
>  			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
>  
> @@ -2723,10 +2728,16 @@ static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
>  		return -ENODEV;
>  
>  	if (fep->bufdesc_ex) {
> -		if (cmd == SIOCSHWTSTAMP)
> -			return fec_ptp_set(ndev, rq);
> -		if (cmd == SIOCGHWTSTAMP)
> -			return fec_ptp_get(ndev, rq);
> +		bool use_fec_hwts = !phy_has_hwtstamp(phydev);

I thought we were in agreement that FEC does not support PHY
timestamping at this point, and this patch would only be fixing DSA
switches (even though PHYs would need this fixed too, when support is
added for them)? I would definitely not introduce support (and
incomplete, at that) for a new feature in a bugfix patch.

But it looks like we aren't.

> +
> +		if (cmd == SIOCSHWTSTAMP) {
> +			if (use_fec_hwts)
> +				return fec_ptp_set(ndev, rq);
> +			fec_ptp_disable_hwts(ndev);
> +		} else if (cmd == SIOCGHWTSTAMP) {
> +			if (use_fec_hwts)
> +				return fec_ptp_get(ndev, rq);
> +		}
>  	}
>  
>  	return phy_mii_ioctl(phydev, rq, cmd);
> diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
> index 945643c..f8a592c 100644
> --- a/drivers/net/ethernet/freescale/fec_ptp.c
> +++ b/drivers/net/ethernet/freescale/fec_ptp.c
> @@ -452,6 +452,18 @@ static int fec_ptp_enable(struct ptp_clock_info *ptp,
>  	return -EOPNOTSUPP;
>  }
>  
> +/**
> + * fec_ptp_disable_hwts - disable hardware time stamping
> + * @ndev: pointer to net_device
> + */
> +void fec_ptp_disable_hwts(struct net_device *ndev)

This is not really needed, is it?
- PHY ability of hwtstamping does not change across the runtime of the
  kernel (or do you have a "special" one where it does?)
- The initial values for hwts_tx_en and hwts_rx_en are already 0
- There is no code path for which it is possible for hwts_tx_en or
  hwts_rx_en to have been non-zero prior to this call making them zero.

It is just "to be sure", in a very non-necessary way.

But nonetheless, it shouldn't be present in this patch either way, due
to the fact that one patch should have one topic only, and the topic of
this patch should be solving a clearly defined bug.

> +{
> +	struct fec_enet_private *fep = netdev_priv(ndev);
> +
> +	fep->hwts_tx_en = 0;
> +	fep->hwts_rx_en = 0;
> +}
> +
>  int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr)
>  {
>  	struct fec_enet_private *fep = netdev_priv(ndev);
> -- 
> 2.10.0.1.g57b01a3
> 

Thanks,
-Vladimir
Sergey Organov July 12, 2020, 2:16 p.m. UTC | #2
Vladimir Oltean <olteanv@gmail.com> writes:

> Hi Sergey,
>
> On Sat, Jul 11, 2020 at 03:08:42PM +0300, Sergey Organov wrote:
>> Fix support for external PTP-aware devices such as DSA or PTP PHY:
>> 
>> Make sure we never time stamp tx packets when hardware time stamping
>> is disabled.
>> 
>> Check for PTP PHY being in use and then pass ioctls related to time
>> stamping of Ethernet packets to the PTP PHY rather than handle them
>> ourselves. In addition, disable our own hardware time stamping in this
>> case.
>> 
>> Fixes: 6605b73 ("FEC: Add time stamping code and a PTP hardware clock")
>
> Please use a 12-character sha1sum. Try to use the "pretty" format
> specifier I gave you in the original thread, it saves you from
> counting,

I did as you suggested:

[pretty]
        fixes = Fixes: %h (\"%s\")
[alias]
	fixes = show --no-patch --pretty='Fixes: %h (\"%s\")'

And that's what it gave me. Dunno, maybe its Git version that is
responsible?

I now tried to find a way to specify the number of digits in the
abbreviated hash in the format, but failed. There is likely some global
setting for minimum number of digits, but I'm yet to find it. Any idea?

> and also from people complaining once it gets merged:
>
> https://www.google.com/search?q=stephen+rothwell+%22fixes+tag+needs+some+work%22
>
>> Signed-off-by: Sergey Organov <sorganov@gmail.com>
>> ---
>> 
>> v2:
>>   - Extracted from larger patch series
>>   - Description/comments updated according to discussions
>>   - Added Fixes: tag
>> 
>>  drivers/net/ethernet/freescale/fec.h      |  1 +
>>  drivers/net/ethernet/freescale/fec_main.c | 23 +++++++++++++++++------
>>  drivers/net/ethernet/freescale/fec_ptp.c  | 12 ++++++++++++
>>  3 files changed, 30 insertions(+), 6 deletions(-)
>> 
>> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
>> index d8d76da..832a217 100644
>> --- a/drivers/net/ethernet/freescale/fec.h
>> +++ b/drivers/net/ethernet/freescale/fec.h
>> @@ -590,6 +590,7 @@ struct fec_enet_private {
>>  void fec_ptp_init(struct platform_device *pdev, int irq_idx);
>>  void fec_ptp_stop(struct platform_device *pdev);
>>  void fec_ptp_start_cyclecounter(struct net_device *ndev);
>> +void fec_ptp_disable_hwts(struct net_device *ndev);
>>  int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr);
>>  int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr);
>>  
>> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
>> index 3982285..cc7fbfc 100644
>> --- a/drivers/net/ethernet/freescale/fec_main.c
>> +++ b/drivers/net/ethernet/freescale/fec_main.c
>> @@ -1294,8 +1294,13 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
>>  			ndev->stats.tx_bytes += skb->len;
>>  		}
>>  
>> -		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
>> -			fep->bufdesc_ex) {
>> +		/* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who
>> +		 * are to time stamp the packet, so we still need to check time
>> +		 * stamping enabled flag.
>> +		 */
>> +		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
>> +			     fep->hwts_tx_en) &&
>> +		    fep->bufdesc_ex) {
>>  			struct skb_shared_hwtstamps shhwtstamps;
>>  			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
>>  
>> @@ -2723,10 +2728,16 @@ static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
>>  		return -ENODEV;
>>  
>>  	if (fep->bufdesc_ex) {
>> -		if (cmd == SIOCSHWTSTAMP)
>> -			return fec_ptp_set(ndev, rq);
>> -		if (cmd == SIOCGHWTSTAMP)
>> -			return fec_ptp_get(ndev, rq);
>> +		bool use_fec_hwts = !phy_has_hwtstamp(phydev);
>
> I thought we were in agreement that FEC does not support PHY
> timestamping at this point, and this patch would only be fixing DSA
> switches (even though PHYs would need this fixed too, when support is
> added for them)? I would definitely not introduce support (and
> incomplete, at that) for a new feature in a bugfix patch.
>
> But it looks like we aren't.

We were indeed, and, honestly, I did prepare the split version of the
changes. But then I felt uneasy describing these commits, as I realized
that I fix single source file and single original commit by adding
proper support for a single feature that is described in your (single)
recent document, but with 2 separate commits, each of which solves only
half of the problem. I felt I need to somehow explain why could somebody
want half a fix, and didn't know how, so I've merged them back into
single commit.

In case you insist they are to be separate, I do keep the split version
in my git tree, but to finish it that way, I'd like to clarify a few
details:

1. Should it be patch series with 2 commits, or 2 entirely separate
patches?

2. If patch series, which change should go first? Here please notice
that ioctl() change makes no sense without SKBTX fix unconditionally,
while SKBTX fix makes no sense without ioctl() fix for PTP PHY users
only.

3. If entirely separate patches, should I somehow refer to SKBTX patch in
ioctl() one (and/or vice versa), to make it explicit they are
(inter)dependent? 

4. How/if should I explain why anybody would benefit from applying
SKBTX patch, yet be in trouble applying ioctl() one? 

>
>> +
>> +		if (cmd == SIOCSHWTSTAMP) {
>> +			if (use_fec_hwts)
>> +				return fec_ptp_set(ndev, rq);
>> +			fec_ptp_disable_hwts(ndev);
>> +		} else if (cmd == SIOCGHWTSTAMP) {
>> +			if (use_fec_hwts)
>> +				return fec_ptp_get(ndev, rq);
>> +		}
>>  	}
>>  
>>  	return phy_mii_ioctl(phydev, rq, cmd);
>> diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
>> index 945643c..f8a592c 100644
>> --- a/drivers/net/ethernet/freescale/fec_ptp.c
>> +++ b/drivers/net/ethernet/freescale/fec_ptp.c
>> @@ -452,6 +452,18 @@ static int fec_ptp_enable(struct ptp_clock_info *ptp,
>>  	return -EOPNOTSUPP;
>>  }
>>  
>> +/**
>> + * fec_ptp_disable_hwts - disable hardware time stamping
>> + * @ndev: pointer to net_device
>> + */
>> +void fec_ptp_disable_hwts(struct net_device *ndev)
>
> This is not really needed, is it?
> - PHY ability of hwtstamping does not change across the runtime of the
>   kernel (or do you have a "special" one where it does?)
> - The initial values for hwts_tx_en and hwts_rx_en are already 0
> - There is no code path for which it is possible for hwts_tx_en or
>   hwts_rx_en to have been non-zero prior to this call making them
>   zero.

If everybody agree it is not needed, I'm fine getting it out of the
patch, but please consider my worries below.

I'm afraid your third statement might happen to be not exactly true.
It's due to this same path the hwts_tx_en could end-up being set, as we
have if() on phy_has_hwtstamp(phydev), so the path can set hwts_xx_en if
this code gets somehow run when phydev->phy is set to 0, or attached PHY
is not yet has PTP property.

I'm not sure it can happen, but essential thing here is that I have no
evidence it can't, and another place to ensure hwts_xx_en fields are
cleared would be at the time of attachment of PTP-aware PHY, by check
and clear and that time, yet even here it'd rely on PTP-awareness being
already established at the moment.

The second variant is harder for me to figure, yet is less reliable, so,
overall, I preferred to keep the proposed solution that I believe should
work no matter what, and let somebody who is more fluid in the code-base
to get responsibility to remove it. For example, I didn't want to even
start to consider how all this behaves on cable connect/disconnect, if
up/down, or over hibernation.

>
> It is just "to be sure", in a very non-necessary way.

It is "to be sure", without "just", as I tried to explain above. If it
were my own code, I'd ask for an evidence that this part is not needed,
before getting rid of this safety belt.

>
> But nonetheless, it shouldn't be present in this patch either way, due
> to the fact that one patch should have one topic only, and the topic of
> this patch should be solving a clearly defined bug.

I actually don't care either way, but to be picky, the answer depends on
particular definition of the bug, and the bug I have chased, and its
definition I've used, even in the original series, requires simultaneous
fixes in 2 places of the code.

You have yet another bug in mind that part of my original patch happens
to solve, yes, but that, by itself, doesn't necessarily mean the patch
should be split. Nevertheless, I honestly tried to split it, according
to our agreement, but failed, see above, and I still willing to try
again, provided somebody actually needs it.

Thanks,
-- Sergey
Andrew Lunn July 12, 2020, 2:47 p.m. UTC | #3
> I did as you suggested:
> 
> [pretty]
>         fixes = Fixes: %h (\"%s\")
> [alias]
> 	fixes = show --no-patch --pretty='Fixes: %h (\"%s\")'
> 
> And that's what it gave me. Dunno, maybe its Git version that is
> responsible?

[core]
        abbrev = 12

	Andrew
Vladimir Oltean July 12, 2020, 3:01 p.m. UTC | #4
On Sun, Jul 12, 2020 at 05:16:56PM +0300, Sergey Organov wrote:
> Vladimir Oltean <olteanv@gmail.com> writes:
> 
> > Hi Sergey,
> >
> > On Sat, Jul 11, 2020 at 03:08:42PM +0300, Sergey Organov wrote:
> >> Fix support for external PTP-aware devices such as DSA or PTP PHY:
> >> 
> >> Make sure we never time stamp tx packets when hardware time stamping
> >> is disabled.
> >> 
> >> Check for PTP PHY being in use and then pass ioctls related to time
> >> stamping of Ethernet packets to the PTP PHY rather than handle them
> >> ourselves. In addition, disable our own hardware time stamping in this
> >> case.
> >> 
> >> Fixes: 6605b73 ("FEC: Add time stamping code and a PTP hardware clock")
> >
> > Please use a 12-character sha1sum. Try to use the "pretty" format
> > specifier I gave you in the original thread, it saves you from
> > counting,
> 
> I did as you suggested:
> 
> [pretty]
>         fixes = Fixes: %h (\"%s\")
> [alias]
> 	fixes = show --no-patch --pretty='Fixes: %h (\"%s\")'
> 
> And that's what it gave me. Dunno, maybe its Git version that is
> responsible?
> 
> I now tried to find a way to specify the number of digits in the
> abbreviated hash in the format, but failed. There is likely some global
> setting for minimum number of digits, but I'm yet to find it. Any idea?
> 

Sorry, my fault. I gave you only partial settings. Use this:

[core]
	abbrev = 12
[pretty]
	fixes = Fixes: %h (\"%s\")

> > and also from people complaining once it gets merged:
> >
> > https://www.google.com/search?q=stephen+rothwell+%22fixes+tag+needs+some+work%22
> >
> >> Signed-off-by: Sergey Organov <sorganov@gmail.com>
> >> ---
> >> 
> >> v2:
> >>   - Extracted from larger patch series
> >>   - Description/comments updated according to discussions
> >>   - Added Fixes: tag
> >> 
> >>  drivers/net/ethernet/freescale/fec.h      |  1 +
> >>  drivers/net/ethernet/freescale/fec_main.c | 23 +++++++++++++++++------
> >>  drivers/net/ethernet/freescale/fec_ptp.c  | 12 ++++++++++++
> >>  3 files changed, 30 insertions(+), 6 deletions(-)
> >> 
> >> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
> >> index d8d76da..832a217 100644
> >> --- a/drivers/net/ethernet/freescale/fec.h
> >> +++ b/drivers/net/ethernet/freescale/fec.h
> >> @@ -590,6 +590,7 @@ struct fec_enet_private {
> >>  void fec_ptp_init(struct platform_device *pdev, int irq_idx);
> >>  void fec_ptp_stop(struct platform_device *pdev);
> >>  void fec_ptp_start_cyclecounter(struct net_device *ndev);
> >> +void fec_ptp_disable_hwts(struct net_device *ndev);
> >>  int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr);
> >>  int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr);
> >>  
> >> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> >> index 3982285..cc7fbfc 100644
> >> --- a/drivers/net/ethernet/freescale/fec_main.c
> >> +++ b/drivers/net/ethernet/freescale/fec_main.c
> >> @@ -1294,8 +1294,13 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
> >>  			ndev->stats.tx_bytes += skb->len;
> >>  		}
> >>  
> >> -		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
> >> -			fep->bufdesc_ex) {
> >> +		/* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who
> >> +		 * are to time stamp the packet, so we still need to check time
> >> +		 * stamping enabled flag.
> >> +		 */
> >> +		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
> >> +			     fep->hwts_tx_en) &&
> >> +		    fep->bufdesc_ex) {
> >>  			struct skb_shared_hwtstamps shhwtstamps;
> >>  			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
> >>  
> >> @@ -2723,10 +2728,16 @@ static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
> >>  		return -ENODEV;
> >>  
> >>  	if (fep->bufdesc_ex) {
> >> -		if (cmd == SIOCSHWTSTAMP)
> >> -			return fec_ptp_set(ndev, rq);
> >> -		if (cmd == SIOCGHWTSTAMP)
> >> -			return fec_ptp_get(ndev, rq);
> >> +		bool use_fec_hwts = !phy_has_hwtstamp(phydev);
> >
> > I thought we were in agreement that FEC does not support PHY
> > timestamping at this point, and this patch would only be fixing DSA
> > switches (even though PHYs would need this fixed too, when support is
> > added for them)? I would definitely not introduce support (and
> > incomplete, at that) for a new feature in a bugfix patch.
> >
> > But it looks like we aren't.
> 
> We were indeed, and, honestly, I did prepare the split version of the
> changes. But then I felt uneasy describing these commits, as I realized
> that I fix single source file and single original commit by adding
> proper support for a single feature that is described in your (single)
> recent document, but with 2 separate commits, each of which solves only
> half of the problem. I felt I need to somehow explain why could somebody
> want half a fix, and didn't know how, so I've merged them back into
> single commit.
> 

Right now there are 2 mainline DSA timestamping drivers that could be
paired with the FEC driver: mv88e6xxx and sja1105 (there is a third one,
felix, which is an embedded L2 switch, so its DSA master is known and
fixed, and it's not FEC). In practice, there are boards out there that
use FEC in conjunction with both these DSA switch families.

As far as I understand. the reason why SKBTX_IN_PROGRESS exists is for
skb_tx_timestamp() to only provide a software timestamp if the hardware
timestamping isn't going to. So hardware timestamping logic must signal
its intention. With SO_TIMESTAMPING, this should not be strictly
necessary, as this UAPI supports multiple sources of timestamping
(including software and hardware together), but I think
SKBTX_IN_PROGRESS predates this UAPI and timestamping should continue to
work with older socket options.

Now, out of the 2 mainline DSA drivers, 1 of them isn't setting
SKBTX_IN_PROGRESS, and that is mv88e6xxx. So mv88e6xxx isn't triggerring
this bug. I'm not sure why it isn't setting the flag. It might very well
be that the author of the patch had a board with a FEC DSA master, and
setting this flag made bad things happen, so he just left it unset.
Doesn't really matter.
But sja1105 is setting the flag:

https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/tree/drivers/net/dsa/sja1105/sja1105_ptp.c#n890

So, at the very least, you are fixing PTP on DSA setups with FEC as
master and sja1105 as switch. Boards like that do exist.

> In case you insist they are to be separate, I do keep the split version
> in my git tree, but to finish it that way, I'd like to clarify a few
> details:
> 
> 1. Should it be patch series with 2 commits, or 2 entirely separate
> patches?
> 

Entirely separate.

> 2. If patch series, which change should go first? Here please notice
> that ioctl() change makes no sense without SKBTX fix unconditionally,
> while SKBTX fix makes no sense without ioctl() fix for PTP PHY users
> only.
> 

Please look at the SKBTX fix from the perspective of code that currently
exists in mainline, and not from the perspective of what you have in
mind.

> 3. If entirely separate patches, should I somehow refer to SKBTX patch in
> ioctl() one (and/or vice versa), to make it explicit they are
> (inter)dependent? 
> 

Nope. The PHY timestamping support will go to David's net-next, this
common PHY/DSA bugfix to net, and they'll meet sooner rather than later.

> 4. How/if should I explain why anybody would benefit from applying
> SKBTX patch, yet be in trouble applying ioctl() one? 
> 

Could you please rephrase? I don't understand the part about being in
trouble for applying the ioctl patch.

Thanks,
-Vladimir
Sergey Organov July 12, 2020, 5:29 p.m. UTC | #5
Vladimir Oltean <olteanv@gmail.com> writes:

> On Sun, Jul 12, 2020 at 05:16:56PM +0300, Sergey Organov wrote:
>> Vladimir Oltean <olteanv@gmail.com> writes:
>> 
>> > Hi Sergey,
>> >
>> > On Sat, Jul 11, 2020 at 03:08:42PM +0300, Sergey Organov wrote:
>> >> Fix support for external PTP-aware devices such as DSA or PTP PHY:
>> >> 
>> >> Make sure we never time stamp tx packets when hardware time stamping
>> >> is disabled.
>> >> 
>> >> Check for PTP PHY being in use and then pass ioctls related to time
>> >> stamping of Ethernet packets to the PTP PHY rather than handle them
>> >> ourselves. In addition, disable our own hardware time stamping in this
>> >> case.
>> >> 
>> >> Fixes: 6605b73 ("FEC: Add time stamping code and a PTP hardware clock")
>> >
>> > Please use a 12-character sha1sum. Try to use the "pretty" format
>> > specifier I gave you in the original thread, it saves you from
>> > counting,
>> 
>> I did as you suggested:
>> 
>> [pretty]
>>         fixes = Fixes: %h (\"%s\")
>> [alias]
>> 	fixes = show --no-patch --pretty='Fixes: %h (\"%s\")'
>> 
>> And that's what it gave me. Dunno, maybe its Git version that is
>> responsible?
>> 
>> I now tried to find a way to specify the number of digits in the
>> abbreviated hash in the format, but failed. There is likely some global
>> setting for minimum number of digits, but I'm yet to find it. Any idea?
>> 
>
> Sorry, my fault. I gave you only partial settings. Use this:
>
> [core]
> 	abbrev = 12

Thanks, Vladimir and Andrew!

>
>> > and also from people complaining once it gets merged:
>> >
>> > https://www.google.com/search?q=stephen+rothwell+%22fixes+tag+needs+some+work%22
>> >
>> >> Signed-off-by: Sergey Organov <sorganov@gmail.com>
>> >> ---
>> >> 
>> >> v2:
>> >>   - Extracted from larger patch series
>> >>   - Description/comments updated according to discussions
>> >>   - Added Fixes: tag
>> >> 
>> >>  drivers/net/ethernet/freescale/fec.h      |  1 +
>> >>  drivers/net/ethernet/freescale/fec_main.c | 23 +++++++++++++++++------
>> >>  drivers/net/ethernet/freescale/fec_ptp.c  | 12 ++++++++++++
>> >>  3 files changed, 30 insertions(+), 6 deletions(-)
>> >> 
>> >> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
>> >> index d8d76da..832a217 100644
>> >> --- a/drivers/net/ethernet/freescale/fec.h
>> >> +++ b/drivers/net/ethernet/freescale/fec.h
>> >> @@ -590,6 +590,7 @@ struct fec_enet_private {
>> >>  void fec_ptp_init(struct platform_device *pdev, int irq_idx);
>> >>  void fec_ptp_stop(struct platform_device *pdev);
>> >>  void fec_ptp_start_cyclecounter(struct net_device *ndev);
>> >> +void fec_ptp_disable_hwts(struct net_device *ndev);
>> >>  int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr);
>> >>  int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr);
>> >>  
>> >> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
>> >> index 3982285..cc7fbfc 100644
>> >> --- a/drivers/net/ethernet/freescale/fec_main.c
>> >> +++ b/drivers/net/ethernet/freescale/fec_main.c
>> >> @@ -1294,8 +1294,13 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
>> >>  			ndev->stats.tx_bytes += skb->len;
>> >>  		}
>> >>  
>> >> -		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
>> >> -			fep->bufdesc_ex) {
>> >> +		/* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who
>> >> +		 * are to time stamp the packet, so we still need to check time
>> >> +		 * stamping enabled flag.
>> >> +		 */
>> >> +		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
>> >> +			     fep->hwts_tx_en) &&
>> >> +		    fep->bufdesc_ex) {
>> >>  			struct skb_shared_hwtstamps shhwtstamps;
>> >>  			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
>> >>  

[...]

> As far as I understand. the reason why SKBTX_IN_PROGRESS exists is for
> skb_tx_timestamp() to only provide a software timestamp if the hardware
> timestamping isn't going to. So hardware timestamping logic must signal
> its intention. With SO_TIMESTAMPING, this should not be strictly
> necessary, as this UAPI supports multiple sources of timestamping
> (including software and hardware together),

As a side note, I tried, but didn't find a way to get 2 timestamps,
software and hardware, for the same packet. It looks like once upon a
time it was indeed supported by:

SOF_TIMESTAMPING_SYS_HARDWARE: This option is deprecated and ignored.

> but I think
> SKBTX_IN_PROGRESS predates this UAPI and timestamping should continue to
> work with older socket options.

<rant>The UAPI to all this is rather messy, starting with ugly tricks to
convert PTP file descriptors to clock IDs, followed by strange ways to
figure correct PTP clock for given Ethernet interface, followed by
entirely different methods of getting time stamping capabilities and
configuring them, and so forth.</rant>

>
> Now, out of the 2 mainline DSA drivers, 1 of them isn't setting
> SKBTX_IN_PROGRESS, and that is mv88e6xxx. So mv88e6xxx isn't triggerring
> this bug. I'm not sure why it isn't setting the flag. It might very well
> be that the author of the patch had a board with a FEC DSA master, and
> setting this flag made bad things happen, so he just left it unset.
> Doesn't really matter.
> But sja1105 is setting the flag:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/tree/drivers/net/dsa/sja1105/sja1105_ptp.c#n890
>
> So, at the very least, you are fixing PTP on DSA setups with FEC as
> master and sja1105 as switch. Boards like that do exist.

I'll do as you suggest, but I want to say that I didn't question your
claim that my proposed changes may fix some existing PTP/DSA setup.

What I still find doubtful is that this fact necessarily means that the
part of the patch that fixes some other bug must be submitted
separately. If it were the rule, almost no patch that needs to fix 2
separate places would be accepted, as there might be some bug somewhere
that could be fixed by 1 change, no?

In this particular case, you just happen to identify such a bug
immediately, but I, as patch submitter, should not be expected to check
all the kernel for other possible bugs that my changes may happen to
fix, no?

It seems like I misunderstand something very basic and that bothers me.

>
>> In case you insist they are to be separate, I do keep the split version
>> in my git tree, but to finish it that way, I'd like to clarify a few
>> details:
>> 
>> 1. Should it be patch series with 2 commits, or 2 entirely separate
>> patches?
>> 
>
> Entirely separate.

OK, will do a separate patch, as you suggest.

[...]

>
>> 3. If entirely separate patches, should I somehow refer to SKBTX patch in
>> ioctl() one (and/or vice versa), to make it explicit they are
>> (inter)dependent? 
>> 
>
> Nope. The PHY timestamping support will go to David's net-next, this
> common PHY/DSA bugfix to net, and they'll meet sooner rather than
> later.

I'll do as you suggest, separating the patches, yet I fail to see why
PHY /time stamping bug fix/ should go to another tree than PHY/DSA /time
stamping bug fix/? What's the essential difference? Could you please
clarify?

Thanks,
-- Sergey
Vladimir Oltean July 12, 2020, 7:33 p.m. UTC | #6
On Sun, Jul 12, 2020 at 08:29:46PM +0300, Sergey Organov wrote:
> Vladimir Oltean <olteanv@gmail.com> writes:
> 
> > As far as I understand. the reason why SKBTX_IN_PROGRESS exists is for
> > skb_tx_timestamp() to only provide a software timestamp if the hardware
> > timestamping isn't going to. So hardware timestamping logic must signal
> > its intention. With SO_TIMESTAMPING, this should not be strictly
> > necessary, as this UAPI supports multiple sources of timestamping
> > (including software and hardware together),
> 
> As a side note, I tried, but didn't find a way to get 2 timestamps,
> software and hardware, for the same packet. It looks like once upon a
> time it was indeed supported by:
> 
> SOF_TIMESTAMPING_SYS_HARDWARE: This option is deprecated and ignored.
> 

With SO_TIMESTAMPING, it is supported through
SOF_TIMESTAMPING_OPT_TX_SWHW.
With APIs pre-SO_TIMESTAMPING (such as SO_TIMESTAMPNS), my understanding
is that it is not supported. One timestamp would overwrite the other. So
this needs to be avoided somehow, and this is how SKBTX_IN_PROGRESS came
to be.

> > but I think
> > SKBTX_IN_PROGRESS predates this UAPI and timestamping should continue to
> > work with older socket options.
> 
> <rant>The UAPI to all this is rather messy, starting with ugly tricks to
> convert PTP file descriptors to clock IDs, followed by strange ways to
> figure correct PTP clock for given Ethernet interface, followed by
> entirely different methods of getting time stamping capabilities and
> configuring them, and so forth.</rant>
> 

Yes, but I don't think this really matters in any way here.

> >
> > Now, out of the 2 mainline DSA drivers, 1 of them isn't setting
> > SKBTX_IN_PROGRESS, and that is mv88e6xxx. So mv88e6xxx isn't triggerring
> > this bug. I'm not sure why it isn't setting the flag. It might very well
> > be that the author of the patch had a board with a FEC DSA master, and
> > setting this flag made bad things happen, so he just left it unset.
> > Doesn't really matter.
> > But sja1105 is setting the flag:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/tree/drivers/net/dsa/sja1105/sja1105_ptp.c#n890
> >
> > So, at the very least, you are fixing PTP on DSA setups with FEC as
> > master and sja1105 as switch. Boards like that do exist.
> 
> I'll do as you suggest, but I want to say that I didn't question your
> claim that my proposed changes may fix some existing PTP/DSA setup.
> 
> What I still find doubtful is that this fact necessarily means that the
> part of the patch that fixes some other bug must be submitted
> separately. If it were the rule, almost no patch that needs to fix 2
> separate places would be accepted, as there might be some bug somewhere
> that could be fixed by 1 change, no?
> 

So, I am asking you to flag this as a separate bugfix for DSA
timestamping for multiple reasons:

- because you are not "fixing PHY timestamping", more on that separately
- because I am looking at this problem from the perspective of a user
  who has a problem with DSA timestamping (aka code that already exists,
  and that is already claimed to work). They're going to be searching
  through the git log for potentially relevant changes, and even if they
  might notice a patch that "adds support for PHY timestamping in FEC"*,
  it might not register as something relevant to them, and skip it.
  Just as you don't care about DSA, neither does that hypothetical user
  care about PHY timestamping.



According to its design principles, DSA TX timestamping is supposed to
"just work". The FEC driver should have been DSA-ready from day one,
however it wasn't. In fact, subtle requirements from the MAC driver,
such as not indulging in solipsism (like gianfar, fec and probably many
more other drivers, in more subtle ways **), were unobvious enough to
the designers of DSA timestamping that they didn't, probably, even see
this coming.

Look, if the argument you're trying to make is that you should be using
this tag instead:

Fixes: 90af1059c52c ("net: dsa: forward timestamping callbacks to switch drivers")

since what gianfar/fec/etc were doing was not illegal as part of the
rules "back then", then I won't oppose that.



And according to the design principles of PHY TX timestamping, that is
not supposed to "just work" unless there is MAC driver collaboration.
That collaboration is _obviously_ not there for FEC, you don't even need
to open one kernel source code file to see that, just run a simple
command from user space, see below.

> In this particular case, you just happen to identify such a bug
> immediately, but I, as patch submitter, should not be expected to check
> all the kernel for other possible bugs that my changes may happen to
> fix, no?
> 

Well, ideally you would, it gives reviewers and readers confidence that
you understand the changes you are making. Some time in the future, when
you're no longer going to be reachable for questions, the commit still
needs to speak for itself.

> It seems like I misunderstand something very basic and that bothers me.
> 

Yes, but I can't seem to pinpoint what that is...
I _think_ the problem seems to be that you're not differentiating your
point of view from the mainline kernel's point of view.

> >
> >> In case you insist they are to be separate, I do keep the split version
> >> in my git tree, but to finish it that way, I'd like to clarify a few
> >> details:
> >> 
> >> 1. Should it be patch series with 2 commits, or 2 entirely separate
> >> patches?
> >> 
> >
> > Entirely separate.
> 
> OK, will do a separate patch, as you suggest.
> 
> [...]
> 
> >
> >> 3. If entirely separate patches, should I somehow refer to SKBTX patch in
> >> ioctl() one (and/or vice versa), to make it explicit they are
> >> (inter)dependent? 
> >> 
> >
> > Nope. The PHY timestamping support will go to David's net-next, this
> > common PHY/DSA bugfix to net, and they'll meet sooner rather than
> > later.
> 
> I'll do as you suggest, separating the patches, yet I fail to see why
> PHY /time stamping bug fix/ should go to another tree than PHY/DSA /time
> stamping bug fix/? What's the essential difference? Could you please
> clarify?
> 

The essential difference is that for PHY timestamping, there is no
feature that is claimed to work which doesn't work.

If you run "ethtool -T eth0" on a FEC network interface, it will always
report its own PHC, and never the PHC of a PHY. So, you cannot claim
that you are fixing PHY timestamping, since PHY timestamping is not
advertised. That's not what a bug fix is, at least not around here, with
its associated backporting efforts.

The only way you could have claimed that this was fixing PHY
timestamping was if "ethtool -T eth0" was reporting a PHY PHC, however
timestamps were not coming from the PHY.
From the perspective of the mainline kernel, that can never happen.
From your perspective as a developer, in your private work tree, where
_you_ added the necessary wiring for PHY timestamping, I fully
understand that this is exactly what happened _to_you_.
I am not saying that PHY timestamping doesn't need this issue fixed. It
does, and if it weren't for DSA, it would have simply been a "new
feature", and it would have been ok to have everything in the same patch.

The fact that you figured out so quickly what's going on just means
you're very smart. It took me 2 months to find out the same bug coming
from the DSA side of things.

> Thanks,
> -- Sergey

* That was my first complaint about your patch. You've changed that
  since, and your new commit is odd because it's now adding a new
  feature in a bug fix, therefore for a totally different reason.

** After I fixed gianfar, I did grep for other drivers that might be
   suffering from the same issue. The FEC driver did *not* strike me as
   obviously broken, and I was *specifically* searching for this bug
   pattern.

Thanks,
-Vladimir
Sergey Organov July 12, 2020, 10:32 p.m. UTC | #7
Vladimir Oltean <olteanv@gmail.com> writes:

> On Sun, Jul 12, 2020 at 08:29:46PM +0300, Sergey Organov wrote:
>> Vladimir Oltean <olteanv@gmail.com> writes:
>> 
>> > As far as I understand. the reason why SKBTX_IN_PROGRESS exists is for
>> > skb_tx_timestamp() to only provide a software timestamp if the hardware
>> > timestamping isn't going to. So hardware timestamping logic must signal
>> > its intention. With SO_TIMESTAMPING, this should not be strictly
>> > necessary, as this UAPI supports multiple sources of timestamping
>> > (including software and hardware together),
>> 
>> As a side note, I tried, but didn't find a way to get 2 timestamps,
>> software and hardware, for the same packet. It looks like once upon a
>> time it was indeed supported by:
>> 
>> SOF_TIMESTAMPING_SYS_HARDWARE: This option is deprecated and ignored.
>> 
>
> With SO_TIMESTAMPING, it is supported through
> SOF_TIMESTAMPING_OPT_TX_SWHW.

This one I overlooked, -- thanks for pointing!

> With APIs pre-SO_TIMESTAMPING (such as SO_TIMESTAMPNS), my understanding
> is that it is not supported. One timestamp would overwrite the other. So
> this needs to be avoided somehow, and this is how SKBTX_IN_PROGRESS came
> to be.
>
>
>> > but I think
>> > SKBTX_IN_PROGRESS predates this UAPI and timestamping should continue to
>> > work with older socket options.
>> 
>> <rant>The UAPI to all this is rather messy, starting with ugly tricks to
>> convert PTP file descriptors to clock IDs, followed by strange ways to
>> figure correct PTP clock for given Ethernet interface, followed by
>> entirely different methods of getting time stamping capabilities and
>> configuring them, and so forth.</rant>
>> 
>
> Yes, but I don't think this really matters in any way here.

Surprisingly it does, as you've got a wrong premise that ethtool doesn't
support external PTP PHY on FEC. And I think it's due to complexities of
the current implementation, where ethtool has entirely separate path of
doing things, with zero help from the FEC driver, see below.

[...]

>> I'll do as you suggest, but I want to say that I didn't question your
>> claim that my proposed changes may fix some existing PTP/DSA setup.
>> 
>> What I still find doubtful is that this fact necessarily means that the
>> part of the patch that fixes some other bug must be submitted
>> separately. If it were the rule, almost no patch that needs to fix 2
>> separate places would be accepted, as there might be some bug somewhere
>> that could be fixed by 1 change, no?
>> 
>
> So, I am asking you to flag this as a separate bugfix for DSA
> timestamping for multiple reasons:

OK, you want it, I'll do it, however:

>
> - because you are not "fixing PHY timestamping", more on that
> separately

I believe I do, more on that below.

> - because I am looking at this problem from the perspective of a user
>   who has a problem with DSA timestamping (aka code that already exists,
>   and that is already claimed to work). They're going to be searching
>   through the git log for potentially relevant changes, and even if they
>   might notice a patch that "adds support for PHY timestamping in FEC"*,
>   it might not register as something relevant to them, and skip it.
>   Just as you don't care about DSA, neither does that hypothetical user
>   care about PHY timestamping.

I don't think it's fair. I believe I do care about both, please re-read
the subject and description of the patch:

"net: fec: fix hardware time stamping by external devices"

that sure must be relevant for any external device, be it PHY or DSA,
and then:

"Fix support for external PTP-aware devices such as DSA or PTP PHY:"

And while formally I do add "support for external PTP PHY in FEC", as
you mention, I still consider it a bug-fix, as ethtool already correctly
supports this, see below for more background.

[...]

>> > Nope. The PHY timestamping support will go to David's net-next, this
>> > common PHY/DSA bugfix to net, and they'll meet sooner rather than
>> > later.
>> 
>> I'll do as you suggest, separating the patches, yet I fail to see why
>> PHY /time stamping bug fix/ should go to another tree than PHY/DSA /time
>> stamping bug fix/? What's the essential difference? Could you please
>> clarify?
>> 
>
> The essential difference is that for PHY timestamping, there is no
> feature that is claimed to work which doesn't work.

I believe this is a feature that is supposed to work, yet it doesn't,
see below.

>
> If you run "ethtool -T eth0" on a FEC network interface, it will always
> report its own PHC, and never the PHC of a PHY. So, you cannot claim
> that you are fixing PHY timestamping, since PHY timestamping is not
> advertised. That's not what a bug fix is, at least not around here, with
> its associated backporting efforts.

You can't actually try it as you don't have the hardware, right? As for
me, rather than running exactly ethtool, I do corresponding ioctl() in
my program, and the kernel does report features of my external PTP PHY,
not of internal one of the FEC, without my patches!

> The only way you could have claimed that this was fixing PHY
> timestamping was if "ethtool -T eth0" was reporting a PHY PHC, however
> timestamps were not coming from the PHY.

That's /exactly/ the case! Moreover, my original work is on 4.9.146
kernel, so ethtool works correctly at least since then. Here is quote from
my original question that I already gave reference to:

<quote>
Almost everything works fine out of the box, except hardware
timestamping. The problems are that I apparently get timestamps from fec
built-in PTP instead of external PHY, and that

  ioctl(fd, SIOCSHWTSTAMP, &ifr)

ends up being executed by fec1 built-in PTP code instead of being
forwarded to the external PHY, and that this happens despite the call to

   info.cmd = ETHTOOL_GET_TS_INFO;                                                                             
   ioctl(fd, SIOCETHTOOL, &ifr);                                                                     

returning phc_index = 1 that corresponds to external PHY, and reports
features of the external PHY, leading to major inconsistency as seen
from user-space.
</quote>

You see? This is exactly the case where I could claim fixing PHY time
stamping even according to your own expertise!

> From the perspective of the mainline kernel, that can never happen.

Yet in happened to me, and in some way because of the UAPI deficiencies
I've mentioned, as ethtool has entirely separate code path, that happens
to be correct for a long time already.

> From your perspective as a developer, in your private work tree, where
> _you_ added the necessary wiring for PHY timestamping, I fully
> understand that this is exactly what happened _to_you_.
> I am not saying that PHY timestamping doesn't need this issue fixed. It
> does, and if it weren't for DSA, it would have simply been a "new
> feature", and it would have been ok to have everything in the same
> patch.

Except that it's not a "new feature", but a bug-fix of an existing one,
as I see it.

>
> The fact that you figured out so quickly what's going on just means
> you're very smart. It took me 2 months to find out the same bug coming
> from the DSA side of things.

Thanks, but I'd rather attribute this to the fact that in my case there
was the code that worked correctly, so I was lucky to get support point
to rely upon.

Thanks,
-- Sergey
Vladimir Oltean July 12, 2020, 11:15 p.m. UTC | #8
On Mon, Jul 13, 2020 at 01:32:09AM +0300, Sergey Organov wrote:
> >
> > If you run "ethtool -T eth0" on a FEC network interface, it will always
> > report its own PHC, and never the PHC of a PHY. So, you cannot claim
> > that you are fixing PHY timestamping, since PHY timestamping is not
> > advertised. That's not what a bug fix is, at least not around here, with
> > its associated backporting efforts.
>
> You can't actually try it as you don't have the hardware, right? As for
> me, rather than running exactly ethtool, I do corresponding ioctl() in
> my program, and the kernel does report features of my external PTP PHY,
> not of internal one of the FEC, without my patches!
>
> > The only way you could have claimed that this was fixing PHY
> > timestamping was if "ethtool -T eth0" was reporting a PHY PHC, however
> > timestamps were not coming from the PHY.
>
> That's /exactly/ the case! Moreover, my original work is on 4.9.146
> kernel, so ethtool works correctly at least since then. Here is quote from
> my original question that I already gave reference to:
>
> <quote>
> Almost everything works fine out of the box, except hardware
> timestamping. The problems are that I apparently get timestamps from fec
> built-in PTP instead of external PHY, and that
>
>   ioctl(fd, SIOCSHWTSTAMP, &ifr)
>
> ends up being executed by fec1 built-in PTP code instead of being
> forwarded to the external PHY, and that this happens despite the call to
>
>    info.cmd = ETHTOOL_GET_TS_INFO;
>    ioctl(fd, SIOCETHTOOL, &ifr);
>
> returning phc_index = 1 that corresponds to external PHY, and reports
> features of the external PHY, leading to major inconsistency as seen
> from user-space.
> </quote>
>
> You see? This is exactly the case where I could claim fixing PHY time
> stamping even according to your own expertise!
>
> > From the perspective of the mainline kernel, that can never happen.
>
> Yet in happened to me, and in some way because of the UAPI deficiencies
> I've mentioned, as ethtool has entirely separate code path, that happens
> to be correct for a long time already.
>

Yup, you are right:

int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
{
	const struct ethtool_ops *ops = dev->ethtool_ops;
	struct phy_device *phydev = dev->phydev;

	memset(info, 0, sizeof(*info));
	info->cmd = ETHTOOL_GET_TS_INFO;

	if (phy_has_tsinfo(phydev))
		return phy_ts_info(phydev, info);
	if (ops->get_ts_info)
		return ops->get_ts_info(dev, info);

	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
				SOF_TIMESTAMPING_SOFTWARE;
	info->phc_index = -1;

	return 0;
}

Very bad design choice indeed...
Given the fact that the PHY timestamping needs massaging from MAC driver
for plenty of other reasons, now of all things, ethtool just decided
it's not going to consult the MAC driver about the PHC it intends to
expose to user space, and just say "here's the PHY, deal with it". This
is a structural bug, I would say.

> > From your perspective as a developer, in your private work tree, where
> > _you_ added the necessary wiring for PHY timestamping, I fully
> > understand that this is exactly what happened _to_you_.
> > I am not saying that PHY timestamping doesn't need this issue fixed. It
> > does, and if it weren't for DSA, it would have simply been a "new
> > feature", and it would have been ok to have everything in the same
> > patch.
>
> Except that it's not a "new feature", but a bug-fix of an existing one,
> as I see it.
>

See above. It's clear that the intention of the PHY timestamping support
is for MAC drivers to opt-in, otherwise some mechanism would have been
devised such that not every single one of them would need to check for
phy_has_hwtstamp() in .ndo_do_ioctl(). That simply doesn't scale. Also,
it seems that automatically calling phy_ts_info from
__ethtool_get_ts_info is not coherent with that intention.

I need to think more about this. Anyway, if your aim is to "reduce
confusion" for others walking in your foot steps, I think this is much
worthier of your time: avoiding the inconsistent situation where the MAC
driver is obviously not ready for PHY timestamping, however not all
parts of the kernel are in agreement with that, and tell the user
something else.

Thanks,
-Vladimir
Sergey Organov July 14, 2020, 12:39 p.m. UTC | #9
Vladimir Oltean <olteanv@gmail.com> writes:

> On Mon, Jul 13, 2020 at 01:32:09AM +0300, Sergey Organov wrote:

[...]

>> > From the perspective of the mainline kernel, that can never happen.
>>
>> Yet in happened to me, and in some way because of the UAPI deficiencies
>> I've mentioned, as ethtool has entirely separate code path, that happens
>> to be correct for a long time already.
>>
>
> Yup, you are right:
>

[...]

> Very bad design choice indeed...
> Given the fact that the PHY timestamping needs massaging from MAC driver
> for plenty of other reasons, now of all things, ethtool just decided
> it's not going to consult the MAC driver about the PHC it intends to
> expose to user space, and just say "here's the PHY, deal with it". This
> is a structural bug, I would say.
>
>> > From your perspective as a developer, in your private work tree, where
>> > _you_ added the necessary wiring for PHY timestamping, I fully
>> > understand that this is exactly what happened _to_you_.
>> > I am not saying that PHY timestamping doesn't need this issue fixed. It
>> > does, and if it weren't for DSA, it would have simply been a "new
>> > feature", and it would have been ok to have everything in the same
>> > patch.
>>
>> Except that it's not a "new feature", but a bug-fix of an existing one,
>> as I see it.
>>
>
> See above. It's clear that the intention of the PHY timestamping support
> is for MAC drivers to opt-in, otherwise some mechanism would have been
> devised such that not every single one of them would need to check for
> phy_has_hwtstamp() in .ndo_do_ioctl(). That simply doesn't scale. Also,
> it seems that automatically calling phy_ts_info from
> __ethtool_get_ts_info is not coherent with that intention.
>
> I need to think more about this. Anyway, if your aim is to "reduce
> confusion" for others walking in your foot steps, I think this is much
> worthier of your time: avoiding the inconsistent situation where the MAC
> driver is obviously not ready for PHY timestamping, however not all
> parts of the kernel are in agreement with that, and tell the user
> something else.

You see, I have a problem on kernel 4.9.146. After I apply this patch,
the problem goes away, at least for FEC/PHY combo that I care about, and
chances are high that for DSA as well, according to your own expertise.
Why should I care what is or is not ready for what to get a bug-fix
patch into the kernel? Why should I guess some vague "intentions" or
spend my time elsewhere?

Also please notice that if, as you suggest, I will propose only half of
the patch that will fix DSA only, then I will create confusion for
FEC/PHY users that will have no way to figure they need another part of
the fix to get their setup to work.

Could we please finally agree that, as what I suggest is indeed a simple
bug-fix, we could safely let it into the kernel?

Thanks,
-- Sergey
Richard Cochran July 14, 2020, 2:01 p.m. UTC | #10
On Sat, Jul 11, 2020 at 03:08:42PM +0300, Sergey Organov wrote:
> Fix support for external PTP-aware devices such as DSA or PTP PHY:
> 
> Make sure we never time stamp tx packets when hardware time stamping
> is disabled.
> 
> Check for PTP PHY being in use and then pass ioctls related to time
> stamping of Ethernet packets to the PTP PHY rather than handle them
> ourselves. In addition, disable our own hardware time stamping in this
> case.
> 
> Fixes: 6605b73 ("FEC: Add time stamping code and a PTP hardware clock")
> Signed-off-by: Sergey Organov <sorganov@gmail.com>
> ---
> 
> v2:
>   - Extracted from larger patch series
>   - Description/comments updated according to discussions
>   - Added Fixes: tag

Acked-by: Richard Cochran <richardcochran@gmail.com>
Vladimir Oltean July 14, 2020, 2:23 p.m. UTC | #11
On Tue, Jul 14, 2020 at 03:39:04PM +0300, Sergey Organov wrote:
> Vladimir Oltean <olteanv@gmail.com> writes:
> 
> > On Mon, Jul 13, 2020 at 01:32:09AM +0300, Sergey Organov wrote:
> 
> [...]
> 
> >> > From the perspective of the mainline kernel, that can never happen.
> >>
> >> Yet in happened to me, and in some way because of the UAPI deficiencies
> >> I've mentioned, as ethtool has entirely separate code path, that happens
> >> to be correct for a long time already.
> >>
> >
> > Yup, you are right:
> >
> 
> [...]
> 
> > Very bad design choice indeed...
> > Given the fact that the PHY timestamping needs massaging from MAC driver
> > for plenty of other reasons, now of all things, ethtool just decided
> > it's not going to consult the MAC driver about the PHC it intends to
> > expose to user space, and just say "here's the PHY, deal with it". This
> > is a structural bug, I would say.
> >
> >> > From your perspective as a developer, in your private work tree, where
> >> > _you_ added the necessary wiring for PHY timestamping, I fully
> >> > understand that this is exactly what happened _to_you_.
> >> > I am not saying that PHY timestamping doesn't need this issue fixed. It
> >> > does, and if it weren't for DSA, it would have simply been a "new
> >> > feature", and it would have been ok to have everything in the same
> >> > patch.
> >>
> >> Except that it's not a "new feature", but a bug-fix of an existing one,
> >> as I see it.
> >>
> >
> > See above. It's clear that the intention of the PHY timestamping support
> > is for MAC drivers to opt-in, otherwise some mechanism would have been
> > devised such that not every single one of them would need to check for
> > phy_has_hwtstamp() in .ndo_do_ioctl(). That simply doesn't scale. Also,
> > it seems that automatically calling phy_ts_info from
> > __ethtool_get_ts_info is not coherent with that intention.
> >
> > I need to think more about this. Anyway, if your aim is to "reduce
> > confusion" for others walking in your foot steps, I think this is much
> > worthier of your time: avoiding the inconsistent situation where the MAC
> > driver is obviously not ready for PHY timestamping, however not all
> > parts of the kernel are in agreement with that, and tell the user
> > something else.
> 
> You see, I have a problem on kernel 4.9.146. After I apply this patch,
> the problem goes away, at least for FEC/PHY combo that I care about, and
> chances are high that for DSA as well, according to your own expertise.
> Why should I care what is or is not ready for what to get a bug-fix
> patch into the kernel? Why should I guess some vague "intentions" or
> spend my time elsewhere?
> 
> Also please notice that if, as you suggest, I will propose only half of
> the patch that will fix DSA only, then I will create confusion for
> FEC/PHY users that will have no way to figure they need another part of
> the fix to get their setup to work.
> 
> Could we please finally agree that, as what I suggest is indeed a simple
> bug-fix, we could safely let it into the kernel?
> 
> Thanks,
> -- Sergey

I cannot contradict you, you have all the arguments on your side. The
person who added support for "ethtool -T" in commit c8f3a8c31069
("ethtool: Introduce a method for getting time stamping capabilities.")
made a fundamental mistake in that they exposed broken functionality to
the user, in case CONFIG_NETWORK_PHY_TIMESTAMPING is enabled and the MAC
driver doesn't fulfill the requirements, be they skb_tx_timestamp(),
phy_has_hwtstamp() and what not. So, therefore, any patch that is adding
PHY timestamping compatibility in a MAC driver can rightfully claim that
it is fixing a bug, a sloppy design. Fair enough.

The only reason why I mentioned about spending your time on useful
things is because in your previous series you seemed to be concerned
about that. In retrospect, I believe you agree with me that your
confusion would have been significantly lower if the output of "ethtool
-T" was in harmony with the actual source of hardware timestamps.
Now that we discussed it through and I did see your point, I just
suggested what I believe to be the fundamental issue here, don't shoot
the messenger. Of course you are free to spend your time however you
want to.

Acked-by: Vladimir Oltean <olteanv@gmail.com>

Thanks,
-Vladimir
Sergey Organov July 14, 2020, 2:27 p.m. UTC | #12
Richard Cochran <richardcochran@gmail.com> writes:

> On Sat, Jul 11, 2020 at 03:08:42PM +0300, Sergey Organov wrote:
>> Fix support for external PTP-aware devices such as DSA or PTP PHY:
>> 
>> Make sure we never time stamp tx packets when hardware time stamping
>> is disabled.
>> 
>> Check for PTP PHY being in use and then pass ioctls related to time
>> stamping of Ethernet packets to the PTP PHY rather than handle them
>> ourselves. In addition, disable our own hardware time stamping in this
>> case.
>> 
>> Fixes: 6605b73 ("FEC: Add time stamping code and a PTP hardware clock")
>> Signed-off-by: Sergey Organov <sorganov@gmail.com>
>> ---
>> 
>> v2:
>>   - Extracted from larger patch series
>>   - Description/comments updated according to discussions
>>   - Added Fixes: tag
>
> Acked-by: Richard Cochran <richardcochran@gmail.com>

Thanks for reviewing!

-- Sergey
Sergey Organov July 14, 2020, 2:35 p.m. UTC | #13
Vladimir Oltean <olteanv@gmail.com> writes:

> On Tue, Jul 14, 2020 at 03:39:04PM +0300, Sergey Organov wrote:
>> Vladimir Oltean <olteanv@gmail.com> writes:
>> 
>> > On Mon, Jul 13, 2020 at 01:32:09AM +0300, Sergey Organov wrote:
>> 
>> [...]
>> 
>> >> > From the perspective of the mainline kernel, that can never happen.
>> >>
>> >> Yet in happened to me, and in some way because of the UAPI
>> >> deficiencies
>> >> I've mentioned, as ethtool has entirely separate code path, that
>> >> happens
>> >> to be correct for a long time already.
>> >>
>> >
>> > Yup, you are right:
>> >
>> 
>> [...]
>> 
>> > Very bad design choice indeed...
>> > Given the fact that the PHY timestamping needs massaging from MAC
>> > driver
>> > for plenty of other reasons, now of all things, ethtool just decided
>> > it's not going to consult the MAC driver about the PHC it intends to
>> > expose to user space, and just say "here's the PHY, deal with it". This
>> > is a structural bug, I would say.
>> >
>> >> > From your perspective as a developer, in your private work
>> >> > tree, where
>> >> > _you_ added the necessary wiring for PHY timestamping, I fully
>> >> > understand that this is exactly what happened _to_you_.
>> >> > I am not saying that PHY timestamping doesn't need this issue
>> >> > fixed. It
>> >> > does, and if it weren't for DSA, it would have simply been a "new
>> >> > feature", and it would have been ok to have everything in the same
>> >> > patch.
>> >>
>> >> Except that it's not a "new feature", but a bug-fix of an
>> >> existing one,
>> >> as I see it.
>> >>
>> >
>> > See above. It's clear that the intention of the PHY timestamping
>> > support
>> > is for MAC drivers to opt-in, otherwise some mechanism would have been
>> > devised such that not every single one of them would need to check for
>> > phy_has_hwtstamp() in .ndo_do_ioctl(). That simply doesn't scale. Also,
>> > it seems that automatically calling phy_ts_info from
>> > __ethtool_get_ts_info is not coherent with that intention.
>> >
>> > I need to think more about this. Anyway, if your aim is to "reduce
>> > confusion" for others walking in your foot steps, I think this is much
>> > worthier of your time: avoiding the inconsistent situation where
>> > the MAC
>> > driver is obviously not ready for PHY timestamping, however not all
>> > parts of the kernel are in agreement with that, and tell the user
>> > something else.
>> 
>> You see, I have a problem on kernel 4.9.146. After I apply this patch,
>> the problem goes away, at least for FEC/PHY combo that I care about, and
>> chances are high that for DSA as well, according to your own expertise.
>> Why should I care what is or is not ready for what to get a bug-fix
>> patch into the kernel? Why should I guess some vague "intentions" or
>> spend my time elsewhere?
>> 
>> Also please notice that if, as you suggest, I will propose only half of
>> the patch that will fix DSA only, then I will create confusion for
>> FEC/PHY users that will have no way to figure they need another part of
>> the fix to get their setup to work.
>> 
>> Could we please finally agree that, as what I suggest is indeed a simple
>> bug-fix, we could safely let it into the kernel?
>> 
>> Thanks,
>> -- Sergey
>
> I cannot contradict you, you have all the arguments on your side. The
> person who added support for "ethtool -T" in commit c8f3a8c31069
> ("ethtool: Introduce a method for getting time stamping capabilities.")
> made a fundamental mistake in that they exposed broken functionality to
> the user, in case CONFIG_NETWORK_PHY_TIMESTAMPING is enabled and the MAC
> driver doesn't fulfill the requirements, be they skb_tx_timestamp(),
> phy_has_hwtstamp() and what not. So, therefore, any patch that is adding
> PHY timestamping compatibility in a MAC driver can rightfully claim that
> it is fixing a bug, a sloppy design. Fair enough.

OK, thanks!

>
> The only reason why I mentioned about spending your time on useful
> things is because in your previous series you seemed to be concerned
> about that. In retrospect, I believe you agree with me that your
> confusion would have been significantly lower if the output of "ethtool
> -T" was in harmony with the actual source of hardware timestamps.
> Now that we discussed it through and I did see your point, I just
> suggested what I believe to be the fundamental issue here, don't shoot
> the messenger. Of course you are free to spend your time however you
> want to.

I do care about these things indeed, it's only that right now what I
care most is to get the fixes into the kernel.

Then we can think without hurry about how all this could be improved.

> Acked-by: Vladimir Oltean <olteanv@gmail.com>

Thanks for reviewing, and again for helpful and beneficial discussion!

-- Sergey
Vladimir Oltean July 14, 2020, 2:44 p.m. UTC | #14
On Tue, Jul 14, 2020 at 05:23:24PM +0300, Vladimir Oltean wrote:
> On Tue, Jul 14, 2020 at 03:39:04PM +0300, Sergey Organov wrote:
> > Vladimir Oltean <olteanv@gmail.com> writes:
> > 
> > > On Mon, Jul 13, 2020 at 01:32:09AM +0300, Sergey Organov wrote:
> > 
> > [...]
> > 
> > >> > From the perspective of the mainline kernel, that can never happen.
> > >>
> > >> Yet in happened to me, and in some way because of the UAPI deficiencies
> > >> I've mentioned, as ethtool has entirely separate code path, that happens
> > >> to be correct for a long time already.
> > >>
> > >
> > > Yup, you are right:
> > >
> > 
> > [...]
> > 
> > > Very bad design choice indeed...
> > > Given the fact that the PHY timestamping needs massaging from MAC driver
> > > for plenty of other reasons, now of all things, ethtool just decided
> > > it's not going to consult the MAC driver about the PHC it intends to
> > > expose to user space, and just say "here's the PHY, deal with it". This
> > > is a structural bug, I would say.
> > >
> > >> > From your perspective as a developer, in your private work tree, where
> > >> > _you_ added the necessary wiring for PHY timestamping, I fully
> > >> > understand that this is exactly what happened _to_you_.
> > >> > I am not saying that PHY timestamping doesn't need this issue fixed. It
> > >> > does, and if it weren't for DSA, it would have simply been a "new
> > >> > feature", and it would have been ok to have everything in the same
> > >> > patch.
> > >>
> > >> Except that it's not a "new feature", but a bug-fix of an existing one,
> > >> as I see it.
> > >>
> > >
> > > See above. It's clear that the intention of the PHY timestamping support
> > > is for MAC drivers to opt-in, otherwise some mechanism would have been
> > > devised such that not every single one of them would need to check for
> > > phy_has_hwtstamp() in .ndo_do_ioctl(). That simply doesn't scale. Also,
> > > it seems that automatically calling phy_ts_info from
> > > __ethtool_get_ts_info is not coherent with that intention.
> > >
> > > I need to think more about this. Anyway, if your aim is to "reduce
> > > confusion" for others walking in your foot steps, I think this is much
> > > worthier of your time: avoiding the inconsistent situation where the MAC
> > > driver is obviously not ready for PHY timestamping, however not all
> > > parts of the kernel are in agreement with that, and tell the user
> > > something else.
> > 
> > You see, I have a problem on kernel 4.9.146. After I apply this patch,
> > the problem goes away, at least for FEC/PHY combo that I care about, and
> > chances are high that for DSA as well, according to your own expertise.
> > Why should I care what is or is not ready for what to get a bug-fix
> > patch into the kernel? Why should I guess some vague "intentions" or
> > spend my time elsewhere?
> > 
> > Also please notice that if, as you suggest, I will propose only half of
> > the patch that will fix DSA only, then I will create confusion for
> > FEC/PHY users that will have no way to figure they need another part of
> > the fix to get their setup to work.
> > 
> > Could we please finally agree that, as what I suggest is indeed a simple
> > bug-fix, we could safely let it into the kernel?
> > 
> > Thanks,
> > -- Sergey
> 
> I cannot contradict you, you have all the arguments on your side. The
> person who added support for "ethtool -T" in commit c8f3a8c31069
> ("ethtool: Introduce a method for getting time stamping capabilities.")
> made a fundamental mistake in that they exposed broken functionality to
> the user, in case CONFIG_NETWORK_PHY_TIMESTAMPING is enabled and the MAC
> driver doesn't fulfill the requirements, be they skb_tx_timestamp(),
> phy_has_hwtstamp() and what not. So, therefore, any patch that is adding
> PHY timestamping compatibility in a MAC driver can rightfully claim that
> it is fixing a bug, a sloppy design. Fair enough.
> 
> The only reason why I mentioned about spending your time on useful
> things is because in your previous series you seemed to be concerned
> about that. In retrospect, I believe you agree with me that your
> confusion would have been significantly lower if the output of "ethtool
> -T" was in harmony with the actual source of hardware timestamps.
> Now that we discussed it through and I did see your point, I just
> suggested what I believe to be the fundamental issue here, don't shoot
> the messenger. Of course you are free to spend your time however you
> want to.
> 
> Acked-by: Vladimir Oltean <olteanv@gmail.com>
> 
> Thanks,
> -Vladimir

Of course, it would be good if you sent a new version with the sha1sum
of the Fixes: tag having the right length, otherwise people will
complain.

Thanks,
-Vladimir
Sergey Organov July 14, 2020, 4:18 p.m. UTC | #15
Vladimir Oltean <olteanv@gmail.com> writes:


[...]

>> Acked-by: Vladimir Oltean <olteanv@gmail.com>
>> 
>> Thanks,
>> -Vladimir
>
> Of course, it would be good if you sent a new version with the sha1sum
> of the Fixes: tag having the right length, otherwise people will
> complain.

Ah, thanks for reminding! I entirely forgot about it due to this lengthy
discussion. Will do!

Thanks,
-- Sergey
diff mbox series

Patch

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index d8d76da..832a217 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -590,6 +590,7 @@  struct fec_enet_private {
 void fec_ptp_init(struct platform_device *pdev, int irq_idx);
 void fec_ptp_stop(struct platform_device *pdev);
 void fec_ptp_start_cyclecounter(struct net_device *ndev);
+void fec_ptp_disable_hwts(struct net_device *ndev);
 int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr);
 int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr);
 
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 3982285..cc7fbfc 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1294,8 +1294,13 @@  fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 			ndev->stats.tx_bytes += skb->len;
 		}
 
-		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
-			fep->bufdesc_ex) {
+		/* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who
+		 * are to time stamp the packet, so we still need to check time
+		 * stamping enabled flag.
+		 */
+		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
+			     fep->hwts_tx_en) &&
+		    fep->bufdesc_ex) {
 			struct skb_shared_hwtstamps shhwtstamps;
 			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
 
@@ -2723,10 +2728,16 @@  static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
 		return -ENODEV;
 
 	if (fep->bufdesc_ex) {
-		if (cmd == SIOCSHWTSTAMP)
-			return fec_ptp_set(ndev, rq);
-		if (cmd == SIOCGHWTSTAMP)
-			return fec_ptp_get(ndev, rq);
+		bool use_fec_hwts = !phy_has_hwtstamp(phydev);
+
+		if (cmd == SIOCSHWTSTAMP) {
+			if (use_fec_hwts)
+				return fec_ptp_set(ndev, rq);
+			fec_ptp_disable_hwts(ndev);
+		} else if (cmd == SIOCGHWTSTAMP) {
+			if (use_fec_hwts)
+				return fec_ptp_get(ndev, rq);
+		}
 	}
 
 	return phy_mii_ioctl(phydev, rq, cmd);
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index 945643c..f8a592c 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -452,6 +452,18 @@  static int fec_ptp_enable(struct ptp_clock_info *ptp,
 	return -EOPNOTSUPP;
 }
 
+/**
+ * fec_ptp_disable_hwts - disable hardware time stamping
+ * @ndev: pointer to net_device
+ */
+void fec_ptp_disable_hwts(struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+
+	fep->hwts_tx_en = 0;
+	fep->hwts_rx_en = 0;
+}
+
 int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);