diff mbox series

[v2] net: phy: marvell10g: support XFI rate matching mode

Message ID 76ee08645fd35182911fd2bac2546e455c4b662c.1593327891.git.baruch@tkos.co.il
State Accepted
Delegated to: David Miller
Headers show
Series [v2] net: phy: marvell10g: support XFI rate matching mode | expand

Commit Message

Baruch Siach June 28, 2020, 7:04 a.m. UTC
When the hardware MACTYPE hardware configuration pins are set to "XFI
with Rate Matching" the PHY interface operate at fixed 10Gbps speed. The
MAC buffer packets in both directions to match various wire speeds.

Read the MAC Type field in the Port Control register, and set the MAC
interface speed accordingly.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
---
v2: Move rate matching state read to config_init (RMK)
---
 drivers/net/phy/marvell10g.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

Comments

Russell King (Oracle) June 29, 2020, 9:22 a.m. UTC | #1
On Sun, Jun 28, 2020 at 10:04:51AM +0300, Baruch Siach wrote:
> When the hardware MACTYPE hardware configuration pins are set to "XFI
> with Rate Matching" the PHY interface operate at fixed 10Gbps speed. The
> MAC buffer packets in both directions to match various wire speeds.
> 
> Read the MAC Type field in the Port Control register, and set the MAC
> interface speed accordingly.
> 
> Signed-off-by: Baruch Siach <baruch@tkos.co.il>
> ---
> v2: Move rate matching state read to config_init (RMK)

Not quite what I was after, but it'll do for now.

My only system which has a 3310 PHY and is configured for rate matching
(using an XAUI interface, mode 1) seems to be sick - the 3310 no longer
correctly negotiates on the media side (will only link at 100M/Half and
only passes traffic in one direction), which makes any development with
it rather difficult.  Either the media side drivers have failed or the
magnetics.

I was also hoping for some discussion, as I bought up a few points
about the 3310's rate matching - unless you have the version with
MACsec, the PHY expects the host side to rate limit the egress rate to
the media rate and will _not_ send pause frames.  If you have MACsec,
and the MACsec hardware is enabled (although may not be encrypting),
then the PHY will send pause frames to the host as the internal buffer
fills.

Then there's the whole question of what phydev->speed etc should be set
to - the media speed or the host side link speed with the PHY, and then
how the host side should configure itself.  At least the 88E6390x
switch will force itself to the media side speed using that while in
XAUI mode, resulting in a non-functioning speed.  So should the host
side force itself to 10G whenever in something like XAUI mode?

What do we do about the egress rate - ignore that statement and hope
that the 3310 doesn't create bad packets on the wire if we fill up its
internal buffer?

> ---
>  drivers/net/phy/marvell10g.c | 22 ++++++++++++++++++++++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
> index d4c2e62b2439..a7610eb55f30 100644
> --- a/drivers/net/phy/marvell10g.c
> +++ b/drivers/net/phy/marvell10g.c
> @@ -80,6 +80,8 @@ enum {
>  	MV_V2_PORT_CTRL		= 0xf001,
>  	MV_V2_PORT_CTRL_SWRST	= BIT(15),
>  	MV_V2_PORT_CTRL_PWRDOWN = BIT(11),
> +	MV_V2_PORT_MAC_TYPE_MASK = 0x7,
> +	MV_V2_PORT_MAC_TYPE_RATE_MATCH = 0x6,
>  	/* Temperature control/read registers (88X3310 only) */
>  	MV_V2_TEMP_CTRL		= 0xf08a,
>  	MV_V2_TEMP_CTRL_MASK	= 0xc000,
> @@ -91,6 +93,7 @@ enum {
>  
>  struct mv3310_priv {
>  	u32 firmware_ver;
> +	bool rate_match;
>  
>  	struct device *hwmon_dev;
>  	char *hwmon_name;
> @@ -458,7 +461,9 @@ static bool mv3310_has_pma_ngbaset_quirk(struct phy_device *phydev)
>  
>  static int mv3310_config_init(struct phy_device *phydev)
>  {
> +	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
>  	int err;
> +	int val;
>  
>  	/* Check that the PHY interface type is compatible */
>  	if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
> @@ -475,6 +480,12 @@ static int mv3310_config_init(struct phy_device *phydev)
>  	if (err)
>  		return err;
>  
> +	val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL);
> +	if (val < 0)
> +		return val;
> +	priv->rate_match = ((val & MV_V2_PORT_MAC_TYPE_MASK) ==
> +			MV_V2_PORT_MAC_TYPE_RATE_MATCH);
> +
>  	/* Enable EDPD mode - saving 600mW */
>  	return mv3310_set_edpd(phydev, ETHTOOL_PHY_EDPD_DFLT_TX_MSECS);
>  }
> @@ -581,6 +592,17 @@ static int mv3310_aneg_done(struct phy_device *phydev)
>  
>  static void mv3310_update_interface(struct phy_device *phydev)
>  {
> +	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
> +
> +	/* In "XFI with Rate Matching" mode the PHY interface is fixed at
> +	 * 10Gb. The PHY adapts the rate to actual wire speed with help of
> +	 * internal 16KB buffer.
> +	 */
> +	if (priv->rate_match) {
> +		phydev->interface = PHY_INTERFACE_MODE_10GBASER;
> +		return;
> +	}
> +
>  	if ((phydev->interface == PHY_INTERFACE_MODE_SGMII ||
>  	     phydev->interface == PHY_INTERFACE_MODE_2500BASEX ||
>  	     phydev->interface == PHY_INTERFACE_MODE_10GBASER) &&
> -- 
> 2.27.0
> 
>
David Miller June 30, 2020, 12:25 a.m. UTC | #2
From: Baruch Siach <baruch@tkos.co.il>
Date: Sun, 28 Jun 2020 10:04:51 +0300

> When the hardware MACTYPE hardware configuration pins are set to "XFI
> with Rate Matching" the PHY interface operate at fixed 10Gbps speed. The
> MAC buffer packets in both directions to match various wire speeds.
> 
> Read the MAC Type field in the Port Control register, and set the MAC
> interface speed accordingly.
> 
> Signed-off-by: Baruch Siach <baruch@tkos.co.il>
> ---
> v2: Move rate matching state read to config_init (RMK)

Applied to net-next, thanks.
Baruch Siach July 1, 2020, 7:23 a.m. UTC | #3
Hi Russell,

On Mon, Jun 29 2020, Russell King - ARM Linux admin wrote:
> On Sun, Jun 28, 2020 at 10:04:51AM +0300, Baruch Siach wrote:
>> When the hardware MACTYPE hardware configuration pins are set to "XFI
>> with Rate Matching" the PHY interface operate at fixed 10Gbps speed. The
>> MAC buffer packets in both directions to match various wire speeds.
>> 
>> Read the MAC Type field in the Port Control register, and set the MAC
>> interface speed accordingly.
>> 
>> Signed-off-by: Baruch Siach <baruch@tkos.co.il>
>> ---
>> v2: Move rate matching state read to config_init (RMK)
>
> Not quite what I was after, but it'll do for now.

Thanks for you review.

> My only system which has a 3310 PHY and is configured for rate matching
> (using an XAUI interface, mode 1) seems to be sick - the 3310 no longer
> correctly negotiates on the media side (will only link at 100M/Half and
> only passes traffic in one direction), which makes any development with
> it rather difficult.  Either the media side drivers have failed or the
> magnetics.
>
> I was also hoping for some discussion, as I bought up a few points
> about the 3310's rate matching - unless you have the version with
> MACsec, the PHY expects the host side to rate limit the egress rate to
> the media rate and will _not_ send pause frames.  If you have MACsec,
> and the MACsec hardware is enabled (although may not be encrypting),
> then the PHY will send pause frames to the host as the internal buffer
> fills.

Flow control is disabled anyway in my use case (vpp).

> Then there's the whole question of what phydev->speed etc should be set
> to - the media speed or the host side link speed with the PHY, and then
> how the host side should configure itself.  At least the 88E6390x
> switch will force itself to the media side speed using that while in
> XAUI mode, resulting in a non-functioning speed.  So should the host
> side force itself to 10G whenever in something like XAUI mode?

How does the switch discover the media side speed? Is there some sort of
in-band information exchange?

> What do we do about the egress rate - ignore that statement and hope
> that the 3310 doesn't create bad packets on the wire if we fill up its
> internal buffer?

I will keep that in mind when stress testing the network. We might need
a way to set IPG on the MAC side if the MAC supports that (mvpp2 in this
case).

baruch

>>  drivers/net/phy/marvell10g.c | 22 ++++++++++++++++++++++
>>  1 file changed, 22 insertions(+)
>> 
>> diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
>> index d4c2e62b2439..a7610eb55f30 100644
>> --- a/drivers/net/phy/marvell10g.c
>> +++ b/drivers/net/phy/marvell10g.c
>> @@ -80,6 +80,8 @@ enum {
>>  	MV_V2_PORT_CTRL		= 0xf001,
>>  	MV_V2_PORT_CTRL_SWRST	= BIT(15),
>>  	MV_V2_PORT_CTRL_PWRDOWN = BIT(11),
>> +	MV_V2_PORT_MAC_TYPE_MASK = 0x7,
>> +	MV_V2_PORT_MAC_TYPE_RATE_MATCH = 0x6,
>>  	/* Temperature control/read registers (88X3310 only) */
>>  	MV_V2_TEMP_CTRL		= 0xf08a,
>>  	MV_V2_TEMP_CTRL_MASK	= 0xc000,
>> @@ -91,6 +93,7 @@ enum {
>>  
>>  struct mv3310_priv {
>>  	u32 firmware_ver;
>> +	bool rate_match;
>>  
>>  	struct device *hwmon_dev;
>>  	char *hwmon_name;
>> @@ -458,7 +461,9 @@ static bool mv3310_has_pma_ngbaset_quirk(struct phy_device *phydev)
>>  
>>  static int mv3310_config_init(struct phy_device *phydev)
>>  {
>> +	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
>>  	int err;
>> +	int val;
>>  
>>  	/* Check that the PHY interface type is compatible */
>>  	if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
>> @@ -475,6 +480,12 @@ static int mv3310_config_init(struct phy_device *phydev)
>>  	if (err)
>>  		return err;
>>  
>> +	val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL);
>> +	if (val < 0)
>> +		return val;
>> +	priv->rate_match = ((val & MV_V2_PORT_MAC_TYPE_MASK) ==
>> +			MV_V2_PORT_MAC_TYPE_RATE_MATCH);
>> +
>>  	/* Enable EDPD mode - saving 600mW */
>>  	return mv3310_set_edpd(phydev, ETHTOOL_PHY_EDPD_DFLT_TX_MSECS);
>>  }
>> @@ -581,6 +592,17 @@ static int mv3310_aneg_done(struct phy_device *phydev)
>>  
>>  static void mv3310_update_interface(struct phy_device *phydev)
>>  {
>> +	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
>> +
>> +	/* In "XFI with Rate Matching" mode the PHY interface is fixed at
>> +	 * 10Gb. The PHY adapts the rate to actual wire speed with help of
>> +	 * internal 16KB buffer.
>> +	 */
>> +	if (priv->rate_match) {
>> +		phydev->interface = PHY_INTERFACE_MODE_10GBASER;
>> +		return;
>> +	}
>> +
>>  	if ((phydev->interface == PHY_INTERFACE_MODE_SGMII ||
>>  	     phydev->interface == PHY_INTERFACE_MODE_2500BASEX ||
>>  	     phydev->interface == PHY_INTERFACE_MODE_10GBASER) &&
>> -- 
>> 2.27.0
>> 
>>
Russell King (Oracle) July 1, 2020, 9:06 a.m. UTC | #4
On Wed, Jul 01, 2020 at 10:23:12AM +0300, Baruch Siach wrote:
> Hi Russell,
> 
> On Mon, Jun 29 2020, Russell King - ARM Linux admin wrote:
> > Then there's the whole question of what phydev->speed etc should be set
> > to - the media speed or the host side link speed with the PHY, and then
> > how the host side should configure itself.  At least the 88E6390x
> > switch will force itself to the media side speed using that while in
> > XAUI mode, resulting in a non-functioning speed.  So should the host
> > side force itself to 10G whenever in something like XAUI mode?
> 
> How does the switch discover the media side speed? Is there some sort of
> in-band information exchange?

The media-side results are passed via phydev->speed and phydev->duplex,
and therefore will be passed through phylink. mvpp2 will ignore them
for 10GBASE-R as it has separate MACs - XLG and GMAC, but 88E6390x, it's
just one.  Consequently, it's possible that the port mode is in XAUI,
but you can force the speed to (e.g.) 100M.

What I know from what I can do with this media-side broken 88X3310, is
that it will pass data if the 88E6390x is forced to 10G, but not if it's
forced to 100M.

We're moving from a situation where MAC drivers can expect (with either
phylib or phylink):

	interface = <some 10G interface>
	speed is always 10G
	duplex is always full

to:

	interface = <some 10G interface>
	speed is 10, 100, 1G or 10G
	duplex is half or full

So, adding rate-matching brings with it a non-obvious change in the API
of phylib and phylink:

* what do the phydev->{speed,duplex,pause,asym_pause} represent - the
  media side parameters or the PHY to MAC parameters?
* what do the "speed, duplex, pause" passed into mac_link_up() refer to,
  the media side, or the link side?
  
Both of those need to be properly documented and explained.

The next two points, I haven't re-read the 3310 datasheet.

We also need to consider a situation which is less obvious: if the PHY
is operating in rate matching mode, doesn't generate pause frames
itself as its rate matching buffers fill, but the media side negotiated
for pause frames.  Should we be advertising no support for pause frames
in this case?  Will the PHY pass pause frames through as a priority?
Consider that in a 16k outbound buffer, there could be up to 10 full
sized frames queued, so if the link partner is asking us to stop
sending, it could take up to 10 frames before we actually stop.

What are the requirements for half duplex in rate matching mode - is
that handled internally by the PHY, or do we need to disable all half
duplex advertisements in the PHY. When rate matching, the PHY can no
longer signal the MAC when a collision occurs, as it would normally
do without rate matching.

I think I've covered everything, but may have missed something. I do
think we need documentation updated before we should accept this patch
so that we have the phylib and phylink behaviour in this case clearly
defined from the outset.
diff mbox series

Patch

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index d4c2e62b2439..a7610eb55f30 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -80,6 +80,8 @@  enum {
 	MV_V2_PORT_CTRL		= 0xf001,
 	MV_V2_PORT_CTRL_SWRST	= BIT(15),
 	MV_V2_PORT_CTRL_PWRDOWN = BIT(11),
+	MV_V2_PORT_MAC_TYPE_MASK = 0x7,
+	MV_V2_PORT_MAC_TYPE_RATE_MATCH = 0x6,
 	/* Temperature control/read registers (88X3310 only) */
 	MV_V2_TEMP_CTRL		= 0xf08a,
 	MV_V2_TEMP_CTRL_MASK	= 0xc000,
@@ -91,6 +93,7 @@  enum {
 
 struct mv3310_priv {
 	u32 firmware_ver;
+	bool rate_match;
 
 	struct device *hwmon_dev;
 	char *hwmon_name;
@@ -458,7 +461,9 @@  static bool mv3310_has_pma_ngbaset_quirk(struct phy_device *phydev)
 
 static int mv3310_config_init(struct phy_device *phydev)
 {
+	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
 	int err;
+	int val;
 
 	/* Check that the PHY interface type is compatible */
 	if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
@@ -475,6 +480,12 @@  static int mv3310_config_init(struct phy_device *phydev)
 	if (err)
 		return err;
 
+	val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL);
+	if (val < 0)
+		return val;
+	priv->rate_match = ((val & MV_V2_PORT_MAC_TYPE_MASK) ==
+			MV_V2_PORT_MAC_TYPE_RATE_MATCH);
+
 	/* Enable EDPD mode - saving 600mW */
 	return mv3310_set_edpd(phydev, ETHTOOL_PHY_EDPD_DFLT_TX_MSECS);
 }
@@ -581,6 +592,17 @@  static int mv3310_aneg_done(struct phy_device *phydev)
 
 static void mv3310_update_interface(struct phy_device *phydev)
 {
+	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
+
+	/* In "XFI with Rate Matching" mode the PHY interface is fixed at
+	 * 10Gb. The PHY adapts the rate to actual wire speed with help of
+	 * internal 16KB buffer.
+	 */
+	if (priv->rate_match) {
+		phydev->interface = PHY_INTERFACE_MODE_10GBASER;
+		return;
+	}
+
 	if ((phydev->interface == PHY_INTERFACE_MODE_SGMII ||
 	     phydev->interface == PHY_INTERFACE_MODE_2500BASEX ||
 	     phydev->interface == PHY_INTERFACE_MODE_10GBASER) &&