diff mbox series

[net-next,03/15] net/mlx5e: PFC stall prevention support

Message ID 20180323223925.21678-4-saeedm@mellanox.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series [net-next,01/15] net/mlx5e: Expose PFC stall prevention counters | expand

Commit Message

Saeed Mahameed March 23, 2018, 10:39 p.m. UTC
From: Inbar Karmy <inbark@mellanox.com>

Implement set/get functions to configure PFC stall prevention
timeout by tunables api through ethtool.
By default the stall prevention timeout is configured to 8 sec.
Timeout range is: 80-8000 msec.
Enabling stall prevention without a specific timeout will set
the timeout to 100 msec.

Signed-off-by: Inbar Karmy <inbark@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 57 +++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/port.c     | 64 +++++++++++++++++++---
 include/linux/mlx5/mlx5_ifc.h                      | 17 ++++--
 include/linux/mlx5/port.h                          |  6 ++
 4 files changed, 132 insertions(+), 12 deletions(-)

Comments

Andrew Lunn March 24, 2018, 3:07 p.m. UTC | #1
On Fri, Mar 23, 2018 at 03:39:13PM -0700, Saeed Mahameed wrote:
> From: Inbar Karmy <inbark@mellanox.com>
> 
> Implement set/get functions to configure PFC stall prevention
> timeout by tunables api through ethtool.
> By default the stall prevention timeout is configured to 8 sec.
> Timeout range is: 80-8000 msec.
> Enabling stall prevention without a specific timeout will set
> the timeout to 100 msec.

Hi Inbar

I think this last sentence should be talking about auto, not without a
specific timeout.

> 
> Signed-off-by: Inbar Karmy <inbark@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 57 +++++++++++++++++++
>  drivers/net/ethernet/mellanox/mlx5/core/port.c     | 64 +++++++++++++++++++---
>  include/linux/mlx5/mlx5_ifc.h                      | 17 ++++--
>  include/linux/mlx5/port.h                          |  6 ++
>  4 files changed, 132 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
> index cc8048f68f11..62061fd23143 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
> @@ -1066,6 +1066,57 @@ static int mlx5e_get_rxnfc(struct net_device *netdev,
>  	return err;
>  }
>  
> +#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC		100
> +#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC		8000
> +#define MLX5E_PFC_PREVEN_MINOR_PRECENT		85
> +#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC		80
> +#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \
> +	max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \
> +	      (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100)
> +
> +static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev,
> +					 u16 *pfc_prevention_tout)
> +{
> +	struct mlx5e_priv *priv    = netdev_priv(netdev);
> +	struct mlx5_core_dev *mdev = priv->mdev;
> +
> +	if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
> +	    !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
> +		return -EOPNOTSUPP;
> +
> +	return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL);

Shouldn't you map a value of MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC back to 
PFC_STORM_PREVENTION_AUTO?

	Andrew
Gal Pressman March 25, 2018, 10:28 a.m. UTC | #2
On 24-Mar-18 18:07, Andrew Lunn wrote:
> On Fri, Mar 23, 2018 at 03:39:13PM -0700, Saeed Mahameed wrote:
>> From: Inbar Karmy <inbark@mellanox.com>
>>
>> Implement set/get functions to configure PFC stall prevention
>> timeout by tunables api through ethtool.
>> By default the stall prevention timeout is configured to 8 sec.
>> Timeout range is: 80-8000 msec.
>> Enabling stall prevention without a specific timeout will set
>> the timeout to 100 msec.
> 
> Hi Inbar
> 
> I think this last sentence should be talking about auto, not without a
> specific timeout.

Hi Andrew,
Good catch, we will fix that.

> 
>>
>> Signed-off-by: Inbar Karmy <inbark@mellanox.com>
>> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
>> ---
>>  .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 57 +++++++++++++++++++
>>  drivers/net/ethernet/mellanox/mlx5/core/port.c     | 64 +++++++++++++++++++---
>>  include/linux/mlx5/mlx5_ifc.h                      | 17 ++++--
>>  include/linux/mlx5/port.h                          |  6 ++
>>  4 files changed, 132 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
>> index cc8048f68f11..62061fd23143 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
>> @@ -1066,6 +1066,57 @@ static int mlx5e_get_rxnfc(struct net_device *netdev,
>>  	return err;
>>  }
>>  
>> +#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC		100
>> +#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC		8000
>> +#define MLX5E_PFC_PREVEN_MINOR_PRECENT		85
>> +#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC		80
>> +#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \
>> +	max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \
>> +	      (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100)
>> +
>> +static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev,
>> +					 u16 *pfc_prevention_tout)
>> +{
>> +	struct mlx5e_priv *priv    = netdev_priv(netdev);
>> +	struct mlx5_core_dev *mdev = priv->mdev;
>> +
>> +	if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
>> +	    !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
>> +		return -EOPNOTSUPP;
>> +
>> +	return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL);
> 
> Shouldn't you map a value of MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC back to 
> PFC_STORM_PREVENTION_AUTO?

We discussed this point internally, mapping MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC (100) to
PFC_STORM_PREVENTION_AUTO might cause confusion when the user explicitly asks for 100msec timeout
and gets auto in his following query.
Also, this way the "auto" timeout is visible to the user, which might help him get an initial
clue of which values are recommended.

> 
> 	Andrew
>
Andrew Lunn March 25, 2018, 4:18 p.m. UTC | #3
> > Shouldn't you map a value of MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC back to 
> > PFC_STORM_PREVENTION_AUTO?
> 
> We discussed this point internally, mapping MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC (100) to
> PFC_STORM_PREVENTION_AUTO might cause confusion when the user explicitly asks for 100msec timeout
> and gets auto in his following query.
> Also, this way the "auto" timeout is visible to the user, which might help him get an initial
> clue of which values are recommended.

Yes, this is a fair point, which is why i asked the question. Either
way, it can cause confusion. 'I configured it to auto, but it always
returns 100, not auto.'

Whatever is decided, it should be consistent across drivers. So please
add some documentation to the ethtool header file about what is
expected.

	Andrew
Gal Pressman March 27, 2018, 8:38 a.m. UTC | #4
On 25-Mar-18 19:18, Andrew Lunn wrote:
>>> Shouldn't you map a value of MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC back to 
>>> PFC_STORM_PREVENTION_AUTO?
>>
>> We discussed this point internally, mapping MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC (100) to
>> PFC_STORM_PREVENTION_AUTO might cause confusion when the user explicitly asks for 100msec timeout
>> and gets auto in his following query.
>> Also, this way the "auto" timeout is visible to the user, which might help him get an initial
>> clue of which values are recommended.
> 
> Yes, this is a fair point, which is why i asked the question. Either
> way, it can cause confusion. 'I configured it to auto, but it always
> returns 100, not auto.'
> 
> Whatever is decided, it should be consistent across drivers. So please
> add some documentation to the ethtool header file about what is
> expected.

We didn't want to limit other drivers implementation, but I agree that
consistency across drivers is important in this case.
We will find a proper place to document it.

> 
> 	Andrew
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index cc8048f68f11..62061fd23143 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1066,6 +1066,57 @@  static int mlx5e_get_rxnfc(struct net_device *netdev,
 	return err;
 }
 
+#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC		100
+#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC		8000
+#define MLX5E_PFC_PREVEN_MINOR_PRECENT		85
+#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC		80
+#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \
+	max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \
+	      (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100)
+
+static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev,
+					 u16 *pfc_prevention_tout)
+{
+	struct mlx5e_priv *priv    = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+	    !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+		return -EOPNOTSUPP;
+
+	return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL);
+}
+
+static int mlx5e_set_pfc_prevention_tout(struct net_device *netdev,
+					 u16 pfc_preven)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u16 critical_tout;
+	u16 minor;
+
+	if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+	    !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+		return -EOPNOTSUPP;
+
+	critical_tout = (pfc_preven == PFC_STORM_PREVENTION_AUTO) ?
+			MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC :
+			pfc_preven;
+
+	if (critical_tout != PFC_STORM_PREVENTION_DISABLE &&
+	    (critical_tout > MLX5E_PFC_PREVEN_TOUT_MAX_MSEC ||
+	     critical_tout < MLX5E_PFC_PREVEN_TOUT_MIN_MSEC)) {
+		netdev_info(netdev, "%s: pfc prevention tout not in range (%d-%d)\n",
+			    __func__, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC,
+			    MLX5E_PFC_PREVEN_TOUT_MAX_MSEC);
+		return -EINVAL;
+	}
+
+	minor = MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout);
+	return mlx5_set_port_stall_watermark(mdev, critical_tout,
+					     minor);
+}
+
 static int mlx5e_get_tunable(struct net_device *dev,
 			     const struct ethtool_tunable *tuna,
 			     void *data)
@@ -1077,6 +1128,9 @@  static int mlx5e_get_tunable(struct net_device *dev,
 	case ETHTOOL_TX_COPYBREAK:
 		*(u32 *)data = priv->channels.params.tx_max_inline;
 		break;
+	case ETHTOOL_PFC_PREVENTION_TOUT:
+		err = mlx5e_get_pfc_prevention_tout(dev, data);
+		break;
 	default:
 		err = -EINVAL;
 		break;
@@ -1118,6 +1172,9 @@  static int mlx5e_set_tunable(struct net_device *dev,
 			break;
 		mlx5e_switch_priv_channels(priv, &new_channels, NULL);
 
+		break;
+	case ETHTOOL_PFC_PREVENTION_TOUT:
+		err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data);
 		break;
 	default:
 		err = -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index c37d00cd472a..fa9d0760dd36 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -483,6 +483,17 @@  int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt);
 
+static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out,
+			       u32 out_size)
+{
+	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+
+	MLX5_SET(pfcc_reg, in, local_port, 1);
+
+	return mlx5_core_access_reg(dev, in, sizeof(in), out,
+				    out_size, MLX5_REG_PFCC, 0, 0);
+}
+
 int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause)
 {
 	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
@@ -500,13 +511,10 @@  EXPORT_SYMBOL_GPL(mlx5_set_port_pause);
 int mlx5_query_port_pause(struct mlx5_core_dev *dev,
 			  u32 *rx_pause, u32 *tx_pause)
 {
-	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
 	int err;
 
-	MLX5_SET(pfcc_reg, in, local_port, 1);
-	err = mlx5_core_access_reg(dev, in, sizeof(in), out,
-				   sizeof(out), MLX5_REG_PFCC, 0, 0);
+	err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
 	if (err)
 		return err;
 
@@ -520,6 +528,49 @@  int mlx5_query_port_pause(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_pause);
 
+int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev,
+				  u16 stall_critical_watermark,
+				  u16 stall_minor_watermark)
+{
+	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+	MLX5_SET(pfcc_reg, in, local_port, 1);
+	MLX5_SET(pfcc_reg, in, pptx_mask_n, 1);
+	MLX5_SET(pfcc_reg, in, pprx_mask_n, 1);
+	MLX5_SET(pfcc_reg, in, ppan_mask_n, 1);
+	MLX5_SET(pfcc_reg, in, critical_stall_mask, 1);
+	MLX5_SET(pfcc_reg, in, minor_stall_mask, 1);
+	MLX5_SET(pfcc_reg, in, device_stall_critical_watermark,
+		 stall_critical_watermark);
+	MLX5_SET(pfcc_reg, in, device_stall_minor_watermark, stall_minor_watermark);
+
+	return mlx5_core_access_reg(dev, in, sizeof(in), out,
+				    sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+
+int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev,
+				    u16 *stall_critical_watermark,
+				    u16 *stall_minor_watermark)
+{
+	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+	int err;
+
+	err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+	if (err)
+		return err;
+
+	if (stall_critical_watermark)
+		*stall_critical_watermark = MLX5_GET(pfcc_reg, out,
+						     device_stall_critical_watermark);
+
+	if (stall_minor_watermark)
+		*stall_minor_watermark = MLX5_GET(pfcc_reg, out,
+						  device_stall_minor_watermark);
+
+	return 0;
+}
+
 int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx)
 {
 	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
@@ -538,13 +589,10 @@  EXPORT_SYMBOL_GPL(mlx5_set_port_pfc);
 
 int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
 {
-	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
 	int err;
 
-	MLX5_SET(pfcc_reg, in, local_port, 1);
-	err = mlx5_core_access_reg(dev, in, sizeof(in), out,
-				   sizeof(out), MLX5_REG_PFCC, 0, 0);
+	err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
 	if (err)
 		return err;
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c7d50eccff9e..f3200a9696d6 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7833,7 +7833,11 @@  struct mlx5_ifc_pifr_reg_bits {
 struct mlx5_ifc_pfcc_reg_bits {
 	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_at_10[0x10];
+	u8         reserved_at_10[0xb];
+	u8         ppan_mask_n[0x1];
+	u8         minor_stall_mask[0x1];
+	u8         critical_stall_mask[0x1];
+	u8         reserved_at_1e[0x2];
 
 	u8         ppan[0x4];
 	u8         reserved_at_24[0x4];
@@ -7843,17 +7847,22 @@  struct mlx5_ifc_pfcc_reg_bits {
 
 	u8         pptx[0x1];
 	u8         aptx[0x1];
-	u8         reserved_at_42[0x6];
+	u8         pptx_mask_n[0x1];
+	u8         reserved_at_43[0x5];
 	u8         pfctx[0x8];
 	u8         reserved_at_50[0x10];
 
 	u8         pprx[0x1];
 	u8         aprx[0x1];
-	u8         reserved_at_62[0x6];
+	u8         pprx_mask_n[0x1];
+	u8         reserved_at_63[0x5];
 	u8         pfcrx[0x8];
 	u8         reserved_at_70[0x10];
 
-	u8         reserved_at_80[0x80];
+	u8         device_stall_minor_watermark[0x10];
+	u8         device_stall_critical_watermark[0x10];
+
+	u8         reserved_at_a0[0x60];
 };
 
 struct mlx5_ifc_pelc_reg_bits {
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 035f0d4dc9fe..34aed6032f86 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -151,6 +151,12 @@  int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx);
 int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx,
 			u8 *pfc_en_rx);
 
+int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev,
+				  u16 stall_critical_watermark,
+				  u16 stall_minor_watermark);
+int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev,
+				    u16 *stall_critical_watermark, u16 *stall_minor_watermark);
+
 int mlx5_max_tc(struct mlx5_core_dev *mdev);
 
 int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc);