diff mbox series

[net-next,v2,6/8] bnxt_en: Implement ethtool -X to set indirection table.

Message ID 1593760787-31695-7-git-send-email-michael.chan@broadcom.com
State Changes Requested
Delegated to: David Miller
Headers show
Series bnxt_en: Driver update for net-next. | expand

Commit Message

Michael Chan July 3, 2020, 7:19 a.m. UTC
With the new infrastructure in place, we can now support the setting of
the indirection table from ethtool.

When changing channels, in a rare case that firmware cannot reserve the
rings that were promised, the RSS map will revert to default.

v2: When changing channels, if the RSS table size changes and RSS map
    is non-default, return error.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         |  7 ++++-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 37 +++++++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)

Comments

Jakub Kicinski July 3, 2020, 11:37 p.m. UTC | #1
On Fri,  3 Jul 2020 03:19:45 -0400 Michael Chan wrote:
> With the new infrastructure in place, we can now support the setting of
> the indirection table from ethtool.
> 
> When changing channels, in a rare case that firmware cannot reserve the
> rings that were promised, the RSS map will revert to default.
> 
> v2: When changing channels, if the RSS table size changes and RSS map
>     is non-default, return error.

Sorry, still not what I had in mind :(

> Signed-off-by: Michael Chan <michael.chan@broadcom.com>

> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index 6c90a94..0edb692 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -6061,6 +6061,10 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
>  		rx = rx_rings << 1;
>  	cp = sh ? max_t(int, tx, rx_rings) : tx + rx_rings;
>  	bp->tx_nr_rings = tx;
> +
> +	/* Reset the RSS indirection if we cannot reserve all the RX rings */
> +	if (rx_rings != bp->rx_nr_rings)
> +		bp->dev->priv_flags &= ~IFF_RXFH_CONFIGURED;

Ethtool core will check that if user set RSS table explicitly and now
changes ring count - the RSS table will not point to rings which will
no longer exist. IOW if RSS table is set we're likely increasing the
number of rings, and I'd venture to guess that the FW is not gonna give
us less rings than we previously had. So it seems like we may be
clearing the flag, and the RSS table unnecessarily here.

What I was suggesting, was that it perhaps be better to modulo the
number or rings in __bnxt_fill_hw_rss_tbl* by the actual table size,
but leave the user-set RSS table in place. That'd be the lowest LoC.

Also I still think that there should be a warning printed when FW gave
us less rings than expected.

>  	bp->rx_nr_rings = rx_rings;
>  	bp->cp_nr_rings = cp;
>  
> @@ -8265,7 +8269,8 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
>  			rc = bnxt_init_int_mode(bp);
>  		bnxt_ulp_irq_restart(bp, rc);
>  	}
> -	bnxt_set_dflt_rss_indir_tbl(bp);
> +	if (!netif_is_rxfh_configured(bp->dev))
> +		bnxt_set_dflt_rss_indir_tbl(bp);
>  
>  	if (rc) {
>  		netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc);
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> index 46f3978..9098818 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> @@ -926,6 +926,13 @@ static int bnxt_set_channels(struct net_device *dev,
>  		return rc;
>  	}
>  
> +	if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) !=
> +	    bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) &&
> +	    (dev->priv_flags & IFF_RXFH_CONFIGURED)) {

In this case copy the old values over and zero-fill the new rings.

> +		netdev_warn(dev, "RSS table size change required, RSS table entries must be default to proceed\n");
> +		return -EINVAL;
> +	}
> +
>  	if (netif_running(dev)) {
>  		if (BNXT_PF(bp)) {
>  			/* TODO CHIMP_FW: Send message to all VF's
> @@ -1315,6 +1322,35 @@ static int bnxt_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
>  	return 0;
>  }
>  
> +static int bnxt_set_rxfh(struct net_device *dev, const u32 *indir,
> +			 const u8 *key, const u8 hfunc)
> +{
> +	struct bnxt *bp = netdev_priv(dev);
> +	int rc = 0;
> +
> +	if (hfunc && hfunc != ETH_RSS_HASH_TOP)
> +		return -EOPNOTSUPP;
> +
> +	if (key)
> +		return -EOPNOTSUPP;
> +
> +	if (indir) {
> +		u32 i, pad, tbl_size = bnxt_get_rxfh_indir_size(dev);
> +
> +		for (i = 0; i < tbl_size; i++)
> +			bp->rss_indir_tbl[i] = indir[i];
> +		pad = bp->rss_indir_tbl_entries - tbl_size;
> +		if (pad)
> +			memset(&bp->rss_indir_tbl[i], 0, pad * sizeof(u16));
> +	}

I see in patch 4 there is a:

	bool no_rss = !(vnic->flags & BNXT_VNIC_RSS_FLAG);

Should there be some check here to only allow users to change the
indirection table if RSS is supported / allowed?
Michael Chan July 4, 2020, 12:31 a.m. UTC | #2
On Fri, Jul 3, 2020 at 4:37 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Fri,  3 Jul 2020 03:19:45 -0400 Michael Chan wrote:
> > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> > index 6c90a94..0edb692 100644
> > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> > @@ -6061,6 +6061,10 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
> >               rx = rx_rings << 1;
> >       cp = sh ? max_t(int, tx, rx_rings) : tx + rx_rings;
> >       bp->tx_nr_rings = tx;
> > +
> > +     /* Reset the RSS indirection if we cannot reserve all the RX rings */
> > +     if (rx_rings != bp->rx_nr_rings)
> > +             bp->dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
>
> Ethtool core will check that if user set RSS table explicitly and now
> changes ring count - the RSS table will not point to rings which will
> no longer exist. IOW if RSS table is set we're likely increasing the
> number of rings, and I'd venture to guess that the FW is not gonna give
> us less rings than we previously had. So it seems like we may be
> clearing the flag, and the RSS table unnecessarily here.

Right, the user most likely wants to increase the rings with RSS map
already set.  The user can decrease rings too if the higher rings
don't have any weight.  We call bnxt_check_rings() to see if firmware
has the requested rings before we proceed.  Once we proceed, we're
committed and if firmware cannot give us the rings it promised
earlier, the RSS map may no longer be valid if the rings are even
fewer than what we had before.  This is rare, as I said earlier, but
it can happen, especially on the VFs.  The resources are less
guaranteed on the VFs.

>
> What I was suggesting, was that it perhaps be better to modulo the
> number or rings in __bnxt_fill_hw_rss_tbl* by the actual table size,
> but leave the user-set RSS table in place. That'd be the lowest LoC.
>
> Also I still think that there should be a warning printed when FW gave
> us less rings than expected.

Sure, we can add some warnings if we don't warn already.

>
> >       bp->rx_nr_rings = rx_rings;
> >       bp->cp_nr_rings = cp;
> >
> > @@ -8265,7 +8269,8 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
> >                       rc = bnxt_init_int_mode(bp);
> >               bnxt_ulp_irq_restart(bp, rc);
> >       }
> > -     bnxt_set_dflt_rss_indir_tbl(bp);
> > +     if (!netif_is_rxfh_configured(bp->dev))
> > +             bnxt_set_dflt_rss_indir_tbl(bp);
> >
> >       if (rc) {
> >               netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc);
> > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> > index 46f3978..9098818 100644
> > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> > @@ -926,6 +926,13 @@ static int bnxt_set_channels(struct net_device *dev,
> >               return rc;
> >       }
> >
> > +     if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) !=
> > +         bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) &&
> > +         (dev->priv_flags & IFF_RXFH_CONFIGURED)) {
>
> In this case copy the old values over and zero-fill the new rings.

If the existing RSS table has 128 entries and a custom map.  And now
the table needs to expand to 192 entries with 64 new entries, we
cannot just copy, right?  The weights will all be messed up if we try.
I think requiring the user to change back to default or force it back
to default are the only sane options.

>
> > +             netdev_warn(dev, "RSS table size change required, RSS table entries must be default to proceed\n");
> > +             return -EINVAL;
> > +     }
> > +
> >       if (netif_running(dev)) {
> >               if (BNXT_PF(bp)) {
> >                       /* TODO CHIMP_FW: Send message to all VF's
> > @@ -1315,6 +1322,35 @@ static int bnxt_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
> >       return 0;
> >  }
> >
> > +static int bnxt_set_rxfh(struct net_device *dev, const u32 *indir,
> > +                      const u8 *key, const u8 hfunc)
> > +{
> > +     struct bnxt *bp = netdev_priv(dev);
> > +     int rc = 0;
> > +
> > +     if (hfunc && hfunc != ETH_RSS_HASH_TOP)
> > +             return -EOPNOTSUPP;
> > +
> > +     if (key)
> > +             return -EOPNOTSUPP;
> > +
> > +     if (indir) {
> > +             u32 i, pad, tbl_size = bnxt_get_rxfh_indir_size(dev);
> > +
> > +             for (i = 0; i < tbl_size; i++)
> > +                     bp->rss_indir_tbl[i] = indir[i];
> > +             pad = bp->rss_indir_tbl_entries - tbl_size;
> > +             if (pad)
> > +                     memset(&bp->rss_indir_tbl[i], 0, pad * sizeof(u16));
> > +     }
>
> I see in patch 4 there is a:
>
>         bool no_rss = !(vnic->flags & BNXT_VNIC_RSS_FLAG);
>
> Should there be some check here to only allow users to change the
> indirection table if RSS is supported / allowed?

RSS is always supported on the function.  The first VNIC hw structure
that we allocate always supports RSS.  We allocate additional VNICs
for aRFS and those VNICs do not use RSS and the BNXT_VNIC_RSS_FLAG
will not be set on those VNICs.  That's what the code in patch 4 is
doing.  The bp->rss_indir_tbl always applies to the first VNIC that
supports RSS.
Jakub Kicinski July 5, 2020, 4:40 p.m. UTC | #3
On Fri, 3 Jul 2020 17:31:59 -0700 Michael Chan wrote:
> > >       bp->rx_nr_rings = rx_rings;
> > >       bp->cp_nr_rings = cp;
> > >
> > > @@ -8265,7 +8269,8 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
> > >                       rc = bnxt_init_int_mode(bp);
> > >               bnxt_ulp_irq_restart(bp, rc);
> > >       }
> > > -     bnxt_set_dflt_rss_indir_tbl(bp);
> > > +     if (!netif_is_rxfh_configured(bp->dev))
> > > +             bnxt_set_dflt_rss_indir_tbl(bp);
> > >
> > >       if (rc) {
> > >               netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc);
> > > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> > > index 46f3978..9098818 100644
> > > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> > > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> > > @@ -926,6 +926,13 @@ static int bnxt_set_channels(struct net_device *dev,
> > >               return rc;
> > >       }
> > >
> > > +     if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) !=
> > > +         bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) &&
> > > +         (dev->priv_flags & IFF_RXFH_CONFIGURED)) {  
> >
> > In this case copy the old values over and zero-fill the new rings.  
> 
> If the existing RSS table has 128 entries and a custom map.  And now
> the table needs to expand to 192 entries with 64 new entries, we
> cannot just copy, right?  The weights will all be messed up if we try.
> I think requiring the user to change back to default or force it back
> to default are the only sane options.

I see it now, you're right.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6c90a94..0edb692 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6061,6 +6061,10 @@  static int __bnxt_reserve_rings(struct bnxt *bp)
 		rx = rx_rings << 1;
 	cp = sh ? max_t(int, tx, rx_rings) : tx + rx_rings;
 	bp->tx_nr_rings = tx;
+
+	/* Reset the RSS indirection if we cannot reserve all the RX rings */
+	if (rx_rings != bp->rx_nr_rings)
+		bp->dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
 	bp->rx_nr_rings = rx_rings;
 	bp->cp_nr_rings = cp;
 
@@ -8265,7 +8269,8 @@  int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
 			rc = bnxt_init_int_mode(bp);
 		bnxt_ulp_irq_restart(bp, rc);
 	}
-	bnxt_set_dflt_rss_indir_tbl(bp);
+	if (!netif_is_rxfh_configured(bp->dev))
+		bnxt_set_dflt_rss_indir_tbl(bp);
 
 	if (rc) {
 		netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 46f3978..9098818 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -926,6 +926,13 @@  static int bnxt_set_channels(struct net_device *dev,
 		return rc;
 	}
 
+	if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) !=
+	    bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) &&
+	    (dev->priv_flags & IFF_RXFH_CONFIGURED)) {
+		netdev_warn(dev, "RSS table size change required, RSS table entries must be default to proceed\n");
+		return -EINVAL;
+	}
+
 	if (netif_running(dev)) {
 		if (BNXT_PF(bp)) {
 			/* TODO CHIMP_FW: Send message to all VF's
@@ -1315,6 +1322,35 @@  static int bnxt_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
 	return 0;
 }
 
+static int bnxt_set_rxfh(struct net_device *dev, const u32 *indir,
+			 const u8 *key, const u8 hfunc)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	int rc = 0;
+
+	if (hfunc && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+
+	if (key)
+		return -EOPNOTSUPP;
+
+	if (indir) {
+		u32 i, pad, tbl_size = bnxt_get_rxfh_indir_size(dev);
+
+		for (i = 0; i < tbl_size; i++)
+			bp->rss_indir_tbl[i] = indir[i];
+		pad = bp->rss_indir_tbl_entries - tbl_size;
+		if (pad)
+			memset(&bp->rss_indir_tbl[i], 0, pad * sizeof(u16));
+	}
+
+	if (netif_running(bp->dev)) {
+		bnxt_close_nic(bp, false, false);
+		rc = bnxt_open_nic(bp, false, false);
+	}
+	return rc;
+}
+
 static void bnxt_get_drvinfo(struct net_device *dev,
 			     struct ethtool_drvinfo *info)
 {
@@ -3621,6 +3657,7 @@  void bnxt_ethtool_free(struct bnxt *bp)
 	.get_rxfh_indir_size    = bnxt_get_rxfh_indir_size,
 	.get_rxfh_key_size      = bnxt_get_rxfh_key_size,
 	.get_rxfh               = bnxt_get_rxfh,
+	.set_rxfh		= bnxt_set_rxfh,
 	.flash_device		= bnxt_flash_device,
 	.get_eeprom_len         = bnxt_get_eeprom_len,
 	.get_eeprom             = bnxt_get_eeprom,