Patchwork [net-next,6/6] tg3: Make the RSS indir tbl admin configurable

login
register
mail settings
Submitter Matt Carlson
Date Dec. 14, 2011, 9:10 p.m.
Message ID <1323897002-17295-7-git-send-email-mcarlson@broadcom.com>
Download mbox | patch
Permalink /patch/131477/
State Changes Requested
Delegated to: David Miller
Headers show

Comments

Matt Carlson - Dec. 14, 2011, 9:10 p.m.
This patch adds the ethtool callbacks necessary to change the rss
indirection table from userspace.  When setting the indirection table
through set_rxfh_indir, an indirection table size of zero is
interpreted to mean that the admin wants to relinquish control of the
table to the driver.  Should the number of interrupts change (e.g.
across a close / open call, or through a reset), any indirection table
values that exceed the number of RSS queues or interrupt vectors will
be automatically scaled back to values within range.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
---
 drivers/net/ethernet/broadcom/tg3.c |  128 ++++++++++++++++++++++++++++++++++-
 drivers/net/ethernet/broadcom/tg3.h |    1 +
 2 files changed, 128 insertions(+), 1 deletions(-)
Ben Hutchings - Dec. 14, 2011, 9:50 p.m.
On Wed, 2011-12-14 at 13:10 -0800, Matt Carlson wrote:
> This patch adds the ethtool callbacks necessary to change the rss
> indirection table from userspace.  When setting the indirection table
> through set_rxfh_indir, an indirection table size of zero is
> interpreted to mean that the admin wants to relinquish control of the
> table to the driver.

I'm not convinced that this is a particularly useful option, but I won't
object.  But please document this as an optional driver behaviour in
<linux/ethtool.h>, and add support for this in the ethtool command (e.g.
a 'reset' or 'default' keyword).

> Should the number of interrupts change (e.g.
> across a close / open call, or through a reset), any indirection table
> values that exceed the number of RSS queues or interrupt vectors will
> be automatically scaled back to values within range.
> 
> Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
> Signed-off-by: Michael Chan <mchan@broadcom.com>
> Reviewed-by: Benjamin Li <benli@broadcom.com>
> ---
>  drivers/net/ethernet/broadcom/tg3.c |  128 ++++++++++++++++++++++++++++++++++-
>  drivers/net/ethernet/broadcom/tg3.h |    1 +
>  2 files changed, 128 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
> index 8bf11ca..f684be9 100644
> --- a/drivers/net/ethernet/broadcom/tg3.c
> +++ b/drivers/net/ethernet/broadcom/tg3.c
> @@ -8229,9 +8229,18 @@ void tg3_rss_init_indir_tbl(struct tg3 *tp)
>  
>  	if (tp->irq_cnt <= 2)
>  		memset(&tp->rss_ind_tbl[0], 0, sizeof(tp->rss_ind_tbl));
> -	else
> +	else if (tg3_flag(tp, USER_INDIR_TBL)) {
> +		/* Validate table against current IRQ count */
> +		for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++) {
> +			if (tp->rss_ind_tbl[i] >= tp->irq_cnt - 1) {
> +				/* Cap the vector index */
> +				tp->rss_ind_tbl[i] = tp->irq_cnt - 2;

A modulo operation might make more sense.  But I don't suppose this
failure is going to happen often enough for it to be important.

> +			}
> +		}
> +	} else {
>  		for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++)
>  			tp->rss_ind_tbl[i] = i % (tp->irq_cnt - 1);
> +	}
>  }
>  
>  void tg3_rss_write_indir_tbl(struct tg3 *tp)
> @@ -10719,6 +10728,120 @@ static int tg3_get_sset_count(struct net_device *dev, int sset)
>  	}
>  }
>  
> +static int tg3_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
> +			 u32 *rules __always_unused)
> +{
> +	struct tg3 *tp = netdev_priv(dev);
> +
> +	if (!tg3_flag(tp, SUPPORT_MSIX))
> +		return -EINVAL;

Should be -EOPNOTSUPP.

> +	if (!netif_running(tp->dev))
> +		return -EAGAIN;

Why?  You do handle !netif_running() below...

> +	switch (info->cmd) {
> +	case ETHTOOL_GRXRINGS:
> +		if (netif_running(tp->dev))
> +			info->data = tp->irq_cnt;
> +		else {
> +			info->data = num_online_cpus();
> +			if (info->data > TG3_IRQ_MAX_VECS_RSS)
> +				info->data = TG3_IRQ_MAX_VECS_RSS;
> +		}
> +
> +		/* The first interrupt vector only
> +		 * handles link interrupts.
> +		 */
> +		info->data -= 1;
> +		return 0;
> +
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +}
> +
> +static int tg3_get_rxfh_indir(struct net_device *dev,
> +			      struct ethtool_rxfh_indir *indir)
> +{
> +	struct tg3 *tp = netdev_priv(dev);
> +	int i;
> +
> +	if (!tg3_flag(tp, SUPPORT_MSIX))
> +		return -EINVAL;

-EOPNOTSUPP

> +	if (!indir->size) {
> +		indir->size = TG3_RSS_INDIR_TBL_SIZE;
> +		return 0;
> +	}
> +
> +	if (indir->size != TG3_RSS_INDIR_TBL_SIZE)
> +		return -EINVAL;

This is enough to make the ethtool command work, but you're really
supposed to copy min(indir->size, TG3_RSS_INDIR_TBL_SIZE) entries.

> +	for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++)
> +		indir->ring_index[i] = tp->rss_ind_tbl[i];
> +
> +	return 0;
> +}
> +
> +static int tg3_set_rxfh_indir(struct net_device *dev,
> +			      const struct ethtool_rxfh_indir *indir)
> +{
[...]

This function looks fine.

Ben.
Matt Carlson - Dec. 15, 2011, 9:03 p.m.
On Wed, Dec 14, 2011 at 01:50:59PM -0800, Ben Hutchings wrote:
> On Wed, 2011-12-14 at 13:10 -0800, Matt Carlson wrote:
> > This patch adds the ethtool callbacks necessary to change the rss
> > indirection table from userspace.  When setting the indirection table
> > through set_rxfh_indir, an indirection table size of zero is
> > interpreted to mean that the admin wants to relinquish control of the
> > table to the driver.
> 
> I'm not convinced that this is a particularly useful option, but I won't
> object.  But please document this as an optional driver behaviour in
> <linux/ethtool.h>, and add support for this in the ethtool command (e.g.
> a 'reset' or 'default' keyword).

Will do.

> > Should the number of interrupts change (e.g.
> > across a close / open call, or through a reset), any indirection table
> > values that exceed the number of RSS queues or interrupt vectors will
> > be automatically scaled back to values within range.
> > 
> > Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
> > Signed-off-by: Michael Chan <mchan@broadcom.com>
> > Reviewed-by: Benjamin Li <benli@broadcom.com>
> > ---
> >  drivers/net/ethernet/broadcom/tg3.c |  128 ++++++++++++++++++++++++++++++++++-
> >  drivers/net/ethernet/broadcom/tg3.h |    1 +
> >  2 files changed, 128 insertions(+), 1 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
> > index 8bf11ca..f684be9 100644
> > --- a/drivers/net/ethernet/broadcom/tg3.c
> > +++ b/drivers/net/ethernet/broadcom/tg3.c
> > @@ -8229,9 +8229,18 @@ void tg3_rss_init_indir_tbl(struct tg3 *tp)
> >  
> >  	if (tp->irq_cnt <= 2)
> >  		memset(&tp->rss_ind_tbl[0], 0, sizeof(tp->rss_ind_tbl));
> > -	else
> > +	else if (tg3_flag(tp, USER_INDIR_TBL)) {
> > +		/* Validate table against current IRQ count */
> > +		for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++) {
> > +			if (tp->rss_ind_tbl[i] >= tp->irq_cnt - 1) {
> > +				/* Cap the vector index */
> > +				tp->rss_ind_tbl[i] = tp->irq_cnt - 2;
> 
> A modulo operation might make more sense.  But I don't suppose this
> failure is going to happen often enough for it to be important.

I suppose that makes more sense.  TG3 only has at-most 4 RSS queues, so
there really isn't much difference.  Your idea places more of the
traffic in the first RSS queue, which has a slight performance
advantage.  Thanks for making me rethink this. :)

> > +			}
> > +		}
> > +	} else {
> >  		for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++)
> >  			tp->rss_ind_tbl[i] = i % (tp->irq_cnt - 1);
> > +	}
> >  }
> >  
> >  void tg3_rss_write_indir_tbl(struct tg3 *tp)
> > @@ -10719,6 +10728,120 @@ static int tg3_get_sset_count(struct net_device *dev, int sset)
> >  	}
> >  }
> >  
> > +static int tg3_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
> > +			 u32 *rules __always_unused)
> > +{
> > +	struct tg3 *tp = netdev_priv(dev);
> > +
> > +	if (!tg3_flag(tp, SUPPORT_MSIX))
> > +		return -EINVAL;
> 
> Should be -EOPNOTSUPP.

Will do.

> > +	if (!netif_running(tp->dev))
> > +		return -EAGAIN;
> 
> Why?  You do handle !netif_running() below...

Right.  This isn't needed.

> > +	switch (info->cmd) {
> > +	case ETHTOOL_GRXRINGS:
> > +		if (netif_running(tp->dev))
> > +			info->data = tp->irq_cnt;
> > +		else {
> > +			info->data = num_online_cpus();
> > +			if (info->data > TG3_IRQ_MAX_VECS_RSS)
> > +				info->data = TG3_IRQ_MAX_VECS_RSS;
> > +		}
> > +
> > +		/* The first interrupt vector only
> > +		 * handles link interrupts.
> > +		 */
> > +		info->data -= 1;
> > +		return 0;
> > +
> > +	default:
> > +		return -EOPNOTSUPP;
> > +	}
> > +}
> > +
> > +static int tg3_get_rxfh_indir(struct net_device *dev,
> > +			      struct ethtool_rxfh_indir *indir)
> > +{
> > +	struct tg3 *tp = netdev_priv(dev);
> > +	int i;
> > +
> > +	if (!tg3_flag(tp, SUPPORT_MSIX))
> > +		return -EINVAL;
> 
> -EOPNOTSUPP

Will do.

> > +	if (!indir->size) {
> > +		indir->size = TG3_RSS_INDIR_TBL_SIZE;
> > +		return 0;
> > +	}
> > +
> > +	if (indir->size != TG3_RSS_INDIR_TBL_SIZE)
> > +		return -EINVAL;
> 
> This is enough to make the ethtool command work, but you're really
> supposed to copy min(indir->size, TG3_RSS_INDIR_TBL_SIZE) entries.

Could you elaborate on this?  I'm confused because I can't figure out
how returning half of an indirection table could be useful.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings - Dec. 15, 2011, 9:13 p.m.
On Thu, 2011-12-15 at 13:03 -0800, Matt Carlson wrote:
> On Wed, Dec 14, 2011 at 01:50:59PM -0800, Ben Hutchings wrote:
> > On Wed, 2011-12-14 at 13:10 -0800, Matt Carlson wrote:
[...]
> > > +	if (!indir->size) {
> > > +		indir->size = TG3_RSS_INDIR_TBL_SIZE;
> > > +		return 0;
> > > +	}
> > > +
> > > +	if (indir->size != TG3_RSS_INDIR_TBL_SIZE)
> > > +		return -EINVAL;
> > 
> > This is enough to make the ethtool command work, but you're really
> > supposed to copy min(indir->size, TG3_RSS_INDIR_TBL_SIZE) entries.
> 
> Could you elaborate on this?  I'm confused because I can't figure out
> how returning half of an indirection table could be useful.

It's a generalisation of the zero-length and full-length cases.  But no,
it isn't very useful, nor did I actually specify that anywhere!

Maybe there should be a driver operation to get the table size, and then
the core can make sure that drivers only ever deal with full-table
buffers.  Though that wouldn't cover your reset-to-default case.

Ben.
Matt Carlson - Dec. 15, 2011, 11:37 p.m.
On Thu, Dec 15, 2011 at 01:13:47PM -0800, Ben Hutchings wrote:
> On Thu, 2011-12-15 at 13:03 -0800, Matt Carlson wrote:
> > On Wed, Dec 14, 2011 at 01:50:59PM -0800, Ben Hutchings wrote:
> > > On Wed, 2011-12-14 at 13:10 -0800, Matt Carlson wrote:
> [...]
> > > > +	if (!indir->size) {
> > > > +		indir->size = TG3_RSS_INDIR_TBL_SIZE;
> > > > +		return 0;
> > > > +	}
> > > > +
> > > > +	if (indir->size != TG3_RSS_INDIR_TBL_SIZE)
> > > > +		return -EINVAL;
> > > 
> > > This is enough to make the ethtool command work, but you're really
> > > supposed to copy min(indir->size, TG3_RSS_INDIR_TBL_SIZE) entries.
> > 
> > Could you elaborate on this?  I'm confused because I can't figure out
> > how returning half of an indirection table could be useful.
> 
> It's a generalisation of the zero-length and full-length cases.  But no,
> it isn't very useful, nor did I actually specify that anywhere!
> 
> Maybe there should be a driver operation to get the table size, and then
> the core can make sure that drivers only ever deal with full-table
> buffers.  Though that wouldn't cover your reset-to-default case.

That's how your current implementation of ethtool works, doesn't it?  It
first makes a call to get_rxfh_indir() with a zero size parameter.  The
driver interprets this as a query for the size of the driver's
indirection table.  Subsequent calls to get_rxfh_indir() with a non-zero
size are interpreted as a request for the indirection table data.

Right now, I coded get_rxfh_indir() and set_rxfh_indir() to be strict in
what they accept.  The restrictions can be relaxed though if we can
outline what the driver should do with partial tables.  (I wonder if
these types of interpretations are better placed above the driver
though.  Then again, maybe not.  Tg3 devices can tune the size of the
indirection table by powers of two if needed at the cost of a reset.)

But IMO all these ideas make the reset-to-default case more desirable.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings - Dec. 15, 2011, 11:43 p.m.
On Thu, 2011-12-15 at 15:37 -0800, Matt Carlson wrote:
> On Thu, Dec 15, 2011 at 01:13:47PM -0800, Ben Hutchings wrote:
> > On Thu, 2011-12-15 at 13:03 -0800, Matt Carlson wrote:
> > > On Wed, Dec 14, 2011 at 01:50:59PM -0800, Ben Hutchings wrote:
> > > > On Wed, 2011-12-14 at 13:10 -0800, Matt Carlson wrote:
> > [...]
> > > > > +	if (!indir->size) {
> > > > > +		indir->size = TG3_RSS_INDIR_TBL_SIZE;
> > > > > +		return 0;
> > > > > +	}
> > > > > +
> > > > > +	if (indir->size != TG3_RSS_INDIR_TBL_SIZE)
> > > > > +		return -EINVAL;
> > > > 
> > > > This is enough to make the ethtool command work, but you're really
> > > > supposed to copy min(indir->size, TG3_RSS_INDIR_TBL_SIZE) entries.
> > > 
> > > Could you elaborate on this?  I'm confused because I can't figure out
> > > how returning half of an indirection table could be useful.
> > 
> > It's a generalisation of the zero-length and full-length cases.  But no,
> > it isn't very useful, nor did I actually specify that anywhere!
> > 
> > Maybe there should be a driver operation to get the table size, and then
> > the core can make sure that drivers only ever deal with full-table
> > buffers.  Though that wouldn't cover your reset-to-default case.
> 
> That's how your current implementation of ethtool works, doesn't it?  It
> first makes a call to get_rxfh_indir() with a zero size parameter.  The
> driver interprets this as a query for the size of the driver's
> indirection table.  Subsequent calls to get_rxfh_indir() with a non-zero
> size are interpreted as a request for the indirection table data.

Yes, and I think it's fine if we restrict input that way.

> Right now, I coded get_rxfh_indir() and set_rxfh_indir() to be strict in
> what they accept.  The restrictions can be relaxed though if we can
> outline what the driver should do with partial tables.  (I wonder if
> these types of interpretations are better placed above the driver
> though.

I agree; stand by for patches.

> Then again, maybe not.  Tg3 devices can tune the size of the
> indirection table by powers of two if needed at the cost of a reset.)

You can achieve the same effect repeating the entries of the smaller
table multiple times.

> But IMO all these ideas make the reset-to-default case more desirable.

Ben.

Patch

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 8bf11ca..f684be9 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -8229,9 +8229,18 @@  void tg3_rss_init_indir_tbl(struct tg3 *tp)
 
 	if (tp->irq_cnt <= 2)
 		memset(&tp->rss_ind_tbl[0], 0, sizeof(tp->rss_ind_tbl));
-	else
+	else if (tg3_flag(tp, USER_INDIR_TBL)) {
+		/* Validate table against current IRQ count */
+		for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++) {
+			if (tp->rss_ind_tbl[i] >= tp->irq_cnt - 1) {
+				/* Cap the vector index */
+				tp->rss_ind_tbl[i] = tp->irq_cnt - 2;
+			}
+		}
+	} else {
 		for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++)
 			tp->rss_ind_tbl[i] = i % (tp->irq_cnt - 1);
+	}
 }
 
 void tg3_rss_write_indir_tbl(struct tg3 *tp)
@@ -10719,6 +10728,120 @@  static int tg3_get_sset_count(struct net_device *dev, int sset)
 	}
 }
 
+static int tg3_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
+			 u32 *rules __always_unused)
+{
+	struct tg3 *tp = netdev_priv(dev);
+
+	if (!tg3_flag(tp, SUPPORT_MSIX))
+		return -EINVAL;
+
+	if (!netif_running(tp->dev))
+		return -EAGAIN;
+
+	switch (info->cmd) {
+	case ETHTOOL_GRXRINGS:
+		if (netif_running(tp->dev))
+			info->data = tp->irq_cnt;
+		else {
+			info->data = num_online_cpus();
+			if (info->data > TG3_IRQ_MAX_VECS_RSS)
+				info->data = TG3_IRQ_MAX_VECS_RSS;
+		}
+
+		/* The first interrupt vector only
+		 * handles link interrupts.
+		 */
+		info->data -= 1;
+		return 0;
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int tg3_get_rxfh_indir(struct net_device *dev,
+			      struct ethtool_rxfh_indir *indir)
+{
+	struct tg3 *tp = netdev_priv(dev);
+	int i;
+
+	if (!tg3_flag(tp, SUPPORT_MSIX))
+		return -EINVAL;
+
+	if (!indir->size) {
+		indir->size = TG3_RSS_INDIR_TBL_SIZE;
+		return 0;
+	}
+
+	if (indir->size != TG3_RSS_INDIR_TBL_SIZE)
+		return -EINVAL;
+
+	for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++)
+		indir->ring_index[i] = tp->rss_ind_tbl[i];
+
+	return 0;
+}
+
+static int tg3_set_rxfh_indir(struct net_device *dev,
+			      const struct ethtool_rxfh_indir *indir)
+{
+	struct tg3 *tp = netdev_priv(dev);
+	size_t i;
+
+	if (!tg3_flag(tp, SUPPORT_MSIX))
+		return -EINVAL;
+
+	if (!indir->size) {
+		tg3_flag_clear(tp, USER_INDIR_TBL);
+		tg3_rss_init_indir_tbl(tp);
+	} else {
+		int limit;
+
+		/* Validate size and indices */
+		if (indir->size != TG3_RSS_INDIR_TBL_SIZE)
+			return -EINVAL;
+
+		if (netif_running(dev))
+			limit = tp->irq_cnt;
+		else {
+			limit = num_online_cpus();
+			if (limit > TG3_IRQ_MAX_VECS_RSS)
+				limit = TG3_IRQ_MAX_VECS_RSS;
+		}
+
+		/* The first interrupt vector only
+		 * handles link interrupts.
+		 */
+		limit -= 1;
+
+		/* Check the indices in the table.
+		 * Leave the existing table unmodified
+		 * if an error is detected.
+		 */
+		for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++)
+			if (indir->ring_index[i] >= limit)
+				return -EINVAL;
+
+		tg3_flag_set(tp, USER_INDIR_TBL);
+
+		for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++)
+			tp->rss_ind_tbl[i] = indir->ring_index[i];
+	}
+
+	if (!netif_running(dev) || !tg3_flag(tp, ENABLE_RSS))
+		return 0;
+
+	/* It is legal to write the indirection
+	 * table while the device is running.
+	 */
+	tg3_full_lock(tp, 0);
+	tg3_rss_write_indir_tbl(tp);
+	tg3_full_unlock(tp);
+
+	return 0;
+}
+
 static void tg3_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 {
 	switch (stringset) {
@@ -11949,6 +12072,9 @@  static const struct ethtool_ops tg3_ethtool_ops = {
 	.get_coalesce		= tg3_get_coalesce,
 	.set_coalesce		= tg3_set_coalesce,
 	.get_sset_count		= tg3_get_sset_count,
+	.get_rxnfc		= tg3_get_rxnfc,
+	.get_rxfh_indir		= tg3_get_rxfh_indir,
+	.set_rxfh_indir		= tg3_set_rxfh_indir,
 };
 
 static void __devinit tg3_get_eeprom_size(struct tg3 *tp)
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index aea8f72..4800595 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -2932,6 +2932,7 @@  enum TG3_FLAGS {
 	TG3_FLAG_APE_HAS_NCSI,
 	TG3_FLAG_4K_FIFO_LIMIT,
 	TG3_FLAG_RESET_TASK_PENDING,
+	TG3_FLAG_USER_INDIR_TBL,
 	TG3_FLAG_5705_PLUS,
 	TG3_FLAG_IS_5788,
 	TG3_FLAG_5750_PLUS,