diff mbox

[RFC,1/4] net: support per queue tx_usecs in sysfs

Message ID 1448956892-15509-1-git-send-email-kan.liang@intel.com
State RFC
Headers show

Commit Message

kan.liang@intel.com Dec. 1, 2015, 8:01 a.m. UTC
From: Kan Liang <kan.liang@intel.com>

Network devices usually have many queues. Each queue has its own
tx_usecs options. Currently, we can only set all the queues with same
value by ethtool. This patch expose the tx_usecs in sysfs. So the user
can set/get per queue coalesce parameter tx_usecs by sysfs.

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 Documentation/networking/scaling.txt | 12 ++++++++++++
 include/linux/netdevice.h            |  8 ++++++++
 net/core/net-sysfs.c                 | 38 ++++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+)

Comments

Florian Fainelli Dec. 1, 2015, 10:13 p.m. UTC | #1
On 01/12/15 00:01, kan.liang@intel.com wrote:
> From: Kan Liang <kan.liang@intel.com>
> 
> Network devices usually have many queues. Each queue has its own
> tx_usecs options. Currently, we can only set all the queues with same
> value by ethtool. This patch expose the tx_usecs in sysfs. So the user
> can set/get per queue coalesce parameter tx_usecs by sysfs.

The new interface you propose makes things inconsistent, since we have
two separate configuration paths (sysfs and ethtool), and it would seem
better to have per-queue awareness in ethtool, since there is a whole
bunch of other parameters that could be configured on a per-queue basis.

Have you tried to extend existing ethtool interfaces to cover the need
for multiple queues?

> 
> Signed-off-by: Kan Liang <kan.liang@intel.com>
> ---
>  Documentation/networking/scaling.txt | 12 ++++++++++++
>  include/linux/netdevice.h            |  8 ++++++++
>  net/core/net-sysfs.c                 | 38 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 58 insertions(+)
> 
> diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
> index 59f4db2..636192d 100644
> --- a/Documentation/networking/scaling.txt
> +++ b/Documentation/networking/scaling.txt
> @@ -431,6 +431,18 @@ a max-rate attribute is supported, by setting a Mbps value to
>  
>  A value of zero means disabled, and this is the default.
>  
> +Per Queue interrupt moderation:
> +=============================
> +
> +The interrupt moderation mechanism, which implemented by HW, employs
> +a series of timers to limit the number of interrupts it generates.
> +TX queue absolute delay timer can be set to a microseconds value with
> +
> +/sys/class/net/<dev>/queues/tx-<n>/tx_usecs
> +
> +For the device which doesn't support per queue interrupt moderation,
> +it shows "N/A".
> +
>  Further Information
>  ===================
>  RPS and RFS were introduced in kernel 2.6.35. XPS was incorporated into
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 7d2d1d7..9db5c57 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1059,6 +1059,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
>   *	This function is used to get egress tunnel information for given skb.
>   *	This is useful for retrieving outer tunnel header parameters while
>   *	sampling packet.
> + * void (*ndo_set_per_queue_tx_usecs)(struct net_device *dev,
> + * 				      int index, u32 val);
> + * void (*ndo_get_per_queue_tx_usecs)(struct net_device *dev, int index);
> + * 	This function is used to set/get per queue coalesce parameter tx_usecs.
>   *
>   */
>  struct net_device_ops {
> @@ -1236,6 +1240,10 @@ struct net_device_ops {
>  							 bool proto_down);
>  	int			(*ndo_fill_metadata_dst)(struct net_device *dev,
>  						       struct sk_buff *skb);
> +	void			(*ndo_set_per_queue_tx_usecs)(struct net_device *dev,
> +							      int index, u32 val);
> +	u32			(*ndo_get_per_queue_tx_usecs)(struct net_device *dev,
> +							      int index);
>  };
>  
>  /**
> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
> index f88a62a..48016b8 100644
> --- a/net/core/net-sysfs.c
> +++ b/net/core/net-sysfs.c
> @@ -1239,12 +1239,50 @@ static struct netdev_queue_attribute xps_cpus_attribute =
>      __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
>  #endif /* CONFIG_XPS */
>  
> +static ssize_t tx_usecs_show(struct netdev_queue *queue,
> +			     struct netdev_queue_attribute *attr,
> +			     char *buf)
> +{
> +	struct net_device *dev = queue->dev;
> +	int index = queue - dev->_tx;
> +	u32 val;
> +
> +	if (dev->netdev_ops->ndo_get_per_queue_tx_usecs) {
> +		val = dev->netdev_ops->ndo_get_per_queue_tx_usecs(dev, index);
> +		return sprintf(buf, "%u\n", val);
> +	}
> +
> +	return sprintf(buf, "N/A\n");
> +}
> +
> +static ssize_t tx_usecs_store(struct netdev_queue *queue,
> +			      struct netdev_queue_attribute *attr,
> +			      const char *buf, size_t len)
> +{
> +	struct net_device *dev = queue->dev;
> +	int index = queue - dev->_tx;
> +	u32 val, ret;
> +
> +	ret = kstrtouint(buf, 0, &val);
> +	if (ret < 0)
> +		return -EINVAL;
> +
> +	if (dev->netdev_ops->ndo_set_per_queue_tx_usecs)
> +		dev->netdev_ops->ndo_set_per_queue_tx_usecs(dev, index, val);
> +
> +	return len;
> +}
> +
> +static struct netdev_queue_attribute tx_usecs_attribute =
> +    __ATTR(tx_usecs, S_IRUGO | S_IWUSR, tx_usecs_show, tx_usecs_store);
> +
>  static struct attribute *netdev_queue_default_attrs[] = {
>  	&queue_trans_timeout.attr,
>  #ifdef CONFIG_XPS
>  	&xps_cpus_attribute.attr,
>  	&queue_tx_maxrate.attr,
>  #endif
> +	&tx_usecs_attribute.attr,
>  	NULL
>  };
>  
>
Jesse Brandeburg Dec. 1, 2015, 11:44 p.m. UTC | #2
On Tue, 1 Dec 2015 14:13:34 -0800
Florian Fainelli <f.fainelli@gmail.com> wrote:

> On 01/12/15 00:01, kan.liang@intel.com wrote:
> > From: Kan Liang <kan.liang@intel.com>
> > 
> > Network devices usually have many queues. Each queue has its own
> > tx_usecs options. Currently, we can only set all the queues with same
> > value by ethtool. This patch expose the tx_usecs in sysfs. So the user
> > can set/get per queue coalesce parameter tx_usecs by sysfs.
> 
> The new interface you propose makes things inconsistent, since we have
> two separate configuration paths (sysfs and ethtool), and it would seem
> better to have per-queue awareness in ethtool, since there is a whole
> bunch of other parameters that could be configured on a per-queue basis.
> 
> Have you tried to extend existing ethtool interfaces to cover the need
> for multiple queues?

While I agree that ethtool provides a similar functionality, ethtool
was designed (particularly the ethtool -C/c commands) around one queue
NICs.  We can't change the output or functionality of the user
interface without breaking a bunch of user's scripts and stuff.

With this effort, Kan is laying groundwork for making further kernel
changes, and having the kernel call back in to drivers via ethtool
mechanisms that were designed before multiple queue adapters.

We can also next migrate the legacy ethtool interfaces to use these
new .ndo_ops should we wish.

These patches were provided with the intent of getting some feedback
about going down this path of making a *consistent* user interface that
is driver agnostic in sysfs, and supports multiple queue adapters.
Alexander H Duyck Dec. 2, 2015, 1:57 a.m. UTC | #3
On Tue, Dec 1, 2015 at 3:44 PM, Jesse Brandeburg
<jesse.brandeburg@intel.com> wrote:
> On Tue, 1 Dec 2015 14:13:34 -0800
> Florian Fainelli <f.fainelli@gmail.com> wrote:
>
>> On 01/12/15 00:01, kan.liang@intel.com wrote:
>> > From: Kan Liang <kan.liang@intel.com>
>> >
>> > Network devices usually have many queues. Each queue has its own
>> > tx_usecs options. Currently, we can only set all the queues with same
>> > value by ethtool. This patch expose the tx_usecs in sysfs. So the user
>> > can set/get per queue coalesce parameter tx_usecs by sysfs.
>>
>> The new interface you propose makes things inconsistent, since we have
>> two separate configuration paths (sysfs and ethtool), and it would seem
>> better to have per-queue awareness in ethtool, since there is a whole
>> bunch of other parameters that could be configured on a per-queue basis.
>>
>> Have you tried to extend existing ethtool interfaces to cover the need
>> for multiple queues?
>
> While I agree that ethtool provides a similar functionality, ethtool
> was designed (particularly the ethtool -C/c commands) around one queue
> NICs.  We can't change the output or functionality of the user
> interface without breaking a bunch of user's scripts and stuff.

Actually it seems like it should be fairly easy to extend the existing
interface.  If for example you were to add a couple new keywords such
as rx-queue or tx-queue what you could do is make it so that you would
then specifically overwrite or access the values of a given Tx queue
or Rx queue instead of doing it generically for the entire device.

> With this effort, Kan is laying groundwork for making further kernel
> changes, and having the kernel call back in to drivers via ethtool
> mechanisms that were designed before multiple queue adapters.
>
> We can also next migrate the legacy ethtool interfaces to use these
> new .ndo_ops should we wish.

Maybe that is the path this should start taking now.  Another thing to
keep in mind is that not all drivers make use of the rx-usecs value
the way the Intel drivers do.  As such we need to be able to still
support all the various interrupt moderation types that are supported
by any NICs that might make use of this feature.

> These patches were provided with the intent of getting some feedback
> about going down this path of making a *consistent* user interface that
> is driver agnostic in sysfs, and supports multiple queue adapters.

If you are truly going for something that is driver agnostic you need
to start looking at other drivers.  For example I don't see how this
current implementation deals with the tx/rx_frames values provided in
the mlx4 drivers for their interrupt moderation.  Also it seems like
the assumption here is that you are going to want to run all of the
queues statically without allowing dynamic interrupt moderation.  I
would think that this might be something that could be managed per
queue as well.
David Miller Dec. 2, 2015, 2:23 a.m. UTC | #4
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 01 Dec 2015 14:13:34 -0800

> The new interface you propose makes things inconsistent, since we have
> two separate configuration paths (sysfs and ethtool), and it would seem
> better to have per-queue awareness in ethtool, since there is a whole
> bunch of other parameters that could be configured on a per-queue basis.

Agreed, we have to extend ethtool to support this, in order to provide
a consistent interface.

We could even do this by encapsulating one ethtool command within
another, the outer ethtool command says "apply the inner op to
queue(s) N".
David Miller Dec. 2, 2015, 2:27 a.m. UTC | #5
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Tue, 1 Dec 2015 15:44:54 -0800

> While I agree that ethtool provides a similar functionality, ethtool
> was designed (particularly the ethtool -C/c commands) around one queue
> NICs.  We can't change the output or functionality of the user
> interface without breaking a bunch of user's scripts and stuff.

See my suggestion, we can encapsulate existing ethtool commands inside
of a new command that specifies operations that are to be applied to
specific queues.

Do not use sysfs for this.
diff mbox

Patch

diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
index 59f4db2..636192d 100644
--- a/Documentation/networking/scaling.txt
+++ b/Documentation/networking/scaling.txt
@@ -431,6 +431,18 @@  a max-rate attribute is supported, by setting a Mbps value to
 
 A value of zero means disabled, and this is the default.
 
+Per Queue interrupt moderation:
+=============================
+
+The interrupt moderation mechanism, which implemented by HW, employs
+a series of timers to limit the number of interrupts it generates.
+TX queue absolute delay timer can be set to a microseconds value with
+
+/sys/class/net/<dev>/queues/tx-<n>/tx_usecs
+
+For the device which doesn't support per queue interrupt moderation,
+it shows "N/A".
+
 Further Information
 ===================
 RPS and RFS were introduced in kernel 2.6.35. XPS was incorporated into
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7d2d1d7..9db5c57 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1059,6 +1059,10 @@  typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *	This function is used to get egress tunnel information for given skb.
  *	This is useful for retrieving outer tunnel header parameters while
  *	sampling packet.
+ * void (*ndo_set_per_queue_tx_usecs)(struct net_device *dev,
+ * 				      int index, u32 val);
+ * void (*ndo_get_per_queue_tx_usecs)(struct net_device *dev, int index);
+ * 	This function is used to set/get per queue coalesce parameter tx_usecs.
  *
  */
 struct net_device_ops {
@@ -1236,6 +1240,10 @@  struct net_device_ops {
 							 bool proto_down);
 	int			(*ndo_fill_metadata_dst)(struct net_device *dev,
 						       struct sk_buff *skb);
+	void			(*ndo_set_per_queue_tx_usecs)(struct net_device *dev,
+							      int index, u32 val);
+	u32			(*ndo_get_per_queue_tx_usecs)(struct net_device *dev,
+							      int index);
 };
 
 /**
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f88a62a..48016b8 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1239,12 +1239,50 @@  static struct netdev_queue_attribute xps_cpus_attribute =
     __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
 #endif /* CONFIG_XPS */
 
+static ssize_t tx_usecs_show(struct netdev_queue *queue,
+			     struct netdev_queue_attribute *attr,
+			     char *buf)
+{
+	struct net_device *dev = queue->dev;
+	int index = queue - dev->_tx;
+	u32 val;
+
+	if (dev->netdev_ops->ndo_get_per_queue_tx_usecs) {
+		val = dev->netdev_ops->ndo_get_per_queue_tx_usecs(dev, index);
+		return sprintf(buf, "%u\n", val);
+	}
+
+	return sprintf(buf, "N/A\n");
+}
+
+static ssize_t tx_usecs_store(struct netdev_queue *queue,
+			      struct netdev_queue_attribute *attr,
+			      const char *buf, size_t len)
+{
+	struct net_device *dev = queue->dev;
+	int index = queue - dev->_tx;
+	u32 val, ret;
+
+	ret = kstrtouint(buf, 0, &val);
+	if (ret < 0)
+		return -EINVAL;
+
+	if (dev->netdev_ops->ndo_set_per_queue_tx_usecs)
+		dev->netdev_ops->ndo_set_per_queue_tx_usecs(dev, index, val);
+
+	return len;
+}
+
+static struct netdev_queue_attribute tx_usecs_attribute =
+    __ATTR(tx_usecs, S_IRUGO | S_IWUSR, tx_usecs_show, tx_usecs_store);
+
 static struct attribute *netdev_queue_default_attrs[] = {
 	&queue_trans_timeout.attr,
 #ifdef CONFIG_XPS
 	&xps_cpus_attribute.attr,
 	&queue_tx_maxrate.attr,
 #endif
+	&tx_usecs_attribute.attr,
 	NULL
 };