diff mbox

[net-next,V1,1/3] net: Add max rate tx queue attribute

Message ID 1426150405-7904-2-git-send-email-ogerlitz@mellanox.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Or Gerlitz March 12, 2015, 8:53 a.m. UTC
From: John Fastabend <john.r.fastabend@intel.com>

This adds a tx_maxrate attribute to the tx queue sysfs entry allowing
for max-rate limiting. Along with DCB-ETS and BQL this provides another
knob to tune queue performance. The limit units are Mbps.

By default it is disabled. To disable the rate limitation after it
has been set for a queue, it should be set to zero.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 include/linux/netdevice.h |    9 ++++++
 net/core/net-sysfs.c      |   70 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 67 insertions(+), 12 deletions(-)

Comments

David Miller March 15, 2015, 5:07 a.m. UTC | #1
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 12 Mar 2015 10:53:23 +0200

> +	if (dev->netdev_ops->ndo_set_tx_maxrate) {
> +		err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
> +	} else {
> +		rtnl_unlock();
> +		return -EOPNOTSUPP;
> +	}
> +
> +	rtnl_unlock();
> +	if (!err) {
> +		queue->tx_maxrate = rate;
> +		return len;
> +	}

This is more succinctly expressed as:

	err = -EOPNOTSUPP;
	if (dev->netdev_ops->ndo_set_tx_maxrate)
		err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
	if (!err) {
		queue->tx_maxrate = rate;
		return len;
	}
	return err;

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz March 15, 2015, 7:41 a.m. UTC | #2
On 3/15/2015 7:07 AM, David Miller wrote:
> From: Or Gerlitz <ogerlitz@mellanox.com>
> Date: Thu, 12 Mar 2015 10:53:23 +0200
>
>> +	if (dev->netdev_ops->ndo_set_tx_maxrate) {
>> +		err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
>> +	} else {
>> +		rtnl_unlock();
>> +		return -EOPNOTSUPP;
>> +	}
>> +
>> +	rtnl_unlock();
>> +	if (!err) {
>> +		queue->tx_maxrate = rate;
>> +		return len;
>> +	}
> This is more succinctly expressed as:
>
> 	err = -EOPNOTSUPP;
> 	if (dev->netdev_ops->ndo_set_tx_maxrate) {
> 		err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
> 	if (!err) {
> 		queue->tx_maxrate = rate;
> 		return len;
> 	}
> 	return err;
>

sure, I'll fix that.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Fainelli March 16, 2015, 8:01 p.m. UTC | #3
On 15/03/15 00:41, Or Gerlitz wrote:
> On 3/15/2015 7:07 AM, David Miller wrote:
>> From: Or Gerlitz <ogerlitz@mellanox.com>
>> This is more succinctly expressed as:
>>
>>     err = -EOPNOTSUPP;
>>     if (dev->netdev_ops->ndo_set_tx_maxrate) {
>>         err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
>>     if (!err) {
>>         queue->tx_maxrate = rate;
>>         return len;
>>     }
>>     return err;
>>
> 
> sure, I'll fix that.

Can you also include a patch which updates the sysfs documentation at
Documentation/ABI/testing/sysfs-class-net-queues? Thanks!
Tom Herbert March 16, 2015, 9:37 p.m. UTC | #4
On Mon, Mar 16, 2015 at 1:01 PM, Florian Fainelli <f.fainelli@gmail.com> wrote:
> On 15/03/15 00:41, Or Gerlitz wrote:
>> On 3/15/2015 7:07 AM, David Miller wrote:
>>> From: Or Gerlitz <ogerlitz@mellanox.com>
>>> This is more succinctly expressed as:
>>>
>>>     err = -EOPNOTSUPP;
>>>     if (dev->netdev_ops->ndo_set_tx_maxrate) {
>>>         err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
>>>     if (!err) {
>>>         queue->tx_maxrate = rate;
>>>         return len;
>>>     }
>>>     return err;
>>>
>>
>> sure, I'll fix that.
>
> Can you also include a patch which updates the sysfs documentation at
> Documentation/ABI/testing/sysfs-class-net-queues? Thanks!

As well as Documentation/networking/scaling.txt.

Thanks,
Tom


> --
> Florian
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1354ae8..e727677 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -587,6 +587,7 @@  struct netdev_queue {
 #ifdef CONFIG_BQL
 	struct dql		dql;
 #endif
+	unsigned long		tx_maxrate;
 } ____cacheline_aligned_in_smp;
 
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -1025,6 +1026,11 @@  struct fib_info;
  *	the set of features that the stack has calculated and it returns
  *	those the driver believes to be appropriate.
  *
+ *	int (*ndo_set_tx_maxrate)(struct net_device *dev,
+ *				  int queue_index,
+ *				  u32 maxrate);
+ *	Called when a user wants to set a max-rate limitation on a specific
+ *	TX queue (maxrate is in Mbps; zero removes the limitation).
  * int (*ndo_switch_parent_id_get)(struct net_device *dev,
  *				   struct netdev_phys_item_id *psid);
  *	Called to get an ID of the switch chip this port is part of.
@@ -1197,6 +1203,9 @@  struct net_device_ops {
 	netdev_features_t	(*ndo_features_check) (struct sk_buff *skb,
 						       struct net_device *dev,
 						       netdev_features_t features);
+	int			(*ndo_set_tx_maxrate)(struct net_device *dev,
+						      int queue_index,
+						      u32 maxrate);
 #ifdef CONFIG_NET_SWITCHDEV
 	int			(*ndo_switch_parent_id_get)(struct net_device *dev,
 							    struct netdev_phys_item_id *psid);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index cf30620..7822dda 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -951,6 +951,63 @@  static ssize_t show_trans_timeout(struct netdev_queue *queue,
 	return sprintf(buf, "%lu", trans_timeout);
 }
 
+#ifdef CONFIG_XPS
+/*
+ * Index of @queue in dev->_tx, by pointer subtraction; BUG if out of range.
+ */
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+{
+	struct net_device *dev = queue->dev;
+	unsigned int i;
+
+	i = queue - dev->_tx;
+	BUG_ON(i >= dev->num_tx_queues);
+
+	return i;
+}
+
+/* Show handler for tx_maxrate: print the rate cached on this TX queue. */
+static ssize_t show_tx_maxrate(struct netdev_queue *queue,
+		struct netdev_queue_attribute *attribute, char *buf)
+{
+	return sprintf(buf, "%lu\n", queue->tx_maxrate);
+}
+
+/*
+ * Store handler for tx_maxrate: parse a decimal rate (Mbps, 0 removes the
+ * limit), program it through ndo_set_tx_maxrate and cache it on success.
+ */
+static ssize_t set_tx_maxrate(struct netdev_queue *queue,
+			      struct netdev_queue_attribute *attribute,
+			      const char *buf, size_t len)
+{
+	struct net_device *dev = queue->dev;
+	int err, index = get_netdev_queue_index(queue);
+	u32 rate = 0;
+
+	err = kstrtou32(buf, 10, &rate);
+	if (err < 0)
+		return err;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	/* Default to "unsupported"; cache the rate while RTNL is still held. */
+	err = -EOPNOTSUPP;
+	if (dev->netdev_ops->ndo_set_tx_maxrate)
+		err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
+	if (!err)
+		queue->tx_maxrate = rate;
+	rtnl_unlock();
+
+	return err ? err : len;
+}
+
+/* Per-queue sysfs attribute "tx_maxrate": readable by all, writable by owner. */
+static struct netdev_queue_attribute queue_tx_maxrate =
+	__ATTR(tx_maxrate, S_IRUGO | S_IWUSR, show_tx_maxrate, set_tx_maxrate);
+#endif
+
 static struct netdev_queue_attribute queue_trans_timeout =
 	__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
 
@@ -1065,18 +1122,6 @@  static struct attribute_group dql_group = {
 #endif /* CONFIG_BQL */
 
 #ifdef CONFIG_XPS
-static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
-{
-	struct net_device *dev = queue->dev;
-	unsigned int i;
-
-	i = queue - dev->_tx;
-	BUG_ON(i >= dev->num_tx_queues);
-
-	return i;
-}
-
-
 static ssize_t show_xps_map(struct netdev_queue *queue,
 			    struct netdev_queue_attribute *attribute, char *buf)
 {
@@ -1153,6 +1198,7 @@  static struct attribute *netdev_queue_default_attrs[] = {
 	&queue_trans_timeout.attr,
 #ifdef CONFIG_XPS
 	&xps_cpus_attribute.attr,
+	&queue_tx_maxrate.attr,
 #endif
 	NULL
 };