[net-next] mqprio: Reserve last 32 classid values for HW traffic classes and misc IDs

Message ID 20171012182658.14632.9010.stgit@localhost.localdomain
State Accepted
Delegated to: David Miller
Headers show
Series
  • [net-next] mqprio: Reserve last 32 classid values for HW traffic classes and misc IDs
Related show

Commit Message

Alexander Duyck Oct. 12, 2017, 6:38 p.m.
From: Alexander Duyck <alexander.h.duyck@intel.com>

This patch makes a slight tweak to mqprio in order to bring the
classid values used back in line with what is used for mq. The general idea
is to reserve values :ffe0 - :ffef to identify hardware traffic classes
normally reported via dev->num_tc. By doing this we can maintain a
consistent behavior with mq for classid where :1 - :ffdf will represent a
physical qdisc mapped onto a Tx queue represented by classid - 1, and the
traffic classes will be mapped onto a known subset of classid values
reserved for our virtual qdiscs.

Note I reserved the range from :fff0 - :ffff since this way we might be
able to reuse these classid values with clsact and ingress which would mean
that for mq, mqprio, ingress, and clsact we should be able to maintain a
similar classid layout.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---

So I thought I would put this out here as a first step towards trying to
address some of Jiri's concerns about wanting to have a consistent
userspace API.

The plan is to follow this up with patches to ingress and clsact to look at
exposing a set of virtual qdiscs similar to what we already have for the HW
traffic classes in mqprio, although I won't bother with the ability to dump
class stats since they don't actually enqueue anything.

 include/uapi/linux/pkt_sched.h |    1 +
 net/sched/sch_mqprio.c         |   79 +++++++++++++++++++++++-----------------
 2 files changed, 47 insertions(+), 33 deletions(-)

Comments

Jesus Sanchez-Palencia Oct. 12, 2017, 9:56 p.m. | #1
Hi Alex,


On 10/12/2017 11:38 AM, Alexander Duyck wrote:
> From: Alexander Duyck <alexander.h.duyck@intel.com>
> 
> This patch makes a slight tweak to mqprio in order to bring the
> classid values used back in line with what is used for mq. The general idea
> is to reserve values :ffe0 - :ffef to identify hardware traffic classes
> normally reported via dev->num_tc. By doing this we can maintain a
> consistent behavior with mq for classid where :1 - :ffdf will represent a
> physical qdisc mapped onto a Tx queue represented by classid - 1, and the
> traffic classes will be mapped onto a known subset of classid values
> reserved for our virtual qdiscs.
> 
> Note I reserved the range from :fff0 - :ffff since this way we might be
> able to reuse these classid values with clsact and ingress which would mean
> that for mq, mqprio, ingress, and clsact we should be able to maintain a
> similar classid layout.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> ---
> 
> So I thought I would put this out here as a first step towards trying to
> address some of Jiri's concerns about wanting to have a consistent
> userspace API.
> 
> The plan is to follow this up with patches to ingress and clsact to look at
> exposing a set of virtual qdiscs similar to what we already have for the HW
> traffic classes in mqprio, although I won't bother with the ability to dump
> class stats since they don't actually enqueue anything.
> 
>  include/uapi/linux/pkt_sched.h |    1 +
>  net/sched/sch_mqprio.c         |   79 +++++++++++++++++++++++-----------------
>  2 files changed, 47 insertions(+), 33 deletions(-)
> 
> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> index 099bf5528fed..174f1cf7e7f9 100644
> --- a/include/uapi/linux/pkt_sched.h
> +++ b/include/uapi/linux/pkt_sched.h
> @@ -74,6 +74,7 @@ struct tc_estimator {
>  #define TC_H_INGRESS    (0xFFFFFFF1U)
>  #define TC_H_CLSACT	TC_H_INGRESS
>  
> +#define TC_H_MIN_PRIORITY	0xFFE0U
>  #define TC_H_MIN_INGRESS	0xFFF2U
>  #define TC_H_MIN_EGRESS		0xFFF3U
>  
> diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
> index 6bcdfe6e7b63..a61ef119a556 100644
> --- a/net/sched/sch_mqprio.c
> +++ b/net/sched/sch_mqprio.c
> @@ -115,6 +115,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
>  	if (!netif_is_multiqueue(dev))
>  		return -EOPNOTSUPP;
>  
> +	/* make certain can allocate enough classids to handle queues */
> +	if (dev->num_tx_queues >= TC_H_MIN_PRIORITY)
> +		return -ENOMEM;
> +
>  	if (!opt || nla_len(opt) < sizeof(*qopt))
>  		return -EINVAL;
>  
> @@ -193,7 +197,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
>  					     unsigned long cl)
>  {
>  	struct net_device *dev = qdisc_dev(sch);
> -	unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
> +	unsigned long ntx = cl - 1;
>  
>  	if (ntx >= dev->num_tx_queues)
>  		return NULL;
> @@ -282,38 +286,35 @@ static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
>  	struct net_device *dev = qdisc_dev(sch);
>  	unsigned int ntx = TC_H_MIN(classid);
>  
> -	if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
> -		return 0;
> -	return ntx;
> +	/* There are essentially two regions here that have valid classid
> +	 * values. The first region will have a classid value of 1 through
> +	 * num_tx_queues. All of these are backed by actual Qdiscs.
> +	 */
> +	if (ntx < TC_H_MIN_PRIORITY)
> +		return (ntx <= dev->num_tx_queues) ? ntx : 0;
> +
> +	/* The second region represents the hardware traffic classes. These
> +	 * are represented by classid values of TC_H_MIN_PRIORITY through
> +	 * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1
> +	 */
> +	return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0;
>  }
>  
>  static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
>  			 struct sk_buff *skb, struct tcmsg *tcm)
>  {
> -	struct net_device *dev = qdisc_dev(sch);
> +	if (cl < TC_H_MIN_PRIORITY) {
> +		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
> +		struct net_device *dev = qdisc_dev(sch);
> +		int tc = netdev_txq_to_tc(dev, cl - 1);
>  
> -	if (cl <= netdev_get_num_tc(dev)) {
> +		tcm->tcm_parent = (tc < 0) ? 0 :
> +			TC_H_MAKE(TC_H_MAJ(sch->handle),
> +				  TC_H_MIN(tc + TC_H_MIN_PRIORITY));
> +		tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
> +	} else {
>  		tcm->tcm_parent = TC_H_ROOT;
>  		tcm->tcm_info = 0;
> -	} else {
> -		int i;
> -		struct netdev_queue *dev_queue;
> -
> -		dev_queue = mqprio_queue_get(sch, cl);
> -		tcm->tcm_parent = 0;
> -		for (i = 0; i < netdev_get_num_tc(dev); i++) {
> -			struct netdev_tc_txq tc = dev->tc_to_txq[i];
> -			int q_idx = cl - netdev_get_num_tc(dev);
> -
> -			if (q_idx > tc.offset &&
> -			    q_idx <= tc.offset + tc.count) {
> -				tcm->tcm_parent =
> -					TC_H_MAKE(TC_H_MAJ(sch->handle),
> -						  TC_H_MIN(i + 1));
> -				break;
> -			}
> -		}
> -		tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
>  	}
>  	tcm->tcm_handle |= TC_H_MIN(cl);
>  	return 0;
> @@ -324,15 +325,14 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
>  	__releases(d->lock)
>  	__acquires(d->lock)
>  {
> -	struct net_device *dev = qdisc_dev(sch);
> -
> -	if (cl <= netdev_get_num_tc(dev)) {
> +	if (cl >= TC_H_MIN_PRIORITY) {
>  		int i;
>  		__u32 qlen = 0;
>  		struct Qdisc *qdisc;
>  		struct gnet_stats_queue qstats = {0};
>  		struct gnet_stats_basic_packed bstats = {0};
> -		struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
> +		struct net_device *dev = qdisc_dev(sch);
> +		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
>  
>  		/* Drop lock here it will be reclaimed before touching
>  		 * statistics this is required because the d->lock we
> @@ -385,12 +385,25 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
>  
>  	/* Walk hierarchy with a virtual class per tc */
>  	arg->count = arg->skip;
> -	for (ntx = arg->skip;
> -	     ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
> -	     ntx++) {
> +	for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) {
> +		if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) {
> +			arg->stop = 1;
> +			return;
> +		}
> +		arg->count++;
> +	}
> +
> +	/* Pad the values and skip over unused traffic classes */
> +	if (ntx < TC_MAX_QUEUE) {
> +		arg->count = TC_MAX_QUEUE;
> +		ntx = TC_MAX_QUEUE;
> +	}
> +
> +	/* Reset offset, sort out remaining per-queue qdiscs */
> +	for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) {
>  		if (arg->fn(sch, ntx + 1, arg) < 0) {
>  			arg->stop = 1;
> -			break;
> +			return;
>  		}
>  		arg->count++;
>  	}
> 

Tested-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>

Looks good.


thanks,
Jesus
David Miller Oct. 16, 2017, 7:54 p.m. | #2
From: Alexander Duyck <alexander.duyck@gmail.com>
Date: Thu, 12 Oct 2017 11:38:45 -0700

> From: Alexander Duyck <alexander.h.duyck@intel.com>
> 
> This patch makes a slight tweak to mqprio in order to bring the
> classid values used back in line with what is used for mq. The general idea
> is to reserve values :ffe0 - :ffef to identify hardware traffic classes
> normally reported via dev->num_tc. By doing this we can maintain a
> consistent behavior with mq for classid where :1 - :ffdf will represent a
> physical qdisc mapped onto a Tx queue represented by classid - 1, and the
> traffic classes will be mapped onto a known subset of classid values
> reserved for our virtual qdiscs.
> 
> Note I reserved the range from :fff0 - :ffff since this way we might be
> able to reuse these classid values with clsact and ingress which would mean
> that for mq, mqprio, ingress, and clsact we should be able to maintain a
> similar classid layout.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>

Applied, thanks Alexander.

Patch

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 099bf5528fed..174f1cf7e7f9 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -74,6 +74,7 @@  struct tc_estimator {
 #define TC_H_INGRESS    (0xFFFFFFF1U)
 #define TC_H_CLSACT	TC_H_INGRESS
 
+#define TC_H_MIN_PRIORITY	0xFFE0U
 #define TC_H_MIN_INGRESS	0xFFF2U
 #define TC_H_MIN_EGRESS		0xFFF3U
 
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 6bcdfe6e7b63..a61ef119a556 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -115,6 +115,10 @@  static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!netif_is_multiqueue(dev))
 		return -EOPNOTSUPP;
 
+	/* make certain can allocate enough classids to handle queues */
+	if (dev->num_tx_queues >= TC_H_MIN_PRIORITY)
+		return -ENOMEM;
+
 	if (!opt || nla_len(opt) < sizeof(*qopt))
 		return -EINVAL;
 
@@ -193,7 +197,7 @@  static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
 					     unsigned long cl)
 {
 	struct net_device *dev = qdisc_dev(sch);
-	unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
+	unsigned long ntx = cl - 1;
 
 	if (ntx >= dev->num_tx_queues)
 		return NULL;
@@ -282,38 +286,35 @@  static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
 	struct net_device *dev = qdisc_dev(sch);
 	unsigned int ntx = TC_H_MIN(classid);
 
-	if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
-		return 0;
-	return ntx;
+	/* There are essentially two regions here that have valid classid
+	 * values. The first region will have a classid value of 1 through
+	 * num_tx_queues. All of these are backed by actual Qdiscs.
+	 */
+	if (ntx < TC_H_MIN_PRIORITY)
+		return (ntx <= dev->num_tx_queues) ? ntx : 0;
+
+	/* The second region represents the hardware traffic classes. These
+	 * are represented by classid values of TC_H_MIN_PRIORITY through
+	 * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1
+	 */
+	return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0;
 }
 
 static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
 			 struct sk_buff *skb, struct tcmsg *tcm)
 {
-	struct net_device *dev = qdisc_dev(sch);
+	if (cl < TC_H_MIN_PRIORITY) {
+		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+		struct net_device *dev = qdisc_dev(sch);
+		int tc = netdev_txq_to_tc(dev, cl - 1);
 
-	if (cl <= netdev_get_num_tc(dev)) {
+		tcm->tcm_parent = (tc < 0) ? 0 :
+			TC_H_MAKE(TC_H_MAJ(sch->handle),
+				  TC_H_MIN(tc + TC_H_MIN_PRIORITY));
+		tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+	} else {
 		tcm->tcm_parent = TC_H_ROOT;
 		tcm->tcm_info = 0;
-	} else {
-		int i;
-		struct netdev_queue *dev_queue;
-
-		dev_queue = mqprio_queue_get(sch, cl);
-		tcm->tcm_parent = 0;
-		for (i = 0; i < netdev_get_num_tc(dev); i++) {
-			struct netdev_tc_txq tc = dev->tc_to_txq[i];
-			int q_idx = cl - netdev_get_num_tc(dev);
-
-			if (q_idx > tc.offset &&
-			    q_idx <= tc.offset + tc.count) {
-				tcm->tcm_parent =
-					TC_H_MAKE(TC_H_MAJ(sch->handle),
-						  TC_H_MIN(i + 1));
-				break;
-			}
-		}
-		tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
 	}
 	tcm->tcm_handle |= TC_H_MIN(cl);
 	return 0;
@@ -324,15 +325,14 @@  static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	__releases(d->lock)
 	__acquires(d->lock)
 {
-	struct net_device *dev = qdisc_dev(sch);
-
-	if (cl <= netdev_get_num_tc(dev)) {
+	if (cl >= TC_H_MIN_PRIORITY) {
 		int i;
 		__u32 qlen = 0;
 		struct Qdisc *qdisc;
 		struct gnet_stats_queue qstats = {0};
 		struct gnet_stats_basic_packed bstats = {0};
-		struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
+		struct net_device *dev = qdisc_dev(sch);
+		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
 
 		/* Drop lock here it will be reclaimed before touching
 		 * statistics this is required because the d->lock we
@@ -385,12 +385,25 @@  static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 
 	/* Walk hierarchy with a virtual class per tc */
 	arg->count = arg->skip;
-	for (ntx = arg->skip;
-	     ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
-	     ntx++) {
+	for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) {
+		if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) {
+			arg->stop = 1;
+			return;
+		}
+		arg->count++;
+	}
+
+	/* Pad the values and skip over unused traffic classes */
+	if (ntx < TC_MAX_QUEUE) {
+		arg->count = TC_MAX_QUEUE;
+		ntx = TC_MAX_QUEUE;
+	}
+
+	/* Reset offset, sort out remaining per-queue qdiscs */
+	for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) {
 		if (arg->fn(sch, ntx + 1, arg) < 0) {
 			arg->stop = 1;
-			break;
+			return;
 		}
 		arg->count++;
 	}