diff mbox series

[next-queue,v4,3/4] net/sched: Introduce Credit Based Shaper (CBS) qdisc

Message ID 20171004002831.18371-4-vinicius.gomes@intel.com
State Changes Requested
Headers show
Series TSN: Add qdisc based config interface for CBS | expand

Commit Message

Vinicius Costa Gomes Oct. 4, 2017, 12:28 a.m. UTC
This queueing discipline implements the shaper algorithm defined by
the 802.1Q-2014 Section 8.6.8.2 and detailed in Annex L.

Its primary usage is to apply some bandwidth reservation to user
defined traffic classes, which are mapped to different queues via the
mqprio qdisc.

Initially, it only supports offloading the traffic shaping work to
supporting controllers.

Later, when a software implementation is added, the current dependency
on being installed "under" mqprio can be lifted.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
---
 include/linux/netdevice.h      |   1 +
 include/net/pkt_sched.h        |   9 ++
 include/uapi/linux/pkt_sched.h |  17 ++++
 net/sched/Kconfig              |  11 ++
 net/sched/Makefile             |   1 +
 net/sched/sch_cbs.c            | 225 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 264 insertions(+)
 create mode 100644 net/sched/sch_cbs.c

Comments

Jiri Pirko Oct. 4, 2017, 6:36 a.m. UTC | #1
Wed, Oct 04, 2017 at 02:28:30AM CEST, vinicius.gomes@intel.com wrote:
>This queueing discipline implements the shaper algorithm defined by
>the 802.1Q-2014 Section 8.6.8.2 and detailed in Annex L.
>
>It's primary usage is to apply some bandwidth reservation to user
>defined traffic classes, which are mapped to different queues via the
>mqprio qdisc.
>
>Initially, it only supports offloading the traffic shaping work to
>supporting controllers.
>
>Later, when a software implementation is added, the current dependency
>on being installed "under" mqprio can be lifted.
>
>Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
>Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
>---
> include/linux/netdevice.h      |   1 +
> include/net/pkt_sched.h        |   9 ++
> include/uapi/linux/pkt_sched.h |  17 ++++
> net/sched/Kconfig              |  11 ++
> net/sched/Makefile             |   1 +
> net/sched/sch_cbs.c            | 225 +++++++++++++++++++++++++++++++++++++++++
> 6 files changed, 264 insertions(+)
> create mode 100644 net/sched/sch_cbs.c
>
>diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>index e1d6ef130611..b8798adc214f 100644
>--- a/include/linux/netdevice.h
>+++ b/include/linux/netdevice.h
>@@ -775,6 +775,7 @@ enum tc_setup_type {
> 	TC_SETUP_CLSFLOWER,
> 	TC_SETUP_CLSMATCHALL,
> 	TC_SETUP_CLSBPF,
>+	TC_SETUP_CBS,

Please split this into 2 patches. One will introduce the new qdisc,
second will add offload capabilities.

[...]

	
>+static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
>+	.next		=	NULL,
>+	.id		=	"cbs",
>+	.priv_size	=	sizeof(struct cbs_sched_data),
>+	.enqueue	=	cbs_enqueue,
>+	.dequeue	=	qdisc_dequeue_head,
>+	.peek		=	qdisc_peek_dequeued,
>+	.init		=	cbs_init,
>+	.reset		=	qdisc_reset_queue,
>+	.destroy	=	cbs_destroy,
>+	.change		=	cbs_change,
>+	.dump		=	cbs_dump,
>+	.owner		=	THIS_MODULE,
>+};

I don't see a software implementation for this. Looks like you are
trying to abuse the tc subsystem to bypass the kernel. Could you please explain
this? The golden rule is: implement in kernel, then offload.
Levi Pearson Oct. 5, 2017, 6:09 p.m. UTC | #2
On Wed, Oct 4, 2017 at 12:36 AM, Jiri Pirko <jiri@resnulli.us> wrote:

>>+static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
>>+      .next           =       NULL,
>>+      .id             =       "cbs",
>>+      .priv_size      =       sizeof(struct cbs_sched_data),
>>+      .enqueue        =       cbs_enqueue,
>>+      .dequeue        =       qdisc_dequeue_head,
>>+      .peek           =       qdisc_peek_dequeued,
>>+      .init           =       cbs_init,
>>+      .reset          =       qdisc_reset_queue,
>>+      .destroy        =       cbs_destroy,
>>+      .change         =       cbs_change,
>>+      .dump           =       cbs_dump,
>>+      .owner          =       THIS_MODULE,
>>+};
>
> I don't see a software implementation for this. Looks like you are
> trying abuse tc subsystem to bypass kernel. Could you please explain
> this? The golden rule is: implement in kernel, then offload.

It would be a shame if this were blocked due to a missing software
implementation. This module is analogous to (and designed to work
with) the mqprio module; it directly configures the 802.1Qav
(Forwarding and Queuing for Time-Sensitive Streams) functionality of
multi-queue NICs with that capability. I'm not sure what makes it seem
like an attempt to "bypass the kernel"; it's actually an attempt to
get an appropriate configuration path *into* the kernel, which has
been missing for some time.

While it would be valuable to have a CBS software-only implementation,
and Vinicius and colleagues have mentioned plans to implement one,
most users will have chosen Qav-compliant NICs and will prefer to use
the hardware capability. In fact they are often *already* using that
capability, but configure it via non-standardized interfaces in
out-of-tree or vendor-tree drivers. I believe it's valuable to have
the "knobs" fit in with the mqprio qdisc and the overall tc subsystem
rather than forcing users through various unrelated configuration
tools, but ultimately the hooks just need to be in the network
subsystem so the drivers can be told how the user wants to set the
registers.

It *might* be reasonable to add the functionality of this to mqprio
instead of a separate module, but this is only one of many possible
802.1Q shapers that could be selected and configured (with more being
defined by IEEE 802.1 working groups for different use cases), and it
seems cleaner to me to have their configuration be through separate
modules than crammed into an already-confusing one, especially since
mqprio has much broader applicability than CBS and it probably doesn't
make sense to burden all mqprio users with the configuration option
overhead.

This meets a specific need in industry (this is widely used in
automotive infotainment devices with broad hardware support across the
SoCs targeted at that industry) that is not well-served by a software
implementation of class-level shaping. As a maintainer of the OpenAvnu
project (sponsored by Avnu, an industry alliance formed around the TSN
standards), I will be integrating support for this as soon as it's
available to our traffic shaping management userspace tools, which
currently have to rely on out-of-tree drivers with custom interfaces
or the HTB shaper which can be configured close to CBS, but with
greatly increased overhead.


Levi
David Miller Oct. 5, 2017, 6:29 p.m. UTC | #3
From: Levi Pearson <levipearson@gmail.com>
Date: Thu, 5 Oct 2017 12:09:32 -0600

> It would be a shame if this were blocked due to a missing software
> implementation.

Quite the contrary, I think a software implementation is a minimum
requirement for inclusion of this feature.

Without a software implementation, there is no clear definition of
what is supposed to happen, and no clear way for people to test those
expectations unless they have the specific hardware.

I completely agree with Jiri.  Hardware offload first is _not_ how
we do things in the Linux networking.
Rodney Cummings Oct. 5, 2017, 6:41 p.m. UTC | #4
The IEEE Std 802.1Q specs for credit-based shaper require precise transmit decisions
within a 125 microsecond window of time.

Even with the Preempt RT patch or similar enhancements, that isn't very practical
as software-only. I doubt that software would conform to the standard's
requirements.

This is analogous to memory, or CPU.
.

> -----Original Message-----
> From: David Miller [mailto:davem@davemloft.net]
> Sent: Thursday, October 5, 2017 1:29 PM
> To: levipearson@gmail.com
> Cc: jiri@resnulli.us; vinicius.gomes@intel.com; netdev@vger.kernel.org;
> intel-wired-lan@lists.osuosl.org; jhs@mojatatu.com;
> xiyou.wangcong@gmail.com; andre.guedes@intel.com; ivan.briano@intel.com;
> jesus.sanchez-palencia@intel.com; boon.leong.ong@intel.com;
> richardcochran@gmail.com; henrik@austad.us; Rodney Cummings
> <rodney.cummings@ni.com>
> Subject: Re: [next-queue PATCH v4 3/4] net/sched: Introduce Credit Based
> Shaper (CBS) qdisc
> 
> From: Levi Pearson <levipearson@gmail.com>
> Date: Thu, 5 Oct 2017 12:09:32 -0600
> 
> > It would be a shame if this were blocked due to a missing software
> > implementation.
> 
> Quite the contrary, I think a software implementation is a minimum
> requirement for inclusion of this feature.
> 
> Without a software implementation, there is no clear definition of
> what is supposed to happen, and no clear way for people to test those
> expectations unless they have the specific hardware.
> 
> I completely agree with Jiri.  Hardware offload first is _not_ how
> we do things in the Linux networking.
David Miller Oct. 5, 2017, 7:05 p.m. UTC | #5
From: Rodney Cummings <rodney.cummings@ni.com>
Date: Thu, 5 Oct 2017 18:41:48 +0000

> The IEEE Std 802.1Q specs for credit-based shaper require precise transmit decisions
> within a 125 microsecond window of time.
> 
> Even with the Preempt RT patch or similar enhancements, that isn't very practical
> as software-only. I doubt that software would conform to the standard's
> requirements.
> 
> This is analogous to memory, or CPU.

I feel like this is looking for an excuse to not have to at least try to implement
the software version of CBS.
Rodney Cummings Oct. 5, 2017, 7:17 p.m. UTC | #6
No excuse. If the software cannot meet the standard's requirements, it is non-conformant,
which means it cannot be called a standard credit-based shaper.

But... I have no objection if someone wants to try software-only. I'm just saying that it
is a waste of time for me.

> -----Original Message-----
> From: David Miller [mailto:davem@davemloft.net]
> Sent: Thursday, October 5, 2017 2:05 PM
> To: Rodney Cummings <rodney.cummings@ni.com>
> Cc: levipearson@gmail.com; jiri@resnulli.us; vinicius.gomes@intel.com;
> netdev@vger.kernel.org; intel-wired-lan@lists.osuosl.org;
> jhs@mojatatu.com; xiyou.wangcong@gmail.com; andre.guedes@intel.com;
> ivan.briano@intel.com; jesus.sanchez-palencia@intel.com;
> boon.leong.ong@intel.com; richardcochran@gmail.com; henrik@austad.us
> Subject: Re: [next-queue PATCH v4 3/4] net/sched: Introduce Credit Based
> Shaper (CBS) qdisc
> 
> From: Rodney Cummings <rodney.cummings@ni.com>
> Date: Thu, 5 Oct 2017 18:41:48 +0000
> 
> > The IEEE Std 802.1Q specs for credit-based shaper require precise
> transmit decisions
> > within a 125 microsecond window of time.
> >
> > Even with the Preempt RT patch or similar enhancements, that isn't very
> practical
> > as software-only. I doubt that software would conform to the standard's
> > requirements.
> >
> > This is analogous to memory, or CPU.
> 
> I feel like this is looking for an excuse to not have to at least try to
> implement
> the software version of CBS.
Vinicius Costa Gomes Oct. 5, 2017, 7:57 p.m. UTC | #7
Hi Jiri,

Jiri Pirko <jiri@resnulli.us> writes:

> Wed, Oct 04, 2017 at 02:28:30AM CEST, vinicius.gomes@intel.com wrote:
>>This queueing discipline implements the shaper algorithm defined by
>>the 802.1Q-2014 Section 8.6.8.2 and detailed in Annex L.
>>
>>It's primary usage is to apply some bandwidth reservation to user
>>defined traffic classes, which are mapped to different queues via the
>>mqprio qdisc.
>>
>>Initially, it only supports offloading the traffic shaping work to
>>supporting controllers.
>>
>>Later, when a software implementation is added, the current dependency
>>on being installed "under" mqprio can be lifted.
>>
>>Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
>>Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
>>---
>> include/linux/netdevice.h      |   1 +
>> include/net/pkt_sched.h        |   9 ++
>> include/uapi/linux/pkt_sched.h |  17 ++++
>> net/sched/Kconfig              |  11 ++
>> net/sched/Makefile             |   1 +
>> net/sched/sch_cbs.c            | 225 +++++++++++++++++++++++++++++++++++++++++
>> 6 files changed, 264 insertions(+)
>> create mode 100644 net/sched/sch_cbs.c
>>
>>diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>>index e1d6ef130611..b8798adc214f 100644
>>--- a/include/linux/netdevice.h
>>+++ b/include/linux/netdevice.h
>>@@ -775,6 +775,7 @@ enum tc_setup_type {
>> 	TC_SETUP_CLSFLOWER,
>> 	TC_SETUP_CLSMATCHALL,
>> 	TC_SETUP_CLSBPF,
>>+	TC_SETUP_CBS,
>
> Please split this into 2 patches. One will introduce the new qdisc,
> second will add offload capabilities.
>

Of course.

> [...]
>
>
>>+static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
>>+	.next		=	NULL,
>>+	.id		=	"cbs",
>>+	.priv_size	=	sizeof(struct cbs_sched_data),
>>+	.enqueue	=	cbs_enqueue,
>>+	.dequeue	=	qdisc_dequeue_head,
>>+	.peek		=	qdisc_peek_dequeued,
>>+	.init		=	cbs_init,
>>+	.reset		=	qdisc_reset_queue,
>>+	.destroy	=	cbs_destroy,
>>+	.change		=	cbs_change,
>>+	.dump		=	cbs_dump,
>>+	.owner		=	THIS_MODULE,
>>+};
>
> I don't see a software implementation for this. Looks like you are
> trying abuse tc subsystem to bypass kernel. Could you please explain
> this? The golden rule is: implement in kernel, then offload.

The reason was that we didn't have a use case for the software
implementation right now, it would be added in a later series.

But as that was requested (and it makes sense), I will add it for the
next version of this series (it is already written, just need to test it
better).


Cheers,
--
Vinicius
Jiri Pirko Oct. 5, 2017, 9:15 p.m. UTC | #8
Thu, Oct 05, 2017 at 09:57:34PM CEST, vinicius.gomes@intel.com wrote:
>Hi Jiri,
>
>Jiri Pirko <jiri@resnulli.us> writes:
>
>> Wed, Oct 04, 2017 at 02:28:30AM CEST, vinicius.gomes@intel.com wrote:
>>>This queueing discipline implements the shaper algorithm defined by
>>>the 802.1Q-2014 Section 8.6.8.2 and detailed in Annex L.
>>>
>>>It's primary usage is to apply some bandwidth reservation to user
>>>defined traffic classes, which are mapped to different queues via the
>>>mqprio qdisc.
>>>
>>>Initially, it only supports offloading the traffic shaping work to
>>>supporting controllers.
>>>
>>>Later, when a software implementation is added, the current dependency
>>>on being installed "under" mqprio can be lifted.
>>>
>>>Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
>>>Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
>>>---
>>> include/linux/netdevice.h      |   1 +
>>> include/net/pkt_sched.h        |   9 ++
>>> include/uapi/linux/pkt_sched.h |  17 ++++
>>> net/sched/Kconfig              |  11 ++
>>> net/sched/Makefile             |   1 +
>>> net/sched/sch_cbs.c            | 225 +++++++++++++++++++++++++++++++++++++++++
>>> 6 files changed, 264 insertions(+)
>>> create mode 100644 net/sched/sch_cbs.c
>>>
>>>diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>>>index e1d6ef130611..b8798adc214f 100644
>>>--- a/include/linux/netdevice.h
>>>+++ b/include/linux/netdevice.h
>>>@@ -775,6 +775,7 @@ enum tc_setup_type {
>>> 	TC_SETUP_CLSFLOWER,
>>> 	TC_SETUP_CLSMATCHALL,
>>> 	TC_SETUP_CLSBPF,
>>>+	TC_SETUP_CBS,
>>
>> Please split this into 2 patches. One will introduce the new qdisc,
>> second will add offload capabilities.
>>
>
>Of course.
>
>> [...]
>>
>>
>>>+static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
>>>+	.next		=	NULL,
>>>+	.id		=	"cbs",
>>>+	.priv_size	=	sizeof(struct cbs_sched_data),
>>>+	.enqueue	=	cbs_enqueue,
>>>+	.dequeue	=	qdisc_dequeue_head,
>>>+	.peek		=	qdisc_peek_dequeued,
>>>+	.init		=	cbs_init,
>>>+	.reset		=	qdisc_reset_queue,
>>>+	.destroy	=	cbs_destroy,
>>>+	.change		=	cbs_change,
>>>+	.dump		=	cbs_dump,
>>>+	.owner		=	THIS_MODULE,
>>>+};
>>
>> I don't see a software implementation for this. Looks like you are
>> trying abuse tc subsystem to bypass kernel. Could you please explain
>> this? The golden rule is: implement in kernel, then offload.
>
>The reason was that we didn't have a use case for the software
>implementation right now, it would be added in a later series.

The policy is very strict, SW implementation first, HW implementation later.



>
>But as that was requested (and it makes sense), I will add it for the
>next version of this series (it is already written, just need to test it
>better).

Good.



>
>
>Cheers,
>--
>Vinicius
Levi Pearson Oct. 5, 2017, 9:23 p.m. UTC | #9
(apologies to davem for the repeat; I accidentally did a reply vs.
reply-all the first time)

On Thu, Oct 5, 2017 at 1:05 PM, David Miller <davem@davemloft.net> wrote:
> From: Rodney Cummings <rodney.cummings@ni.com>
> Date: Thu, 5 Oct 2017 18:41:48 +0000
>
>> The IEEE Std 802.1Q specs for credit-based shaper require precise transmit decisions
>> within a 125 microsecond window of time.
>>
>> Even with the Preempt RT patch or similar enhancements, that isn't very practical
>> as software-only. I doubt that software would conform to the standard's
>> requirements.
>>
>> This is analogous to memory, or CPU.
>
> I feel like this is looking for an excuse to not have to at least try to implement
> the software version of CBS.

I don't understand why you attribute this to excuse-making. Is the
objection due to the fact that the user interface is provided through
a qdisc module? In that case, is there a better configuration
interface for setting up traffic shaping registers that could be used
across all the NICs that provide the capability? There are quite a
number of them now, and the lack of kernel interfaces to the hardware
makes coordinating the userspace effort to support the protocols far
more difficult than it needs to be.

As a contrasting example, look at the DCB shaping functionality,
provided by the ETS shaper. It's specified in 802.1Q right next to the
CBS shaper. It has no software implementation in a qdisc module as far
as I can tell (although it should be less resource-intensive to
implement), yet there's a whole netlink protocol for configuring it. I
don't think it makes sense to tack on the dcb netlink interface to
every driver that implements Qav; most don't have the DCB shapers, and
the user-level control protocol for FQTSS is SRP instead of DCB's LLDP
extensions, so completely different userspace tools would be required
as well.

I just want a simple, standard interface for configuring some fairly
common and IEEE-standard hardware features related to AVB/TSN traffic
shaping. Do we need our own netlink protocol for TSN configuration? It
seems to be massive overkill for an interface to write a single
register, but I suppose it might also be used for configuring TSN
parameters in local switch devices, such as Qbv windows, which need
quite a bit more information. I would be happy to do some of the work,
but I'd like an idea of what kind of interface would be acceptable
before writing up an RFC implementation.


Levi
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e1d6ef130611..b8798adc214f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -775,6 +775,7 @@  enum tc_setup_type {
 	TC_SETUP_CLSFLOWER,
 	TC_SETUP_CLSMATCHALL,
 	TC_SETUP_CLSBPF,
+	TC_SETUP_CBS,
 };
 
 /* These structures hold the attributes of xdp state that are being passed
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 259bc191ba59..7c597b050b36 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -146,4 +146,13 @@  static inline bool is_classid_clsact_egress(u32 classid)
 	       TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS);
 }
 
+struct tc_cbs_qopt_offload {
+	u8 enable;	/* 1: program the shaper; 0: reset the queue */
+	s32 queue;	/* index of the tx queue being configured */
+	s32 hicredit;	/* upper bound on accumulated credits, in bytes */
+	s32 locredit;	/* lower bound on credits */
+	s32 idleslope;	/* credit accumulation rate, in kbit/s */
+	s32 sendslope;	/* credit depletion rate, in kbit/s (negative) */
+};
+
 #endif
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 099bf5528fed..27c849c053cf 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -871,4 +871,21 @@  struct tc_pie_xstats {
 	__u32 maxq;             /* maximum queue size */
 	__u32 ecn_mark;         /* packets marked with ecn*/
 };
+
+/* CBS */
+struct tc_cbs_qopt {
+	__s32 hicredit;		/* max accumulated credits, in bytes */
+	__s32 locredit;		/* min credits that can be reached */
+	__s32 idleslope;	/* credit gain rate, in kbit/s */
+	__s32 sendslope;	/* credit drain rate, in kbit/s (negative) */
+};
+
+enum {
+	TCA_CBS_UNSPEC,
+	TCA_CBS_PARMS,
+	__TCA_CBS_MAX,
+};
+
+#define TCA_CBS_MAX (__TCA_CBS_MAX - 1)
+
 #endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e70ed26485a2..c03d86a7775e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -172,6 +172,17 @@  config NET_SCH_TBF
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_tbf.
 
+config NET_SCH_CBS
+	tristate "Credit Based Shaper (CBS)"
+	---help---
+	  Say Y here if you want to use the Credit Based Shaper (CBS) packet
+	  scheduling algorithm.
+
+	  See the top of <file:net/sched/sch_cbs.c> for more details.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_cbs.
+
 config NET_SCH_GRED
 	tristate "Generic Random Early Detection (GRED)"
 	---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7b915d226de7..80c8f92d162d 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -52,6 +52,7 @@  obj-$(CONFIG_NET_SCH_FQ_CODEL)	+= sch_fq_codel.o
 obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
 obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
 obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
+obj-$(CONFIG_NET_SCH_CBS)	+= sch_cbs.o
 
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
new file mode 100644
index 000000000000..3e0fb0b92160
--- /dev/null
+++ b/net/sched/sch_cbs.c
@@ -0,0 +1,225 @@ 
+/*
+ * net/sched/sch_cbs.c	Credit Based Shaper
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
+ *
+ */
+
+/* Credit Based Shaper (CBS)
+   =========================
+
+   This is a simple rate-limiting shaper aimed at TSN applications on
+   systems with known traffic workloads.
+
+   Its algorithm is defined by the IEEE 802.1Q-2014 Specification,
+   Section 8.6.8.2, and explained in more detail in the Annex L of the
+   same specification.
+
+   There are four tunables to be considered:
+
+	'idleslope': Idleslope is the rate of credits that is
+	accumulated (in kilobits per second) when there is at least
+	one packet waiting for transmission. Packets are transmitted
+	when the current value of credits is equal to or greater than
+	zero. When there is no packet to be transmitted the amount of
+	credits is set to zero. This is the main tunable of the CBS
+	algorithm.
+
+	'sendslope':
+	Sendslope is the rate of credits that is depleted (it should be a
+	negative number of kilobits per second) when a transmission is
+	occurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section
+	8.6.8.2 item g):
+
+	sendslope = idleslope - port_transmit_rate
+
+	'hicredit': Hicredit defines the maximum amount of credits (in
+	bytes) that can be accumulated. Hicredit depends on the
+	characteristics of interfering traffic,
+	'max_interference_size' is the maximum size of any burst of
+	traffic that can delay the transmission of a frame that is
+	available for transmission for this traffic class, (IEEE
+	802.1Q-2014 Annex L, Equation L-3):
+
+	hicredit = max_interference_size * (idleslope / port_transmit_rate)
+
+	'locredit': Locredit is the minimum amount of credits that can
+	be reached. It is a function of the traffic flowing through
+	this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2):
+
+	locredit = max_frame_size * (sendslope / port_transmit_rate)
+*/
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
+
+struct cbs_sched_data {
+	s32 queue;	/* tx queue index, computed in cbs_init() */
+	s32 locredit;	/* last locredit accepted by the driver */
+	s32 hicredit;	/* last hicredit accepted by the driver */
+	s32 sendslope;	/* last sendslope accepted by the driver */
+	s32 idleslope;	/* last idleslope accepted by the driver */
+};
+
+static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+		       struct sk_buff **to_free)
+{
+	return qdisc_enqueue_tail(skb, sch);	/* to_free is never used */
+}
+
+static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
+	[TCA_CBS_PARMS]	= { .len = sizeof(struct tc_cbs_qopt) },
+};
+
+static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+	struct tc_cbs_qopt_offload cbs = { };
+	struct nlattr *tb[TCA_CBS_MAX + 1];
+	const struct net_device_ops *ops;
+	struct tc_cbs_qopt *qopt;
+	struct net_device *dev;
+	int err;
+
+	err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	if (!tb[TCA_CBS_PARMS])		/* the parameter block is mandatory */
+		goto done;
+
+	qopt = nla_data(tb[TCA_CBS_PARMS]);
+
+	dev = qdisc_dev(sch);
+	ops = dev->netdev_ops;
+
+	cbs.queue = q->queue;		/* index computed in cbs_init() */
+	cbs.enable = 1;
+	cbs.hicredit = qopt->hicredit;
+	cbs.locredit = qopt->locredit;
+	cbs.idleslope = qopt->idleslope;
+	cbs.sendslope = qopt->sendslope;
+
+	err = -EOPNOTSUPP;
+	if (!ops->ndo_setup_tc)		/* no driver hook: cannot offload */
+		goto done;
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);
+	if (err < 0)			/* rejected: q keeps its old values */
+		goto done;
+
+	q->hicredit = cbs.hicredit;	/* cache only what the driver took */
+	q->locredit = cbs.locredit;
+	q->idleslope = cbs.idleslope;
+	q->sendslope = cbs.sendslope;
+
+done:
+	return err;
+}
+
+static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+
+	if (!opt)		/* creation without parameters is rejected */
+		return -EINVAL;
+	/* offset of our tx queue from tx queue 0, stored as its index */
+	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
+
+	return cbs_change(sch, opt);	/* parse and offload the parameters */
+}
+
+static void cbs_destroy(struct Qdisc *sch)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+	struct tc_cbs_qopt_offload cbs = { };
+	const struct net_device_ops *ops;
+	struct net_device *dev;
+	int err;
+
+	q->hicredit = 0;
+	q->locredit = 0;
+	q->idleslope = 0;
+	q->sendslope = 0;
+
+	dev = qdisc_dev(sch);
+	ops = dev->netdev_ops;
+
+	if (!ops->ndo_setup_tc)		/* no driver hook: nothing to undo */
+		return;
+
+	cbs.queue = q->queue;
+	cbs.enable = 0;		/* credits/slopes stay 0 from the = { } init */
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);
+	if (err < 0)		/* void return: the failure can only be logged */
+		pr_warn("Couldn't reset queue %d to default values\n",
+			cbs.queue);
+}
+
+static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+	struct nlattr *nest;
+	struct tc_cbs_qopt opt;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	opt.hicredit = q->hicredit;	/* all four fields of opt are set */
+	opt.locredit = q->locredit;
+	opt.sendslope = q->sendslope;
+	opt.idleslope = q->idleslope;
+
+	if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, nest);
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);	/* also reached with nest == NULL */
+	return -1;
+}
+
+static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
+	.next		=	NULL,
+	.id		=	"cbs",
+	.priv_size	=	sizeof(struct cbs_sched_data),
+	.enqueue	=	cbs_enqueue,
+	.dequeue	=	qdisc_dequeue_head,
+	.peek		=	qdisc_peek_dequeued,
+	.init		=	cbs_init,
+	.reset		=	qdisc_reset_queue,
+	.destroy	=	cbs_destroy,
+	.change		=	cbs_change,
+	.dump		=	cbs_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init cbs_module_init(void)
+{
+	return register_qdisc(&cbs_qdisc_ops);
+}
+
+static void __exit cbs_module_exit(void)
+{
+	unregister_qdisc(&cbs_qdisc_ops);
+}
+module_init(cbs_module_init)
+module_exit(cbs_module_exit)
+MODULE_LICENSE("GPL");