diff mbox series

[net-next] net: sched: pie: enable timestamp based delay calculation

Message ID 20190827141938.23483-1-gautamramk@gmail.com
State Changes Requested
Delegated to: David Miller
Headers show
Series [net-next] net: sched: pie: enable timestamp based delay calculation | expand

Commit Message

Gautam Ramakrishnan Aug. 27, 2019, 2:19 p.m. UTC
RFC 8033 suggests an alternative approach to calculate the queue
delay in PIE by using per packet timestamps. This patch enables the
PIE implementation to do this.

The calculation of queue delay is as follows:
	qdelay = now - packet_enqueue_time

To enable the use of timestamps:
	modprobe sch_pie use_timestamps=1

Signed-off-by: Gautam Ramakrishnan <gautamramk@gmail.com>
Signed-off-by: Leslie Monis <lesliemonis@gmail.com>
Signed-off-by: Mohit P. Tahiliani <tahiliani@nitk.edu.in>
Cc: Dave Taht <dave.taht@gmail.com>
---
 net/sched/sch_pie.c | 55 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 6 deletions(-)

Comments

Eric Dumazet Aug. 27, 2019, 3:34 p.m. UTC | #1
On 8/27/19 4:19 PM, Gautam Ramakrishnan wrote:
> RFC 8033 suggests an alternative approach to calculate the queue
> delay in PIE by using per packet timestamps. This patch enables the
> PIE implementation to do this.
> 
> The calculation of queue delay is as follows:
> 	qdelay = now - packet_enqueue_time
> 
> To enable the use of timestamps:
> 	modprobe sch_pie use_timestamps=1


No module parameter is accepted these days.

Please add a new attribute instead,
so that pie can be used in both mode on the same host.

For a typical example of attribute addition, please take
a look at commit 48872c11b77271ef9b070bdc50afe6655c4eb9aa
("net_sched: sch_fq: add dctcp-like marking")
Dave Taht Aug. 27, 2019, 4:58 p.m. UTC | #2
On Tue, Aug 27, 2019 at 8:34 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 8/27/19 4:19 PM, Gautam Ramakrishnan wrote:
> > RFC 8033 suggests an alternative approach to calculate the queue
> > delay in PIE by using per packet timestamps. This patch enables the
> > PIE implementation to do this.
> >
> > The calculation of queue delay is as follows:
> >       qdelay = now - packet_enqueue_time
> >
> > To enable the use of timestamps:
> >       modprobe sch_pie use_timestamps=1
>
>
> No module parameter is accepted these days.
>
> Please add a new attribute instead,
> so that pie can be used in both mode on the same host.

I note that I think (but lack independent data) this improvement to
the rate estimator in pie should improve its usability and accuracy
in low rate or hw mq situations, and with some benchmarking to
show the cpu impact (at high and low rates) of this improvement as
well as the network
impact, the old way should probably be dropped and new way adopted without
needing a new variable to control it.

A commit showing the before/after cpu and network impact with a whole
bunch of flent benchmarks would be great.

(I'd also love to know if pie can be run with a lower target - like 5ms -
 with this mod in place)

>
> For a typical example of attribute addition, please take
> a look at commit 48872c11b77271ef9b070bdc50afe6655c4eb9aa
> ("net_sched: sch_fq: add dctcp-like marking")

utilizing ce_threshold in this way is actually independent of whether or
not you are using dctcp-style or rfc3168 style ecn marking.

It's "I'm self congesting... Dooo something! Anything, to reduce the load!"
Gautam Ramakrishnan Aug. 29, 2019, 7:21 a.m. UTC | #3
> > No module parameter is accepted these days.
> >
> > Please add a new attribute instead,
> > so that pie can be used in both mode on the same host.

We have prepared a new patch which sets the queue delay estimator as
an attribute instead of using module parameters

>
> I note that I think (but lack independent data) this improvement to
> the rate estimator in pie should improve its usability and accuracy
> in low rate or hw mq situations, and with some benchmarking to
> show the cpu impact (at high and low rates) of this improvement as
> well as the network
> impact, the old way should probably be dropped and new way adopted without
> needing a new variable to control it.
>
> A commit showing the before/after cpu and network impact with a whole
> bunch of flent benchmarks would be great.

We have tested for network impact using flent. However, we are unaware of
any standard methods to test for cpu impact. It would be helpful if
someone could suggest a method to test for cpu impact.
>
> (I'd also love to know if pie can be run with a lower target - like 5ms -
>  with this mod in place)

We will test it with target latencies of 5 and 10 ms as well.
>
> >
> > For a typical example of attribute addition, please take
> > a look at commit 48872c11b77271ef9b070bdc50afe6655c4eb9aa
> > ("net_sched: sch_fq: add dctcp-like marking")
>
> utilizing ce_threshold in this way is actually independent of whether or
> not you are using dctcp-style or rfc3168 style ecn marking.
>
> It's "I'm self congesting... Dooo something! Anything, to reduce the load!"
>
>
>
>
> --
>
> Dave Täht
> CTO, TekLibre, LLC
> http://www.teklibre.com
> Tel: 1-831-205-9740
diff mbox series

Patch

diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index df98a887eb89..1a19c77e6e42 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -19,12 +19,18 @@ 
 #include <linux/skbuff.h>
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
 
 #define QUEUE_THRESHOLD 16384
 #define DQCOUNT_INVALID -1
 #define MAX_PROB 0xffffffffffffffff
 #define PIE_SCALE 8
 
+static unsigned int use_timestamps; /* to calculate delay */
+module_param(use_timestamps, int, 0644);
+MODULE_PARM_DESC(use_timestamps, "enables timestamp based delay calculation.");
+
 /* parameters used */
 struct pie_params {
 	psched_time_t target;	/* user specified target delay in pschedtime */
@@ -79,6 +85,27 @@  static void pie_params_init(struct pie_params *params)
 	params->bytemode = false;
 }
 
+/* private skb vars */
+struct pie_skb_cb {
+	psched_time_t enqueue_time;
+};
+
+static struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
+{
+	qdisc_cb_private_validate(skb, sizeof(struct pie_skb_cb));
+	return (struct pie_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+static psched_time_t pie_get_enqueue_time(const struct sk_buff *skb)
+{
+	return get_pie_cb(skb)->enqueue_time;
+}
+
+static void pie_set_enqueue_time(struct sk_buff *skb)
+{
+	get_pie_cb(skb)->enqueue_time = psched_get_time();
+}
+
 static void pie_vars_init(struct pie_vars *vars)
 {
 	vars->dq_count = DQCOUNT_INVALID;
@@ -172,6 +199,9 @@  static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 	/* we can enqueue the packet */
 	if (enqueue) {
+		if (use_timestamps)
+			pie_set_enqueue_time(skb);
+
 		q->stats.packets_in++;
 		if (qdisc_qlen(sch) > q->stats.maxq)
 			q->stats.maxq = qdisc_qlen(sch);
@@ -323,6 +353,10 @@  static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
 				else
 					q->vars.burst_time = 0;
 			}
+
+			if (use_timestamps)
+				q->vars.qdelay = now -
+						 pie_get_enqueue_time(skb);
 		}
 	}
 }
@@ -332,19 +366,25 @@  static void calculate_probability(struct Qdisc *sch)
 	struct pie_sched_data *q = qdisc_priv(sch);
 	u32 qlen = sch->qstats.backlog;	/* queue size in bytes */
 	psched_time_t qdelay = 0;	/* in pschedtime */
-	psched_time_t qdelay_old = q->vars.qdelay;	/* in pschedtime */
+	psched_time_t qdelay_old = 0;	/* in pschedtime */
 	s64 delta = 0;		/* determines the change in probability */
 	u64 oldprob;
 	u64 alpha, beta;
 	u32 power;
 	bool update_prob = true;
 
-	q->vars.qdelay_old = q->vars.qdelay;
+	if (use_timestamps) {
+		qdelay = q->vars.qdelay;
+		qdelay_old = q->vars.qdelay_old;
+	} else {
+		qdelay_old = q->vars.qdelay;
+		q->vars.qdelay_old = q->vars.qdelay;
 
-	if (q->vars.avg_dq_rate > 0)
-		qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
-	else
-		qdelay = 0;
+		if (q->vars.avg_dq_rate > 0)
+			qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
+		else
+			qdelay = 0;
+	}
 
 	/* If qdelay is zero and qlen is not, it means qlen is very small, less
 	 * than dequeue_rate, so we do not update probabilty in this round
@@ -438,6 +478,9 @@  static void calculate_probability(struct Qdisc *sch)
 	    q->vars.prob == 0 &&
 	    q->vars.avg_dq_rate > 0)
 		pie_vars_init(&q->vars);
+
+	if (use_timestamps)
+		q->vars.qdelay_old = qdelay;
 }
 
 static void pie_timer(struct timer_list *t)