diff mbox

pkt_sched: fq: fix pacing for small frames

Message ID 1384455496.28716.27.camel@edumazet-glaptop2.roam.corp.google.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Nov. 14, 2013, 6:58 p.m. UTC
From: Eric Dumazet <edumazet@google.com>

For performance reasons, sch_fq tried hard to not setup timers for every
sent packet, using a quantum based heuristic : A delay is setup only if
the flow exhausted its credit.

Problem is that application limited flows can refill their credit
for every queued packet, and they can evade pacing.

This problem can also be triggered when TCP flows use small MSS values,
as TSO auto sizing builds packets that are smaller than the default fq
quantum (3028 bytes) 

This patch adds a 40 ms delay to guard flow credit refill.

We'll send a patch adding ability to tune this threshold for linux-3.14,
but this value should be good enough for most workloads.

Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Maciej Żenczykowski <maze@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
---
 net/sched/sch_fq.c |   28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

David Miller Nov. 14, 2013, 10:22 p.m. UTC | #1
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 14 Nov 2013 10:58:16 -0800

> @@ -655,9 +664,6 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
>  	if (tb[TCA_FQ_INITIAL_QUANTUM])
>  		q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
>  
> -	if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
> -		q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
> -
>  	if (tb[TCA_FQ_FLOW_MAX_RATE])
>  		q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
>  

I think it's at best confusing to suddenly stop ignoring a configuration
parameter the user is giving us.

Can you at least ratelimit warn if the parameter is specified so the user
has some chance to figure out what is happening?

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Nov. 14, 2013, 10:51 p.m. UTC | #2
On Thu, 2013-11-14 at 17:22 -0500, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Thu, 14 Nov 2013 10:58:16 -0800
> 
> > @@ -655,9 +664,6 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
> >  	if (tb[TCA_FQ_INITIAL_QUANTUM])
> >  		q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
> >  
> > -	if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
> > -		q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
> > -
> >  	if (tb[TCA_FQ_FLOW_MAX_RATE])
> >  		q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
> >  
> 
> I think it's at best confusing to suddenly stop ignoring a configuration
> parameter the user is giving us.
> 
> Can you at least ratelimit warn if the parameter is specified so the user
> has some chance to figure out what is happening?

Oh this parameter was removed in 7eec4174ff29cd
("pkt_sched: fq: fix non TCP flows pacing"), I probably should
have added this warning at that time...

OK, we can warn the user, will send a v2.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index fdc041c57853..46a0cacd7280 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -88,7 +88,7 @@  struct fq_sched_data {
 	struct fq_flow	internal;	/* for non classified or high prio packets */
 	u32		quantum;
 	u32		initial_quantum;
-	u32		flow_default_rate;/* rate per flow : bytes per second */
+	u32		flow_refill_delay;
 	u32		flow_max_rate;	/* optional max rate per flow */
 	u32		flow_plimit;	/* max packets per flow */
 	struct rb_root	*fq_root;
@@ -115,6 +115,7 @@  static struct fq_flow detached, throttled;
 static void fq_flow_set_detached(struct fq_flow *f)
 {
 	f->next = &detached;
+	f->age = jiffies;
 }
 
 static bool fq_flow_is_detached(const struct fq_flow *f)
@@ -160,8 +161,12 @@  static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
 }
 
 /* limit number of collected flows per round */
-#define FQ_GC_MAX 8
-#define FQ_GC_AGE (3*HZ)
+#define FQ_GC_MAX		8
+
+#define FQ_GC_AGE		(3*HZ)
+
+/* Flow credit is refilled after 40ms idle time */
+#define FQ_REFILL_DELAY		msecs_to_jiffies(40)
 
 static bool fq_gc_candidate(const struct fq_flow *f)
 {
@@ -373,17 +378,22 @@  static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	f->qlen++;
-	flow_queue_add(f, skb);
 	if (skb_is_retransmit(skb))
 		q->stat_tcp_retrans++;
 	sch->qstats.backlog += qdisc_pkt_len(skb);
 	if (fq_flow_is_detached(f)) {
 		fq_flow_add_tail(&q->new_flows, f);
-		if (q->quantum > f->credit)
-			f->credit = q->quantum;
+
+		if (time_after(jiffies, f->age + q->flow_refill_delay))
+			f->credit = max_t(u32, f->credit, q->quantum);
+
 		q->inactive_flows--;
 		qdisc_unthrottled(sch);
 	}
+
+	/* Note: this overwrites f->age */
+	flow_queue_add(f, skb);
+
 	if (unlikely(f == &q->internal)) {
 		q->stat_internal_packets++;
 		qdisc_unthrottled(sch);
@@ -461,7 +471,6 @@  begin:
 			fq_flow_add_tail(&q->old_flows, f);
 		} else {
 			fq_flow_set_detached(f);
-			f->age = jiffies;
 			q->inactive_flows++;
 		}
 		goto begin;
@@ -655,9 +664,6 @@  static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (tb[TCA_FQ_INITIAL_QUANTUM])
 		q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
 
-	if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
-		q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
-
 	if (tb[TCA_FQ_FLOW_MAX_RATE])
 		q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
 
@@ -705,7 +711,7 @@  static int fq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->flow_plimit		= 100;
 	q->quantum		= 2 * psched_mtu(qdisc_dev(sch));
 	q->initial_quantum	= 10 * psched_mtu(qdisc_dev(sch));
-	q->flow_default_rate	= 0;
+	q->flow_refill_delay	= FQ_REFILL_DELAY;
 	q->flow_max_rate	= ~0U;
 	q->rate_enable		= 1;
 	q->new_flows.first	= NULL;