diff mbox

[2/2] : pkt_sched: Convert CBQ to tasklet_hrtimer.

Message ID 20090821.170333.153413841.davem@davemloft.net
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

David Miller Aug. 22, 2009, 12:03 a.m. UTC
This code expects to run in softirq context, and bare hrtimers
run in hw IRQ context.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cbq.c |   23 +++++++++++++----------
 1 files changed, 13 insertions(+), 10 deletions(-)

Comments

Thomas Gleixner Aug. 22, 2009, 9:17 a.m. UTC | #1
On Fri, 21 Aug 2009, David Miller wrote:
> This code expects to run in softirq context, and bare hrtimers
> run in hw IRQ context.
> 
> Signed-off-by: David S. Miller <davem@davemloft.net>

> @@ -510,12 +512,12 @@ static void cbq_ovl_delay(struct cbq_class *cl)
>  
>  			expires = ktime_set(0, 0);
>  			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
> -			if (hrtimer_try_to_cancel(&q->delay_timer) &&
> -			    ktime_to_ns(ktime_sub(
> -					hrtimer_get_expires(&q->delay_timer),
> -					expires)) > 0)
> -				hrtimer_set_expires(&q->delay_timer, expires);
> -			hrtimer_restart(&q->delay_timer);
> +			ht = &q->delay_timer.timer;
> +			if (hrtimer_try_to_cancel(ht) &&
> +			    ktime_to_ns(ktime_sub(hrtimer_get_expires(ht),
> +						  expires)) > 0)
> +				hrtimer_set_expires(ht, expires);
> +			hrtimer_restart(ht);

This code looks still wrong. The new expiry time is only set when the
timer is already active. If the timer is not active then you restart
it with the last stale expiry time which is probably in the past, but
might be somewhere in the future as well. I doubt that this is the
intention.

So what you really want is:
 
 			cl->cpriority = TC_CBQ_MAXPRIO;
 			q->pmask |= (1<<TC_CBQ_MAXPRIO);
 
-			expires = ktime_set(0, 0);
-			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
-			if (hrtimer_try_to_cancel(&q->delay_timer) &&
-			    ktime_to_ns(ktime_sub(
-					hrtimer_get_expires(&q->delay_timer),
-					expires)) > 0)
-				hrtimer_set_expires(&q->delay_timer, expires);
-			hrtimer_restart(&q->delay_timer);
+			ht = &q->delay_timer.timer;
+			expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
+			if (!hrtimer_active(ht) ||
+			    expires.tv64 < hrtimer_get_expires(ht).tv64)
+				hrtimer_start(ht, expires, HRTIMER_MODE_ABS);
 			cl->delayed = 1;
 			cl->xstats.overactions++;
 			return;

This starts the timer when

     1) the timer is not active

     2) the timer is active and the new expiry time is before the
     	current one. (hrtimer_start takes care of stopping the active
     	timer)

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Aug. 23, 2009, 1:08 a.m. UTC | #2
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 22 Aug 2009 11:17:53 +0200 (CEST)

> On Fri, 21 Aug 2009, David Miller wrote:
>> This code expects to run in softirq context, and bare hrtimers
>> run in hw IRQ context.
>> 
>> Signed-off-by: David S. Miller <davem@davemloft.net>
> 
>> @@ -510,12 +512,12 @@ static void cbq_ovl_delay(struct cbq_class *cl)
>>  
>>  			expires = ktime_set(0, 0);
>>  			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
>> -			if (hrtimer_try_to_cancel(&q->delay_timer) &&
>> -			    ktime_to_ns(ktime_sub(
>> -					hrtimer_get_expires(&q->delay_timer),
>> -					expires)) > 0)
>> -				hrtimer_set_expires(&q->delay_timer, expires);
>> -			hrtimer_restart(&q->delay_timer);
>> +			ht = &q->delay_timer.timer;
>> +			if (hrtimer_try_to_cancel(ht) &&
>> +			    ktime_to_ns(ktime_sub(hrtimer_get_expires(ht),
>> +						  expires)) > 0)
>> +				hrtimer_set_expires(ht, expires);
>> +			hrtimer_restart(ht);
> 
> This code looks still wrong.

I'm not convinced either way, the code logic here has been like
this since at least 2.2.x, where it reads:

	if (!cl->delayed) {
		unsigned long sched = jiffies;
 ...
		if (delay > 0) {
			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
 ...
			if (del_timer(&q->delay_timer) &&
			    (long)(q->delay_timer.expires - sched) > 0)
				q->delay_timer.expires = sched;
			add_timer(&q->delay_timer);

And what we have there now is a conversion to hrtimers.  The intention
to always schedule the timer is very clear in the above code snippet.

Furthermore, fixing this logic is a seperate change from dealing with
the softirq context issue.

So please review my patch in the context of a straight conversion to
tasklet_hrtimer, and let's deal with the timer offset logic here
seperately (and in -next, not 2.6.31-rcX)

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Gleixner Aug. 23, 2009, 7:22 a.m. UTC | #3
B1;2005;0cOn Sat, 22 Aug 2009, David Miller wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
> Date: Sat, 22 Aug 2009 11:17:53 +0200 (CEST)
> 
> > On Fri, 21 Aug 2009, David Miller wrote:
> >> This code expects to run in softirq context, and bare hrtimers
> >> run in hw IRQ context.
> >> 
> >> Signed-off-by: David S. Miller <davem@davemloft.net>
> > 
> >> @@ -510,12 +512,12 @@ static void cbq_ovl_delay(struct cbq_class *cl)
> >>  
> >>  			expires = ktime_set(0, 0);
> >>  			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
> >> -			if (hrtimer_try_to_cancel(&q->delay_timer) &&
> >> -			    ktime_to_ns(ktime_sub(
> >> -					hrtimer_get_expires(&q->delay_timer),
> >> -					expires)) > 0)
> >> -				hrtimer_set_expires(&q->delay_timer, expires);
> >> -			hrtimer_restart(&q->delay_timer);
> >> +			ht = &q->delay_timer.timer;
> >> +			if (hrtimer_try_to_cancel(ht) &&
> >> +			    ktime_to_ns(ktime_sub(hrtimer_get_expires(ht),
> >> +						  expires)) > 0)
> >> +				hrtimer_set_expires(ht, expires);
> >> +			hrtimer_restart(ht);
> > 
> > This code looks still wrong.
> 
> I'm not convinced either way, the code logic here has been like
> this since at least 2.2.x, where it reads:
> 
> 	if (!cl->delayed) {
> 		unsigned long sched = jiffies;
>  ...
> 		if (delay > 0) {
> 			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
>  ...
> 			if (del_timer(&q->delay_timer) &&
> 			    (long)(q->delay_timer.expires - sched) > 0)
> 				q->delay_timer.expires = sched;
> 			add_timer(&q->delay_timer);

That does not make more sense than the hrtimer version :)

> And what we have there now is a conversion to hrtimers.  The intention
> to always schedule the timer is very clear in the above code snippet.
> 
> Furthermore, fixing this logic is a seperate change from dealing with
> the softirq context issue.

Sure. Just mentioned it as a reminder.
 
> So please review my patch in the context of a straight conversion to
> tasklet_hrtimer, and let's deal with the timer offset logic here
> seperately (and in -next, not 2.6.31-rcX)

The straight conversion looks fine. Add my Acked-by.

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Aug. 24, 2009, 1:54 a.m. UTC | #4
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 23 Aug 2009 09:22:48 +0200 (CEST)

> B1;2005;0cOn Sat, 22 Aug 2009, David Miller wrote:
>> I'm not convinced either way, the code logic here has been like
>> this since at least 2.2.x, where it reads:
>> 
>> 	if (!cl->delayed) {
>> 		unsigned long sched = jiffies;
>>  ...
>> 		if (delay > 0) {
>> 			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
>>  ...
>> 			if (del_timer(&q->delay_timer) &&
>> 			    (long)(q->delay_timer.expires - sched) > 0)
>> 				q->delay_timer.expires = sched;
>> 			add_timer(&q->delay_timer);
> 
> That does not make more sense than the hrtimer version :)

Sure it does, at least to me.

It says: When 'delay > 0', either the timer fires immediately
('jiffies') or at some point in the future ('jiffies + delay +
penalty' or existing expiration, whichever is sooner).

The intention of the code seems very clear.

>> So please review my patch in the context of a straight conversion to
>> tasklet_hrtimer, and let's deal with the timer offset logic here
>> seperately (and in -next, not 2.6.31-rcX)
> 
> The straight conversion looks fine. Add my Acked-by.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Aug. 28, 2009, 3:54 p.m. UTC | #5
David Miller a écrit :
> This code expects to run in softirq context, and bare hrtimers
> run in hw IRQ context.
> 
> Signed-off-by: David S. Miller <davem@davemloft.net>

...

>  	sch->flags &= ~TCQ_F_THROTTLED;
> @@ -1214,7 +1216,7 @@ cbq_reset(struct Qdisc* sch)
>  	q->tx_class = NULL;
>  	q->tx_borrowed = NULL;
>  	qdisc_watchdog_cancel(&q->watchdog);
> -	hrtimer_cancel(&q->delay_timer);
> +	tasklet_hrtimer_cancel(&q->delay_timer);
>  	q->toplevel = TC_CBQ_MAXLEVEL;
>  	q->now = psched_get_time();
>  	q->now_rt = q->now;

David

I now have these dmesg warnings when playing with cbq

# tc qdisc del dev eth3 root
# tc qdisc add dev eth3 root handle 1: est 1sec 8sec cbq avpkt 1000 rate 1000Mbit bandwidth 1000Mbit
# tc qdisc del dev eth3 root

[12786.458485] Attempt to kill tasklet from interrupt
[12786.458522] Attempt to kill tasklet from interrupt
[12786.465261] Attempt to kill tasklet from interrupt
[12786.465286] Attempt to kill tasklet from interrupt

probably becauce cbq_reset() being called from interrupt context ?

Thanks
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Aug. 28, 2009, 7:30 p.m. UTC | #6
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 28 Aug 2009 17:54:53 +0200

> I now have these dmesg warnings when playing with cbq
> 
> # tc qdisc del dev eth3 root
> # tc qdisc add dev eth3 root handle 1: est 1sec 8sec cbq avpkt 1000 rate 1000Mbit bandwidth 1000Mbit
> # tc qdisc del dev eth3 root
> 
> [12786.458485] Attempt to kill tasklet from interrupt
> [12786.458522] Attempt to kill tasklet from interrupt
> [12786.465261] Attempt to kill tasklet from interrupt
> [12786.465286] Attempt to kill tasklet from interrupt
> 
> probably becauce cbq_reset() being called from interrupt context ?

Grumble... I guess the tasklet_hrtimer infrastructure is less
of a direct plugin replacement for plain hrtimers than I thought.

Anyone have any ideas about how to deal with stuff like this?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Gleixner Aug. 28, 2009, 7:51 p.m. UTC | #7
On Fri, 28 Aug 2009, David Miller wrote:

> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Fri, 28 Aug 2009 17:54:53 +0200
> 
> > I now have these dmesg warnings when playing with cbq
> > 
> > # tc qdisc del dev eth3 root
> > # tc qdisc add dev eth3 root handle 1: est 1sec 8sec cbq avpkt 1000 rate 1000Mbit bandwidth 1000Mbit
> > # tc qdisc del dev eth3 root
> > 
> > [12786.458485] Attempt to kill tasklet from interrupt
> > [12786.458522] Attempt to kill tasklet from interrupt
> > [12786.465261] Attempt to kill tasklet from interrupt
> > [12786.465286] Attempt to kill tasklet from interrupt
> > 
> > probably becauce cbq_reset() being called from interrupt context ?
> 
> Grumble... I guess the tasklet_hrtimer infrastructure is less
> of a direct plugin replacement for plain hrtimers than I thought.
> 
> Anyone have any ideas about how to deal with stuff like this?

Hmm. Is it sufficient to cancel just the timer and let an eventually
scheduled tasklet deal with the cleanup ?

Thanks,

	tglx

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index d5798e1..81652d6 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -163,7 +163,7 @@  struct cbq_sched_data
 	psched_time_t		now_rt;		/* Cached real time */
 	unsigned		pmask;
 
-	struct hrtimer		delay_timer;
+	struct tasklet_hrtimer	delay_timer;
 	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
 						   started when CBQ has
 						   backlog, but cannot
@@ -503,6 +503,8 @@  static void cbq_ovl_delay(struct cbq_class *cl)
 		cl->undertime = q->now + delay;
 
 		if (delay > 0) {
+			struct hrtimer *ht;
+
 			sched += delay + cl->penalty;
 			cl->penalized = sched;
 			cl->cpriority = TC_CBQ_MAXPRIO;
@@ -510,12 +512,12 @@  static void cbq_ovl_delay(struct cbq_class *cl)
 
 			expires = ktime_set(0, 0);
 			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
-			if (hrtimer_try_to_cancel(&q->delay_timer) &&
-			    ktime_to_ns(ktime_sub(
-					hrtimer_get_expires(&q->delay_timer),
-					expires)) > 0)
-				hrtimer_set_expires(&q->delay_timer, expires);
-			hrtimer_restart(&q->delay_timer);
+			ht = &q->delay_timer.timer;
+			if (hrtimer_try_to_cancel(ht) &&
+			    ktime_to_ns(ktime_sub(hrtimer_get_expires(ht),
+						  expires)) > 0)
+				hrtimer_set_expires(ht, expires);
+			hrtimer_restart(ht);
 			cl->delayed = 1;
 			cl->xstats.overactions++;
 			return;
@@ -621,7 +623,7 @@  static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 
 		time = ktime_set(0, 0);
 		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
-		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
+		tasklet_hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 	}
 
 	sch->flags &= ~TCQ_F_THROTTLED;
@@ -1214,7 +1216,7 @@  cbq_reset(struct Qdisc* sch)
 	q->tx_class = NULL;
 	q->tx_borrowed = NULL;
 	qdisc_watchdog_cancel(&q->watchdog);
-	hrtimer_cancel(&q->delay_timer);
+	tasklet_hrtimer_cancel(&q->delay_timer);
 	q->toplevel = TC_CBQ_MAXLEVEL;
 	q->now = psched_get_time();
 	q->now_rt = q->now;
@@ -1397,7 +1399,8 @@  static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->link.minidle = -0x7FFFFFFF;
 
 	qdisc_watchdog_init(&q->watchdog, sch);
-	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	tasklet_hrtimer_init(&q->delay_timer, cbq_undelay,
+			     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	q->delay_timer.function = cbq_undelay;
 	q->toplevel = TC_CBQ_MAXLEVEL;
 	q->now = psched_get_time();