diff mbox

[1/3] net: serialize hrtimer callback in sched_cbq

Message ID 1247832892.15751.35.camel@twins
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Peter Zijlstra July 17, 2009, 12:14 p.m. UTC
On Tue, 2009-07-14 at 09:42 -0700, Linus Torvalds wrote:
> 
> On Tue, 14 Jul 2009, Peter Zijlstra wrote:
> > 
> > Linus really hated the softirq mode, which is what prompted me to change
> > that.
> > 
> > Now, it might be he only hated the particular interface and the
> > resulting code, but I think to remember he simply thought the whole
> > thing daft.
> 
> Yes. And I hated the bugs it had. 
> 
> Don't make something as core as timers any more complicated. Don't take 
> locks in timers and then complain about deadlocks. If your locking is 
> broken, don't make the core timers be idiotically broken.
> 
> Because it was. The code was a total mess to follow, and had bugs.

How would something like the below work for people?

---
 include/linux/hrtimer.h |   22 ++++++++++++++++++++--
 kernel/hrtimer.c        |   23 ++++++++++++++++++++++-
 2 files changed, 42 insertions(+), 3 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Oliver Hartkopp July 17, 2009, 1:26 p.m. UTC | #1
Peter Zijlstra wrote:
> On Tue, 2009-07-14 at 09:42 -0700, Linus Torvalds wrote:
>> On Tue, 14 Jul 2009, Peter Zijlstra wrote:
>>> Linus really hated the softirq mode, which is what prompted me to change
>>> that.
>>>
>>> Now, it might be he only hated the particular interface and the
>>> resulting code, but I think to remember he simply thought the whole
>>> thing daft.
>> Yes. And I hated the bugs it had. 
>>
>> Don't make something as core as timers any more complicated. Don't take 
>> locks in timers and then complain about deadlocks. If your locking is 
>> broken, don't make the core timers be idiotically broken.
>>
>> Because it was. The code was a total mess to follow, and had bugs.
> 
> How would something like the below work for people?

Would be fine to me.

It reduces the duplicated code as well as private structs for hrtimers &
tasklets. And finally your suggestion preserves the proper separation of the
hrtimers and the tasklets that are used as underlying concepts.

Regards,
Oliver (who wrote net/can/bcm.c)


> 
> ---
>  include/linux/hrtimer.h |   22 ++++++++++++++++++++--
>  kernel/hrtimer.c        |   23 ++++++++++++++++++++++-
>  2 files changed, 42 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
> index 4759917..e7559fe 100644
> --- a/include/linux/hrtimer.h
> +++ b/include/linux/hrtimer.h
> @@ -22,6 +22,7 @@
>  #include <linux/wait.h>
>  #include <linux/percpu.h>
>  #include <linux/timer.h>
> +#include <linux/interrupt.h>
>  
>  
>  struct hrtimer_clock_base;
> @@ -91,7 +92,6 @@ enum hrtimer_restart {
>   * @function:	timer expiry callback function
>   * @base:	pointer to the timer base (per cpu and per clock)
>   * @state:	state information (See bit values above)
> - * @cb_entry:	list head to enqueue an expired timer into the callback list
>   * @start_site:	timer statistics field to store the site where the timer
>   *		was started
>   * @start_comm: timer statistics field to store the name of the process which
> @@ -108,7 +108,6 @@ struct hrtimer {
>  	enum hrtimer_restart		(*function)(struct hrtimer *);
>  	struct hrtimer_clock_base	*base;
>  	unsigned long			state;
> -	struct list_head		cb_entry;
>  #ifdef CONFIG_TIMER_STATS
>  	int				start_pid;
>  	void				*start_site;
> @@ -116,6 +115,12 @@ struct hrtimer {
>  #endif
>  };
>  
> +struct hrtimer_softirq {
> +	struct hrtimer		timer;
> +	struct tasklet_struct	tasklet;
> +	enum hrtimer_restart	(*function)(struct hrtimer *);
> +};
> +
>  /**
>   * struct hrtimer_sleeper - simple sleeper structure
>   * @timer:	embedded timer structure
> @@ -335,6 +340,19 @@ static inline void hrtimer_init_on_stack(struct hrtimer *timer,
>  static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
>  #endif
>  
> +enum hrtimer_restart __hrtimer_softirq_trampoline(struct hrtimer *timer);
> +void __hrtimer_tasklet_trampoline(unsigned long data);
> +
> +static inline void hrtimer_softirq_init(struct hrtimer_softirq *stimer,
> +		enum hrtimer_restart (*func)(struct hrtimer *),
> +		clockid_t which_clock, enum hrtimer_mode mode)
> +{
> +	hrtimer_init(&stimer->timer, which_clock, mode);
> +	stimer->timer.function = __hrtimer_softirq_trampoline;
> +	tasklet_init(&stimer->tasklet, __hrtimer_tasklet_trampoline, stimer);
> +	stimer->function = func;
> +}
> +
>  /* Basic timer operations: */
>  extern int hrtimer_start(struct hrtimer *timer, ktime_t tim,
>  			 const enum hrtimer_mode mode);
> diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
> index ab5eb70..dae063c 100644
> --- a/kernel/hrtimer.c
> +++ b/kernel/hrtimer.c
> @@ -1098,7 +1098,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
>  		clock_id = CLOCK_MONOTONIC;
>  
>  	timer->base = &cpu_base->clock_base[clock_id];
> -	INIT_LIST_HEAD(&timer->cb_entry);
>  	hrtimer_init_timer_hres(timer);
>  
>  #ifdef CONFIG_TIMER_STATS
> @@ -1141,6 +1140,28 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
>  }
>  EXPORT_SYMBOL_GPL(hrtimer_get_res);
>  
> +enum hrtimer_restart __hrtimer_softirq_trampoline(struct hrtimer *timer)
> +{
> +	struct hrtimer_softirq *stimer =
> +		container_of(timer, struct hrtimer_softirq, timer);
> +
> +	tasklet_hi_schedule(&timer->tasklet);
> +
> +	return HRTIMER_NORESTART;
> +}
> +EXPORT_SYMBOL_GPL(__hrtimer_softirq_trampoline);
> +
> +void __hrtimer_tasklet_trampoline(unsigned long data)
> +{
> +	struct hrtimer_softirq *stimer = (void *)data;
> +	enum hrtimer_restart restart;
> +
> +	restart = stimer->function(&stimer->timer);
> +	if (restart != HRTIMER_NORESTART)
> +		hrtimer_restart(&stimer->timer);
> +}
> +EXPORT_SYMBOL_GPL(__hrtimer_tasklet_trampoline);
> +
>  static void __run_hrtimer(struct hrtimer *timer)
>  {
>  	struct hrtimer_clock_base *base = timer->base;
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Linus Torvalds July 17, 2009, 3:44 p.m. UTC | #2
On Fri, 17 Jul 2009, Peter Zijlstra wrote:
> 
> How would something like the below work for people?

This looks saner.

It was the insanity of having the core timer code know about different 
modes that caused all the sily problems.

Having a separate abstraction layer for "I want to get a softirq timeout" 
sounds fine, as long as the timer code itself never cares.

That said, I don't think this shoud be a "hrtimer" issue (reflected in 
your naming and include file choice). I think this is a softirq or tasklet 
(or whatever) issue, and should be named that way.

Why should the timer code (and header files) care about how you can use 
tasklets with them? It shouldn't. The timers should be seen as the really 
low-level critical code, and the timer code should never need to know 
about softirq's or tasklets or whatever.

So I think you shouldmove it to kernel/softirq.c.

			Linus
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller July 22, 2009, 3:18 a.m. UTC | #3
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 17 Jul 2009 14:14:52 +0200

> On Tue, 2009-07-14 at 09:42 -0700, Linus Torvalds wrote:
>> 
>> On Tue, 14 Jul 2009, Peter Zijlstra wrote:
>> > 
>> > Linus really hated the softirq mode, which is what prompted me to change
>> > that.
>> > 
>> > Now, it might be he only hated the particular interface and the
>> > resulting code, but I think to remember he simply thought the whole
>> > thing daft.
>> 
>> Yes. And I hated the bugs it had. 
>> 
>> Don't make something as core as timers any more complicated. Don't take 
>> locks in timers and then complain about deadlocks. If your locking is 
>> broken, don't make the core timers be idiotically broken.
>> 
>> Because it was. The code was a total mess to follow, and had bugs.
> 
> How would something like the below work for people?

I like it, but like Linus said it probably belongs in
kernel/softirq.c
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra July 22, 2009, 6:29 a.m. UTC | #4
On Tue, 2009-07-21 at 20:18 -0700, David Miller wrote:
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Fri, 17 Jul 2009 14:14:52 +0200
> 
> > On Tue, 2009-07-14 at 09:42 -0700, Linus Torvalds wrote:
> >> 
> >> On Tue, 14 Jul 2009, Peter Zijlstra wrote:
> >> > 
> >> > Linus really hated the softirq mode, which is what prompted me to change
> >> > that.
> >> > 
> >> > Now, it might be he only hated the particular interface and the
> >> > resulting code, but I think to remember he simply thought the whole
> >> > thing daft.
> >> 
> >> Yes. And I hated the bugs it had. 
> >> 
> >> Don't make something as core as timers any more complicated. Don't take 
> >> locks in timers and then complain about deadlocks. If your locking is 
> >> broken, don't make the core timers be idiotically broken.
> >> 
> >> Because it was. The code was a total mess to follow, and had bugs.
> > 
> > How would something like the below work for people?
> 
> I like it, but like Linus said it probably belongs in
> kernel/softirq.c

Right, I meant to rework it, but stuff kept preempting me. I'll try and
get around to it today :-)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4759917..e7559fe 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -22,6 +22,7 @@ 
 #include <linux/wait.h>
 #include <linux/percpu.h>
 #include <linux/timer.h>
+#include <linux/interrupt.h>
 
 
 struct hrtimer_clock_base;
@@ -91,7 +92,6 @@  enum hrtimer_restart {
  * @function:	timer expiry callback function
  * @base:	pointer to the timer base (per cpu and per clock)
  * @state:	state information (See bit values above)
- * @cb_entry:	list head to enqueue an expired timer into the callback list
  * @start_site:	timer statistics field to store the site where the timer
  *		was started
  * @start_comm: timer statistics field to store the name of the process which
@@ -108,7 +108,6 @@  struct hrtimer {
 	enum hrtimer_restart		(*function)(struct hrtimer *);
 	struct hrtimer_clock_base	*base;
 	unsigned long			state;
-	struct list_head		cb_entry;
 #ifdef CONFIG_TIMER_STATS
 	int				start_pid;
 	void				*start_site;
@@ -116,6 +115,12 @@  struct hrtimer {
 #endif
 };
 
+struct hrtimer_softirq {
+	struct hrtimer		timer;
+	struct tasklet_struct	tasklet;
+	enum hrtimer_restart	(*function)(struct hrtimer *);
+};
+
 /**
  * struct hrtimer_sleeper - simple sleeper structure
  * @timer:	embedded timer structure
@@ -335,6 +340,19 @@  static inline void hrtimer_init_on_stack(struct hrtimer *timer,
 static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
 #endif
 
+enum hrtimer_restart __hrtimer_softirq_trampoline(struct hrtimer *timer);
+void __hrtimer_tasklet_trampoline(unsigned long data);
+
+static inline void hrtimer_softirq_init(struct hrtimer_softirq *stimer,
+		enum hrtimer_restart (*func)(struct hrtimer *),
+		clockid_t which_clock, enum hrtimer_mode mode)
+{
+	hrtimer_init(&stimer->timer, which_clock, mode);
+	stimer->timer.function = __hrtimer_softirq_trampoline;
+	tasklet_init(&stimer->tasklet, __hrtimer_tasklet_trampoline, stimer);
+	stimer->function = func;
+}
+
 /* Basic timer operations: */
 extern int hrtimer_start(struct hrtimer *timer, ktime_t tim,
 			 const enum hrtimer_mode mode);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ab5eb70..dae063c 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1098,7 +1098,6 @@  static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 		clock_id = CLOCK_MONOTONIC;
 
 	timer->base = &cpu_base->clock_base[clock_id];
-	INIT_LIST_HEAD(&timer->cb_entry);
 	hrtimer_init_timer_hres(timer);
 
 #ifdef CONFIG_TIMER_STATS
@@ -1141,6 +1140,28 @@  int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
 }
 EXPORT_SYMBOL_GPL(hrtimer_get_res);
 
+enum hrtimer_restart __hrtimer_softirq_trampoline(struct hrtimer *timer)
+{
+	struct hrtimer_softirq *stimer =
+		container_of(timer, struct hrtimer_softirq, timer);
+
+	tasklet_hi_schedule(&timer->tasklet);
+
+	return HRTIMER_NORESTART;
+}
+EXPORT_SYMBOL_GPL(__hrtimer_softirq_trampoline);
+
+void __hrtimer_tasklet_trampoline(unsigned long data)
+{
+	struct hrtimer_softirq *stimer = (void *)data;
+	enum hrtimer_restart restart;
+
+	restart = stimer->function(&stimer->timer);
+	if (restart != HRTIMER_NORESTART)
+		hrtimer_restart(&stimer->timer);
+}
+EXPORT_SYMBOL_GPL(__hrtimer_tasklet_trampoline);
+
 static void __run_hrtimer(struct hrtimer *timer)
 {
 	struct hrtimer_clock_base *base = timer->base;