Patchwork [v2,5/5] hpet 'driftfix': add code in hpet_timer() to compensate delayed callbacks and coalesced interrupts

login
register
mail settings
Submitter Ulrich Obergfell
Date April 8, 2011, 3:20 p.m.
Message ID <1302276042-3497-6-git-send-email-uobergfe@redhat.com>
Download mbox | patch
Permalink /patch/90390/
State New
Headers show

Comments

Ulrich Obergfell - April 8, 2011, 3:20 p.m.
Loss of periodic timer interrupts caused by delayed callbacks and by
interrupt coalescing is compensated by gradually injecting additional
interrupts during subsequent timer intervals, starting at a rate of
one additional interrupt per interval. If further interrupts are lost
while compensation is in progress, the rate is increased. A limit is
imposed on the rate and on the 'backlog' of lost interrupts that are
to be injected. If a guest o/s modifies the comparator register value
while compensation is in progress, the 'backlog' of lost interrupts
that are to be injected is scaled to the new value.

Injecting additional timer interrupts to compensate lost interrupts
can alleviate long term time drift. However, on a short time scale,
this method can have the side effect of making virtual machine time
intermittently pass faster than real time (depending on the guest's
time keeping algorithm). Compensation is disabled by default and can
be enabled for guests where this behaviour is acceptable.

Signed-off-by: Ulrich Obergfell <uobergfe@redhat.com>
---
 hw/hpet.c |   58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 57 insertions(+), 1 deletions(-)
Jan Kiszka - April 8, 2011, 3:54 p.m.
On 2011-04-08 17:20, Ulrich Obergfell wrote:
> Loss of periodic timer interrupts caused by delayed callbacks and by
> interrupt coalescing is compensated by gradually injecting additional
> interrupts during subsequent timer intervals, starting at a rate of
> one additional interrupt per interval. If further interrupts are lost
> while compensation is in progress, the rate is increased. A limit is
> imposed on the rate and on the 'backlog' of lost interrupts that are
> to be injected. If a guest o/s modifies the comparator register value
> while compensation is in progress, the 'backlog' of lost interrupts
> that are to be injected is scaled to the new value.
> 
> Injecting additional timer interrupts to compensate lost interrupts
> can alleviate long term time drift. However, on a short time scale,
> this method can have the side effect of making virtual machine time
> intermittently pass faster than real time (depending on the guest's
> time keeping algorithm). Compensation is disabled by default and can
> be enabled for guests where this behaviour is acceptable.
> 
> Signed-off-by: Ulrich Obergfell <uobergfe@redhat.com>
> ---
>  hw/hpet.c |   58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 57 insertions(+), 1 deletions(-)
> 
> diff --git a/hw/hpet.c b/hw/hpet.c
> index b5625fb..5a25a96 100644
> --- a/hw/hpet.c
> +++ b/hw/hpet.c
> @@ -42,6 +42,9 @@
>  
>  #define HPET_MSI_SUPPORT        0
>  
> +#define MAX_IRQS_TO_INJECT (uint32_t)5000
> +#define MAX_IRQ_RATE       (uint32_t)10
> +
>  struct HPETState;
>  typedef struct HPETTimer {  /* timers */
>      uint8_t tn;             /*timer number*/
> @@ -309,8 +312,11 @@ static const VMStateDescription vmstate_hpet = {
>  static void hpet_timer(void *opaque)
>  {
>      HPETTimer *t = opaque;
> +    HPETState *s = t->state;
>      uint64_t diff;
>  
> +    int irq_delivered = 0;
> +    uint32_t irq_count = 0;
>      uint64_t period = t->period;
>      uint64_t cur_tick = hpet_get_ticks(t->state);
>  
> @@ -318,13 +324,36 @@ static void hpet_timer(void *opaque)
>          if (t->config & HPET_TN_32BIT) {
>              while (hpet_time_after(cur_tick, t->cmp)) {
>                  t->cmp = (uint32_t)(t->cmp + t->period);
> +                irq_count++;
>              }
>          } else {
>              while (hpet_time_after64(cur_tick, t->cmp)) {
>                  t->cmp += period;
> +                irq_count++;
>              }
>          }
>          diff = hpet_calculate_diff(t, cur_tick);
> +        if (s->driftfix) {
> +            if (t->saved_period != t->period) {
> +                uint64_t tmp = (t->irqs_to_inject * t->saved_period)
> +                                                  + t->ticks_not_accounted;
> +                t->irqs_to_inject = tmp / t->period;
> +                t->ticks_not_accounted = tmp % t->period;
> +                t->saved_period = t->period;
> +            }
> +            t->irqs_to_inject += irq_count;
> +            t->irqs_to_inject = MIN(t->irqs_to_inject, MAX_IRQS_TO_INJECT);
> +            if (t->irqs_to_inject > 1) {
> +                if (irq_count > 1) {
> +                    t->irq_rate++;
> +                    t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
> +                }
> +                if (irq_count || t->divisor == 0) {
> +                    t->divisor = t->irq_rate;
> +                }
> +                diff /= t->divisor--;
> +            }
> +        }

Did I miss some change in the plan? I thought we were heading for a
generic, reusable driftfix tool box (or periodic timer service)? Or is
this intentionally an intermediate step?

>          qemu_mod_timer(t->qemu_timer,
>                         qemu_get_clock(vm_clock) + (int64_t)ticks_to_ns(diff));
>      } else if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) {
> @@ -335,7 +364,22 @@ static void hpet_timer(void *opaque)
>              t->wrap_flag = 0;
>          }
>      }
> -    update_irq(t, 1);
> +    if (s->driftfix && timer_is_periodic(t) && period != 0) {
> +        if (t->irqs_to_inject) {
> +            irq_delivered = update_irq(t, 1);
> +            if (irq_delivered) {
> +                t->irq_rate = MIN(t->irq_rate, t->irqs_to_inject);
> +                t->irqs_to_inject--;
> +            } else {
> +                if (irq_count) {
> +                    t->irq_rate++;
> +                    t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
> +                }
> +            }
> +        }
> +    } else {
> +        update_irq(t, 1);
> +    }
>  }
>  
>  static void hpet_set_timer(HPETTimer *t)
> @@ -674,6 +718,12 @@ static void hpet_reset(DeviceState *d)
>          timer->config |=  0x00000004ULL << 32;
>          timer->period = 0ULL;
>          timer->wrap_flag = 0;
> +
> +        timer->saved_period = 0;
> +        timer->ticks_not_accounted = 0;
> +        timer->irqs_to_inject = 0;
> +        timer->irq_rate = 1;
> +        timer->divisor = 1;
>      }
>  
>      s->hpet_counter = 0ULL;
> @@ -734,6 +784,12 @@ static int hpet_init(SysBusDevice *dev)
>          timer->qemu_timer = qemu_new_timer(vm_clock, hpet_timer, timer);
>          timer->tn = i;
>          timer->state = s;
> +
> +        timer->saved_period = 0;
> +        timer->ticks_not_accounted = 0;
> +        timer->irqs_to_inject = 0;
> +        timer->irq_rate = 1;
> +        timer->divisor = 1;

We call reset after initializing devices, at least for cold-plugged ones.

Jan
Glauber Costa - April 8, 2011, 4:08 p.m.
On Fri, 2011-04-08 at 17:54 +0200, Jan Kiszka wrote:
> > +        }
> 
> Did I miss some change in the plan? I thought we were heading for a
> generic, reusable driftfix tool box (or periodic timer service)? Or is
> this intentionally an intermediate step? 

Which is a medium to long way in the future. There is benefit of having
this early, since it can deliver real functionality - a reliable hpet,
and converting to whatever the api may look like in the future.
Jan Kiszka - April 8, 2011, 4:12 p.m.
On 2011-04-08 18:08, Glauber Costa wrote:
> On Fri, 2011-04-08 at 17:54 +0200, Jan Kiszka wrote:
>>> +        }
>>
>> Did I miss some change in the plan? I thought we were heading for a
>> generic, reusable driftfix tool box (or periodic timer service)? Or is
>> this intentionally an intermediate step? 
> 
> Which is a medium to long way in the future. There is benefit of having
> this early, since it can deliver real functionality - a reliable hpet,
> and converting to whatever the api may look like in the future.
> 

Well, if the PIT and the RTC should use it as well, which is probably
rather a short term goal, we already have two more users.

Jan

Patch

diff --git a/hw/hpet.c b/hw/hpet.c
index b5625fb..5a25a96 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -42,6 +42,9 @@ 
 
 #define HPET_MSI_SUPPORT        0
 
+#define MAX_IRQS_TO_INJECT (uint32_t)5000
+#define MAX_IRQ_RATE       (uint32_t)10
+
 struct HPETState;
 typedef struct HPETTimer {  /* timers */
     uint8_t tn;             /*timer number*/
@@ -309,8 +312,11 @@  static const VMStateDescription vmstate_hpet = {
 static void hpet_timer(void *opaque)
 {
     HPETTimer *t = opaque;
+    HPETState *s = t->state;
     uint64_t diff;
 
+    int irq_delivered = 0;
+    uint32_t irq_count = 0;
     uint64_t period = t->period;
     uint64_t cur_tick = hpet_get_ticks(t->state);
 
@@ -318,13 +324,36 @@  static void hpet_timer(void *opaque)
         if (t->config & HPET_TN_32BIT) {
             while (hpet_time_after(cur_tick, t->cmp)) {
                 t->cmp = (uint32_t)(t->cmp + t->period);
+                irq_count++;
             }
         } else {
             while (hpet_time_after64(cur_tick, t->cmp)) {
                 t->cmp += period;
+                irq_count++;
             }
         }
         diff = hpet_calculate_diff(t, cur_tick);
+        if (s->driftfix) {
+            if (t->saved_period != t->period) {
+                uint64_t tmp = (t->irqs_to_inject * t->saved_period)
+                                                  + t->ticks_not_accounted;
+                t->irqs_to_inject = tmp / t->period;
+                t->ticks_not_accounted = tmp % t->period;
+                t->saved_period = t->period;
+            }
+            t->irqs_to_inject += irq_count;
+            t->irqs_to_inject = MIN(t->irqs_to_inject, MAX_IRQS_TO_INJECT);
+            if (t->irqs_to_inject > 1) {
+                if (irq_count > 1) {
+                    t->irq_rate++;
+                    t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
+                }
+                if (irq_count || t->divisor == 0) {
+                    t->divisor = t->irq_rate;
+                }
+                diff /= t->divisor--;
+            }
+        }
         qemu_mod_timer(t->qemu_timer,
                        qemu_get_clock(vm_clock) + (int64_t)ticks_to_ns(diff));
     } else if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) {
@@ -335,7 +364,22 @@  static void hpet_timer(void *opaque)
             t->wrap_flag = 0;
         }
     }
-    update_irq(t, 1);
+    if (s->driftfix && timer_is_periodic(t) && period != 0) {
+        if (t->irqs_to_inject) {
+            irq_delivered = update_irq(t, 1);
+            if (irq_delivered) {
+                t->irq_rate = MIN(t->irq_rate, t->irqs_to_inject);
+                t->irqs_to_inject--;
+            } else {
+                if (irq_count) {
+                    t->irq_rate++;
+                    t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
+                }
+            }
+        }
+    } else {
+        update_irq(t, 1);
+    }
 }
 
 static void hpet_set_timer(HPETTimer *t)
@@ -674,6 +718,12 @@  static void hpet_reset(DeviceState *d)
         timer->config |=  0x00000004ULL << 32;
         timer->period = 0ULL;
         timer->wrap_flag = 0;
+
+        timer->saved_period = 0;
+        timer->ticks_not_accounted = 0;
+        timer->irqs_to_inject = 0;
+        timer->irq_rate = 1;
+        timer->divisor = 1;
     }
 
     s->hpet_counter = 0ULL;
@@ -734,6 +784,12 @@  static int hpet_init(SysBusDevice *dev)
         timer->qemu_timer = qemu_new_timer(vm_clock, hpet_timer, timer);
         timer->tn = i;
         timer->state = s;
+
+        timer->saved_period = 0;
+        timer->ticks_not_accounted = 0;
+        timer->irqs_to_inject = 0;
+        timer->irq_rate = 1;
+        timer->divisor = 1;
     }
 
     /* 64-bit main counter; LegacyReplacementRoute. */