Patchwork [v4,5/5] hpet 'driftfix': add code in hpet_timer() to compensate delayed callbacks and coalesced interrupts

login
register
mail settings
Submitter Ulrich Obergfell
Date May 9, 2011, 7:03 a.m.
Message ID <1304924601-3848-6-git-send-email-uobergfe@redhat.com>
Download mbox | patch
Permalink /patch/94735/
State New
Headers show

Comments

Ulrich Obergfell - May 9, 2011, 7:03 a.m.
Loss of periodic timer interrupts caused by delayed callbacks and by
interrupt coalescing is compensated by gradually injecting additional
interrupts during subsequent timer intervals, starting at a rate of
one additional interrupt per interval. The injection of additional
interrupts is based on a backlog of unaccounted HPET clock periods
(new HPETTimer field 'ticks_not_accounted'). The backlog increases
due to delayed callbacks and coalesced interrupts, and it decreases
if an interrupt was injected successfully. If the backlog increases
while compensation is still in progress, the rate at which additional
interrupts are injected is increased too. A limit is imposed on the
backlog and on the rate.

Injecting additional timer interrupts to compensate lost interrupts
can alleviate long term time drift. However, on a short time scale,
this method can have the side effect of making virtual machine time
intermittently pass slower and faster than real time (depending on
the guest's time keeping algorithm). Compensation is disabled by
default and can be enabled for guests where this behaviour may be
acceptable.

Signed-off-by: Ulrich Obergfell <uobergfe@redhat.com>
---
 hw/hpet.c |   70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 68 insertions(+), 2 deletions(-)
Zachary Amsden - May 12, 2011, 1:58 a.m.
On 05/09/2011 12:03 AM, Ulrich Obergfell wrote:
> Loss of periodic timer interrupts caused by delayed callbacks and by
> interrupt coalescing is compensated by gradually injecting additional
> interrupts during subsequent timer intervals, starting at a rate of
> one additional interrupt per interval. The injection of additional
> interrupts is based on a backlog of unaccounted HPET clock periods
> (new HPETTimer field 'ticks_not_accounted'). The backlog increases
> due to delayed callbacks and coalesced interrupts, and it decreases
> if an interrupt was injected successfully. If the backlog increases
> while compensation is still in progress, the rate at which additional
> interrupts are injected is increased too. A limit is imposed on the
> backlog and on the rate.
>
> Injecting additional timer interrupts to compensate lost interrupts
> can alleviate long term time drift. However, on a short time scale,
> this method can have the side effect of making virtual machine time
> intermittently pass slower and faster than real time (depending on
> the guest's time keeping algorithm). Compensation is disabled by
> default and can be enabled for guests where this behaviour may be
> acceptable.
>
> Signed-off-by: Ulrich Obergfell<uobergfe@redhat.com>
> ---
>   hw/hpet.c |   70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>   1 files changed, 68 insertions(+), 2 deletions(-)
>
> diff --git a/hw/hpet.c b/hw/hpet.c
> index e57c654..519fc6b 100644
> --- a/hw/hpet.c
> +++ b/hw/hpet.c
> @@ -31,6 +31,7 @@
>   #include "hpet_emul.h"
>   #include "sysbus.h"
>   #include "mc146818rtc.h"
> +#include<assert.h>
>
>   //#define HPET_DEBUG
>   #ifdef HPET_DEBUG
> @@ -41,6 +42,9 @@
>
>   #define HPET_MSI_SUPPORT        0
>
> +#define MAX_TICKS_NOT_ACCOUNTED     (uint64_t)500000000 /* 5 sec */
> +#define MAX_IRQ_RATE                (uint32_t)10
> +
>   struct HPETState;
>   typedef struct HPETTimer {  /* timers */
>       uint8_t tn;             /*timer number*/
> @@ -324,14 +328,35 @@ static const VMStateDescription vmstate_hpet = {
>       }
>   };
>
> +static void hpet_timer_driftfix_reset(HPETTimer *t)
> +{
> +    if (t->state->driftfix&&  timer_is_periodic(t)) {
> +        t->ticks_not_accounted = t->prev_period = t->period;
>    

This is rather confusing.  Clearly, ticks_not_accounted isn't actually 
ticks not accounted, it's a different variable entirely which is based 
on the current period.

If it were actually ticks_not_accounted, it should be reset to zero.

> +        t->irq_rate = 1;
> +        t->divisor = 1;
> +    }
> +}
> +
> +static bool hpet_timer_has_tick_backlog(HPETTimer *t)
> +{
> +    uint64_t backlog = 0;
> +
> +    if (t->ticks_not_accounted>= t->period + t->prev_period) {
> +        backlog = t->ticks_not_accounted - (t->period + t->prev_period);
> +    }
> +    return (backlog>= t->period);
> +}
> +
>   /*
>    * timer expiration callback
>    */
>   static void hpet_timer(void *opaque)
>   {
>       HPETTimer *t = opaque;
> +    HPETState *s = t->state;
>       uint64_t diff;
> -
> +    int irq_delivered = 0;
> +    uint32_t period_count = 0;
>       uint64_t period = t->period;
>       uint64_t cur_tick = hpet_get_ticks(t->state);
>
> @@ -339,13 +364,37 @@ static void hpet_timer(void *opaque)
>           if (t->config&  HPET_TN_32BIT) {
>               while (hpet_time_after(cur_tick, t->cmp)) {
>                   t->cmp = (uint32_t)(t->cmp + t->period);
> +                t->ticks_not_accounted += t->period;
> +                period_count++;
>               }
>           } else {
>               while (hpet_time_after64(cur_tick, t->cmp)) {
>                   t->cmp += period;
> +                t->ticks_not_accounted += period;
> +                period_count++;
>               }
>           }
>           diff = hpet_calculate_diff(t, cur_tick);
> +        if (s->driftfix) {
> +            if (t->ticks_not_accounted>  MAX_TICKS_NOT_ACCOUNTED) {
> +                t->ticks_not_accounted = t->period + t->prev_period;
> +            }
> +            if (hpet_timer_has_tick_backlog(t)) {
> +                if (t->irq_rate == 1 || period_count>  1) {
> +                    t->irq_rate++;
> +                    t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
> +                }
> +                if (t->divisor == 0) {
> +                    assert(period_count);
> +                }
> +                if (period_count) {
> +                    t->divisor = t->irq_rate;
> +                }
> +                diff /= t->divisor--;
>    

Why subtracting from the divisor?  Shouldn't the divisor and irq_rate 
change in lockstep?

> +            } else {
> +                t->irq_rate = 1;
> +            }
> +        }
>           qemu_mod_timer(t->qemu_timer,
>                          qemu_get_clock_ns(vm_clock) + (int64_t)ticks_to_ns(diff));
>       } else if (t->config&  HPET_TN_32BIT&&  !timer_is_periodic(t)) {
> @@ -356,7 +405,22 @@ static void hpet_timer(void *opaque)
>               t->wrap_flag = 0;
>           }
>       }
> -    update_irq(t, 1);
> +    if (s->driftfix&&  timer_is_periodic(t)&&  period != 0) {
> +        if (t->ticks_not_accounted>= t->period + t->prev_period) {
> +            irq_delivered = update_irq(t, 1);
> +            if (irq_delivered) {
> +                t->ticks_not_accounted -= t->prev_period;
> +                t->prev_period = t->period;
> +            } else {
> +                if (period_count) {
> +                    t->irq_rate++;
> +                    t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
> +                }
> +            }
> +        }
> +    } else {
> +        update_irq(t, 1);
> +    }
>   }
>    

Overall I think the intention is correct, and the code may be as well, 
but anything with this amount of subtle manipulations and math tricks 
needs to be more well commented for others to understand it.

I'll try to parse it again later, hopefully with better luck,

Zach
Ulrich Obergfell - May 12, 2011, 9:48 a.m.
Hi Zachary,

1. re.:

>> +static void hpet_timer_driftfix_reset(HPETTimer *t)
>> +{
>> +    if (t->state->driftfix&&  timer_is_periodic(t)) {
>> +        t->ticks_not_accounted = t->prev_period = t->period;
>>    
>
> This is rather confusing.  Clearly, ticks_not_accounted isn't actually
> ticks not accounted, it's a different variable entirely which is based
> on the current period.
>
> If it were actually ticks_not_accounted, it should be reset to zero.


hpet_timer_driftfix_reset() is called at two sites before the periodic
timer is actually started via hpet_set_timer(). An interrupt shall be
delivered at the first callback of hpet_timer(), after t->period amount
of time has passed. The ticks are recorded as 'not accounted' until the
interrupt is delivered to the guest. The following message explains why
t->prev_period is needed in addition to t->period and how it is used.

  http://lists.gnu.org/archive/html/qemu-devel/2011-05/msg00275.html

On entry to hpet_timer(), t->ticks_not_accounted is >= t->prev_period,
and is it increased by t->period while the comparator register is being
advanced. This is also the place where we can detect whether the current
callback was delayed. If the period_count is > 1, the delay was so large
that we missed to inject an interrupt (which needs to be compensated).

             while (hpet_time_after(cur_tick, t->cmp)) {
                 t->cmp = (uint32_t)(t->cmp + t->period);
+                t->ticks_not_accounted += t->period;
+                period_count++;
             }

Please note that period_count can be zero. This happens during callbacks
that inject additional interrupts 'inside of' a period interval in order
to compensate missed and coalesced interrupts.

t->ticks_not_accounted is decreased only if an interrupt was delivered
to the guest. If an interrupt could not be delivered, the ticks that are
represented by that interrupt remain recorded as 'not accounted' (this
also triggers compensation of coalesced interrupts).

+            if (irq_delivered) {
+                t->ticks_not_accounted -= t->prev_period;
+                t->prev_period = t->period;
+            } else {
+                if (period_count) {
+                    t->irq_rate++;
+                    t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
+                }
+            }


2. re.:

>> +                if (period_count) {
>> +                    t->divisor = t->irq_rate;
>> +                }
>> +                diff /= t->divisor--;
>>    
>
> Why subtracting from the divisor?  Shouldn't the divisor and irq_rate
> change in lockstep?


t->irq_rate is the rate at which interrupts are delivered to the guest,
relative to the period length. If t->irq_rate is 1, then one interrupt
shall be injected per period interval. If t->irq_rate is > 1, we are in
'compensation mode' (trying to inject additional interrupts 'inside of'
an interval).

- If t->irq_rate is 2, then two interrupts shall be injected during a
  period interval (one regular and one additional).

- If t->irq_rate is 3, then three interrupts shall be injected during a
  period interval (one regular and two additional).

- etc.

A non-zero period_count marks the start of an interval, at which the
divisor is set to t->irq_rate. Let's take a look at an example where
t->divisor = t->irq_rate = 3.

- The current period starts at t(p), the next period starts at t(p+1).
  We are now at t(p). The first additional interrupt shall be injected
  at a(1), the second at a(2). Hence, the next callback is scheduled
  to occur at a(1) = t(p) + diff / 3.

  t(p)                 a(1)                 a(2)                 t(p+1)
  +--------------------+--------------------+--------------------+ time
   <--------------------------- diff --------------------------->
   <---- diff / 3 ---->

- We are now in the callback at a(1). A new diff has been calculated,
  which is equal to the remaining time in the interval from a(1) to
  t(p+1). The second additional interrupt shall be injected at a(2).
  Hence, the next callback is scheduled to occur at a(2) = a(1) + diff / 2.

                       a(1)                 a(2)                 t(p+1)
                       +--------------------+--------------------+ time
                        <------------------ diff --------------->
                        <---- diff / 2 ---->

- We are now in the callback at a(2). A new diff has been calculated,
  which is equal to the remaining time in the interval from a(2) to
  t(p+1). The next callback marks the beginning of period t(p+1).

                                            a(2)                 t(p+1)
                                            +--------------------+ time
                                             <------ diff ------>
                                             <---- diff / 1 ---->

At t(p), the divisor is set to irq_rate (= 3). diff is divided by 3
and the divisor is decremented by one. At a(1), the divisor is 2.
diff is divided by 2 and the divisor is decremented by one. At a(2),
the divisor is 1. diff is divided by 1 and the divisor will be reset
at the beginning of t(p+1).


Regards,

Uli

Patch

diff --git a/hw/hpet.c b/hw/hpet.c
index e57c654..519fc6b 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -31,6 +31,7 @@ 
 #include "hpet_emul.h"
 #include "sysbus.h"
 #include "mc146818rtc.h"
+#include <assert.h>
 
 //#define HPET_DEBUG
 #ifdef HPET_DEBUG
@@ -41,6 +42,9 @@ 
 
 #define HPET_MSI_SUPPORT        0
 
+#define MAX_TICKS_NOT_ACCOUNTED     (uint64_t)500000000 /* 5 sec */
+#define MAX_IRQ_RATE                (uint32_t)10
+
 struct HPETState;
 typedef struct HPETTimer {  /* timers */
     uint8_t tn;             /*timer number*/
@@ -324,14 +328,35 @@  static const VMStateDescription vmstate_hpet = {
     }
 };
 
+static void hpet_timer_driftfix_reset(HPETTimer *t)
+{
+    if (t->state->driftfix && timer_is_periodic(t)) {
+        t->ticks_not_accounted = t->prev_period = t->period;
+        t->irq_rate = 1;
+        t->divisor = 1;
+    }
+}
+
+static bool hpet_timer_has_tick_backlog(HPETTimer *t)
+{
+    uint64_t backlog = 0;
+
+    if (t->ticks_not_accounted >= t->period + t->prev_period) {
+        backlog = t->ticks_not_accounted - (t->period + t->prev_period);
+    }
+    return (backlog >= t->period);
+}
+
 /*
  * timer expiration callback
  */
 static void hpet_timer(void *opaque)
 {
     HPETTimer *t = opaque;
+    HPETState *s = t->state;
     uint64_t diff;
-
+    int irq_delivered = 0;
+    uint32_t period_count = 0;
     uint64_t period = t->period;
     uint64_t cur_tick = hpet_get_ticks(t->state);
 
@@ -339,13 +364,37 @@  static void hpet_timer(void *opaque)
         if (t->config & HPET_TN_32BIT) {
             while (hpet_time_after(cur_tick, t->cmp)) {
                 t->cmp = (uint32_t)(t->cmp + t->period);
+                t->ticks_not_accounted += t->period;
+                period_count++;
             }
         } else {
             while (hpet_time_after64(cur_tick, t->cmp)) {
                 t->cmp += period;
+                t->ticks_not_accounted += period;
+                period_count++;
             }
         }
         diff = hpet_calculate_diff(t, cur_tick);
+        if (s->driftfix) {
+            if (t->ticks_not_accounted > MAX_TICKS_NOT_ACCOUNTED) {
+                t->ticks_not_accounted = t->period + t->prev_period;
+            }
+            if (hpet_timer_has_tick_backlog(t)) {
+                if (t->irq_rate == 1 || period_count > 1) {
+                    t->irq_rate++;
+                    t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
+                }
+                if (t->divisor == 0) {
+                    assert(period_count);
+                }
+                if (period_count) {
+                    t->divisor = t->irq_rate;
+                }
+                diff /= t->divisor--;
+            } else {
+                t->irq_rate = 1;
+            }
+        }
         qemu_mod_timer(t->qemu_timer,
                        qemu_get_clock_ns(vm_clock) + (int64_t)ticks_to_ns(diff));
     } else if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) {
@@ -356,7 +405,22 @@  static void hpet_timer(void *opaque)
             t->wrap_flag = 0;
         }
     }
-    update_irq(t, 1);
+    if (s->driftfix && timer_is_periodic(t) && period != 0) {
+        if (t->ticks_not_accounted >= t->period + t->prev_period) {
+            irq_delivered = update_irq(t, 1);
+            if (irq_delivered) {
+                t->ticks_not_accounted -= t->prev_period;
+                t->prev_period = t->period;
+            } else {
+                if (period_count) {
+                    t->irq_rate++;
+                    t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
+                }
+            }
+        }
+    } else {
+        update_irq(t, 1);
+    }
 }
 
 static void hpet_set_timer(HPETTimer *t)
@@ -525,6 +589,7 @@  static void hpet_ram_writel(void *opaque, target_phys_addr_t addr,
                 timer->period = (uint32_t)timer->period;
             }
             if (activating_bit(old_val, new_val, HPET_TN_ENABLE)) {
+                hpet_timer_driftfix_reset(timer);
                 hpet_set_timer(timer);
             } else if (deactivating_bit(old_val, new_val, HPET_TN_ENABLE)) {
                 hpet_del_timer(timer);
@@ -599,6 +664,7 @@  static void hpet_ram_writel(void *opaque, target_phys_addr_t addr,
                     ticks_to_ns(s->hpet_counter) - qemu_get_clock_ns(vm_clock);
                 for (i = 0; i < s->num_timers; i++) {
                     if ((&s->timer[i])->cmp != ~0ULL) {
+                        hpet_timer_driftfix_reset(&s->timer[i]);
                         hpet_set_timer(&s->timer[i]);
                     }
                 }