Patchwork [2/4] timer: protect timers_state's clock with seqlock

login
register
mail settings
Submitter pingfank@linux.vnet.ibm.com
Date Aug. 5, 2013, 7:33 a.m.
Message ID <1375688006-16780-3-git-send-email-pingfank@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/264590/
State New
Headers show

Comments

pingfank@linux.vnet.ibm.com - Aug. 5, 2013, 7:33 a.m.
In kvm mode, vm_clock may be read outside BQL. This will make
timers_state --the foundation of vm_clock exposed to race condition.
Using private lock to protect it.

Note in tcg mode, vm_clock still read inside BQL, so icount is
left without change. As for cpu_ticks in timers_state, it
is still protected by BQL.

Lock rule: private lock innermost, ie BQL->"this lock"

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 cpus.c | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)
Paolo Bonzini - Aug. 5, 2013, 1:29 p.m.
> In kvm mode, vm_clock may be read outside BQL.

Not just in KVM mode (we will be able to use dataplane with TCG sooner
or later), actually.

Otherwise looks good!

Paolo

> This will make
> timers_state --the foundation of vm_clock exposed to race condition.
> Using private lock to protect it.
> 
> Note in tcg mode, vm_clock still read inside BQL, so icount is
> left without change. As for cpu_ticks in timers_state, it
> is still protected by BQL.
> 
> Lock rule: private lock innermost, ie BQL->"this lock"
> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  cpus.c | 36 +++++++++++++++++++++++++++++-------
>  1 file changed, 29 insertions(+), 7 deletions(-)
> 
> diff --git a/cpus.c b/cpus.c
> index 85e743d..ab92db9 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -107,12 +107,17 @@ static int64_t qemu_icount;
>  typedef struct TimersState {
>      int64_t cpu_ticks_prev;
>      int64_t cpu_ticks_offset;
> +    /* QemuClock will be read out of BQL, so protect is with private lock.
> +     * As for cpu_ticks, no requirement to read it outside BQL.
> +     * Lock rule: innermost
> +     */
> +    QemuSeqLock clock_seqlock;
>      int64_t cpu_clock_offset;
>      int32_t cpu_ticks_enabled;
>      int64_t dummy;
>  } TimersState;
>  
> -TimersState timers_state;
> +static TimersState timers_state;
>  
>  /* Return the virtual CPU time, based on the instruction counter.  */
>  int64_t cpu_get_icount(void)
> @@ -132,6 +137,7 @@ int64_t cpu_get_icount(void)
>  }
>  
>  /* return the host CPU cycle counter and handle stop/restart */
> +/* cpu_ticks is safely if holding BQL */
>  int64_t cpu_get_ticks(void)
>  {
>      if (use_icount) {
> @@ -156,33 +162,46 @@ int64_t cpu_get_ticks(void)
>  int64_t cpu_get_clock(void)
>  {
>      int64_t ti;
> -    if (!timers_state.cpu_ticks_enabled) {
> -        return timers_state.cpu_clock_offset;
> -    } else {
> -        ti = get_clock();
> -        return ti + timers_state.cpu_clock_offset;
> -    }
> +    unsigned start;
> +
> +    do {
> +        start = seqlock_read_begin(&timers_state.clock_seqlock);
> +        if (!timers_state.cpu_ticks_enabled) {
> +            ti = timers_state.cpu_clock_offset;
> +        } else {
> +            ti = get_clock();
> +            ti += timers_state.cpu_clock_offset;
> +        }
> +    } while (seqlock_read_check(&timers_state.clock_seqlock, start);
> +
> +    return ti;
>  }
>  
>  /* enable cpu_get_ticks() */
>  void cpu_enable_ticks(void)
>  {
> +    /* Here, the really thing protected by seqlock is cpu_clock. */
> +    seqlock_write_lock(&timers_state.clock_seqlock);
>      if (!timers_state.cpu_ticks_enabled) {
>          timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
>          timers_state.cpu_clock_offset -= get_clock();
>          timers_state.cpu_ticks_enabled = 1;
>      }
> +    seqlock_write_unlock(&timers_state.clock_seqlock);
>  }
>  
>  /* disable cpu_get_ticks() : the clock is stopped. You must not call
>     cpu_get_ticks() after that.  */
>  void cpu_disable_ticks(void)
>  {
> +    /* Here, the really thing protected by seqlock is cpu_clock. */
> +    seqlock_write_lock(&timers_state.clock_seqlock);
>      if (timers_state.cpu_ticks_enabled) {
>          timers_state.cpu_ticks_offset = cpu_get_ticks();
>          timers_state.cpu_clock_offset = cpu_get_clock();
>          timers_state.cpu_ticks_enabled = 0;
>      }
> +    seqlock_write_unlock(&timers_state.clock_seqlock);
>  }
>  
>  /* Correlation between real and virtual time is always going to be
> @@ -364,6 +383,9 @@ static const VMStateDescription vmstate_timers = {
>  
>  void configure_icount(const char *option)
>  {
> +    QemuMutex *mutex = g_malloc0(sizeof(QemuMutex));
> +    qemu_mutex_init(mutex);
> +    seqlock_init(&timers_state.clock_seqlock, mutex);
>      vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
>      if (!option) {
>          return;
> --
> 1.8.1.4
> 
>
pingfan liu - Aug. 6, 2013, 5:58 a.m.
On Mon, Aug 5, 2013 at 9:29 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>> In kvm mode, vm_clock may be read outside BQL.
>
> Not just in KVM mode (we will be able to use dataplane with TCG sooner
> or later), actually.
>
Oh. But this patch does not fix cpu_get_icount()'s thread-safe issue.
So currently, could I just change the commit log instead of fixing it?

Regards,
Pingfan

> Otherwise looks good!
>
> Paolo
>
>> This will make
>> timers_state --the foundation of vm_clock exposed to race condition.
>> Using private lock to protect it.
>>
>> Note in tcg mode, vm_clock still read inside BQL, so icount is
>> left without change. As for cpu_ticks in timers_state, it
>> is still protected by BQL.
>>
>> Lock rule: private lock innermost, ie BQL->"this lock"
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> ---
>>  cpus.c | 36 +++++++++++++++++++++++++++++-------
>>  1 file changed, 29 insertions(+), 7 deletions(-)
>>
>> diff --git a/cpus.c b/cpus.c
>> index 85e743d..ab92db9 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -107,12 +107,17 @@ static int64_t qemu_icount;
>>  typedef struct TimersState {
>>      int64_t cpu_ticks_prev;
>>      int64_t cpu_ticks_offset;
>> +    /* QemuClock will be read out of BQL, so protect is with private lock.
>> +     * As for cpu_ticks, no requirement to read it outside BQL.
>> +     * Lock rule: innermost
>> +     */
>> +    QemuSeqLock clock_seqlock;
>>      int64_t cpu_clock_offset;
>>      int32_t cpu_ticks_enabled;
>>      int64_t dummy;
>>  } TimersState;
>>
>> -TimersState timers_state;
>> +static TimersState timers_state;
>>
>>  /* Return the virtual CPU time, based on the instruction counter.  */
>>  int64_t cpu_get_icount(void)
>> @@ -132,6 +137,7 @@ int64_t cpu_get_icount(void)
>>  }
>>
>>  /* return the host CPU cycle counter and handle stop/restart */
>> +/* cpu_ticks is safely if holding BQL */
>>  int64_t cpu_get_ticks(void)
>>  {
>>      if (use_icount) {
>> @@ -156,33 +162,46 @@ int64_t cpu_get_ticks(void)
>>  int64_t cpu_get_clock(void)
>>  {
>>      int64_t ti;
>> -    if (!timers_state.cpu_ticks_enabled) {
>> -        return timers_state.cpu_clock_offset;
>> -    } else {
>> -        ti = get_clock();
>> -        return ti + timers_state.cpu_clock_offset;
>> -    }
>> +    unsigned start;
>> +
>> +    do {
>> +        start = seqlock_read_begin(&timers_state.clock_seqlock);
>> +        if (!timers_state.cpu_ticks_enabled) {
>> +            ti = timers_state.cpu_clock_offset;
>> +        } else {
>> +            ti = get_clock();
>> +            ti += timers_state.cpu_clock_offset;
>> +        }
>> +    } while (seqlock_read_check(&timers_state.clock_seqlock, start);
>> +
>> +    return ti;
>>  }
>>
>>  /* enable cpu_get_ticks() */
>>  void cpu_enable_ticks(void)
>>  {
>> +    /* Here, the really thing protected by seqlock is cpu_clock. */
>> +    seqlock_write_lock(&timers_state.clock_seqlock);
>>      if (!timers_state.cpu_ticks_enabled) {
>>          timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
>>          timers_state.cpu_clock_offset -= get_clock();
>>          timers_state.cpu_ticks_enabled = 1;
>>      }
>> +    seqlock_write_unlock(&timers_state.clock_seqlock);
>>  }
>>
>>  /* disable cpu_get_ticks() : the clock is stopped. You must not call
>>     cpu_get_ticks() after that.  */
>>  void cpu_disable_ticks(void)
>>  {
>> +    /* Here, the really thing protected by seqlock is cpu_clock. */
>> +    seqlock_write_lock(&timers_state.clock_seqlock);
>>      if (timers_state.cpu_ticks_enabled) {
>>          timers_state.cpu_ticks_offset = cpu_get_ticks();
>>          timers_state.cpu_clock_offset = cpu_get_clock();
>>          timers_state.cpu_ticks_enabled = 0;
>>      }
>> +    seqlock_write_unlock(&timers_state.clock_seqlock);
>>  }
>>
>>  /* Correlation between real and virtual time is always going to be
>> @@ -364,6 +383,9 @@ static const VMStateDescription vmstate_timers = {
>>
>>  void configure_icount(const char *option)
>>  {
>> +    QemuMutex *mutex = g_malloc0(sizeof(QemuMutex));
>> +    qemu_mutex_init(mutex);
>> +    seqlock_init(&timers_state.clock_seqlock, mutex);
>>      vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
>>      if (!option) {
>>          return;
>> --
>> 1.8.1.4
>>
>>
>
Paolo Bonzini - Aug. 6, 2013, 7:31 a.m.
On 08/06/2013 07:58 AM, liu ping fan wrote:
> On Mon, Aug 5, 2013 at 9:29 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>>> In kvm mode, vm_clock may be read outside BQL.
>>
>> Not just in KVM mode (we will be able to use dataplane with TCG sooner
>> or later), actually.
>>
> Oh. But this patch does not fix cpu_get_icount()'s thread-safe issue.
> So currently, could I just change the commit log instead of fixing it?

Yeah, icount is a bit more complicated.  Just change the commit log.

Paolo
Stefan Hajnoczi - Aug. 6, 2013, 9:30 a.m.
On Mon, Aug 05, 2013 at 03:33:24PM +0800, Liu Ping Fan wrote:
> diff --git a/cpus.c b/cpus.c
> index 85e743d..ab92db9 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -107,12 +107,17 @@ static int64_t qemu_icount;
>  typedef struct TimersState {
>      int64_t cpu_ticks_prev;
>      int64_t cpu_ticks_offset;
> +    /* QemuClock will be read out of BQL, so protect is with private lock.
> +     * As for cpu_ticks, no requirement to read it outside BQL.
> +     * Lock rule: innermost
> +     */

Please document exactly which fields the lock protects.

>  /* enable cpu_get_ticks() */
>  void cpu_enable_ticks(void)
>  {
> +    /* Here, the really thing protected by seqlock is cpu_clock. */

What is cpu_clock?

> @@ -364,6 +383,9 @@ static const VMStateDescription vmstate_timers = {
>  
>  void configure_icount(const char *option)
>  {
> +    QemuMutex *mutex = g_malloc0(sizeof(QemuMutex));
> +    qemu_mutex_init(mutex);
> +    seqlock_init(&timers_state.clock_seqlock, mutex);

We always set up this mutex, so it could be a field in timers_state.
That avoids the g_malloc() without g_free().
pingfan liu - Aug. 7, 2013, 5:46 a.m.
On Tue, Aug 6, 2013 at 5:30 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Mon, Aug 05, 2013 at 03:33:24PM +0800, Liu Ping Fan wrote:
>> diff --git a/cpus.c b/cpus.c
>> index 85e743d..ab92db9 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -107,12 +107,17 @@ static int64_t qemu_icount;
>>  typedef struct TimersState {
>>      int64_t cpu_ticks_prev;
>>      int64_t cpu_ticks_offset;
>> +    /* QemuClock will be read out of BQL, so protect is with private lock.
>> +     * As for cpu_ticks, no requirement to read it outside BQL.
>> +     * Lock rule: innermost
>> +     */
>
> Please document exactly which fields the lock protects.
>
The lock protects cpu_clock_offset. Will fix it.

>>  /* enable cpu_get_ticks() */
>>  void cpu_enable_ticks(void)
>>  {
>> +    /* Here, the really thing protected by seqlock is cpu_clock. */
>
> What is cpu_clock?
>
cpu_clock_offset
>> @@ -364,6 +383,9 @@ static const VMStateDescription vmstate_timers = {
>>
>>  void configure_icount(const char *option)
>>  {
>> +    QemuMutex *mutex = g_malloc0(sizeof(QemuMutex));
>> +    qemu_mutex_init(mutex);
>> +    seqlock_init(&timers_state.clock_seqlock, mutex);
>
> We always set up this mutex, so it could be a field in timers_state.
> That avoids the g_malloc() without g_free().
>
Will fix in next version

Thx&regards,
Pingfan

Patch

diff --git a/cpus.c b/cpus.c
index 85e743d..ab92db9 100644
--- a/cpus.c
+++ b/cpus.c
@@ -107,12 +107,17 @@  static int64_t qemu_icount;
 typedef struct TimersState {
     int64_t cpu_ticks_prev;
     int64_t cpu_ticks_offset;
+    /* QemuClock will be read out of BQL, so protect is with private lock.
+     * As for cpu_ticks, no requirement to read it outside BQL.
+     * Lock rule: innermost
+     */
+    QemuSeqLock clock_seqlock;
     int64_t cpu_clock_offset;
     int32_t cpu_ticks_enabled;
     int64_t dummy;
 } TimersState;
 
-TimersState timers_state;
+static TimersState timers_state;
 
 /* Return the virtual CPU time, based on the instruction counter.  */
 int64_t cpu_get_icount(void)
@@ -132,6 +137,7 @@  int64_t cpu_get_icount(void)
 }
 
 /* return the host CPU cycle counter and handle stop/restart */
+/* cpu_ticks is safely if holding BQL */
 int64_t cpu_get_ticks(void)
 {
     if (use_icount) {
@@ -156,33 +162,46 @@  int64_t cpu_get_ticks(void)
 int64_t cpu_get_clock(void)
 {
     int64_t ti;
-    if (!timers_state.cpu_ticks_enabled) {
-        return timers_state.cpu_clock_offset;
-    } else {
-        ti = get_clock();
-        return ti + timers_state.cpu_clock_offset;
-    }
+    unsigned start;
+
+    do {
+        start = seqlock_read_begin(&timers_state.clock_seqlock);
+        if (!timers_state.cpu_ticks_enabled) {
+            ti = timers_state.cpu_clock_offset;
+        } else {
+            ti = get_clock();
+            ti += timers_state.cpu_clock_offset;
+        }
+    } while (seqlock_read_check(&timers_state.clock_seqlock, start);
+
+    return ti;
 }
 
 /* enable cpu_get_ticks() */
 void cpu_enable_ticks(void)
 {
+    /* Here, the really thing protected by seqlock is cpu_clock. */
+    seqlock_write_lock(&timers_state.clock_seqlock);
     if (!timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
         timers_state.cpu_clock_offset -= get_clock();
         timers_state.cpu_ticks_enabled = 1;
     }
+    seqlock_write_unlock(&timers_state.clock_seqlock);
 }
 
 /* disable cpu_get_ticks() : the clock is stopped. You must not call
    cpu_get_ticks() after that.  */
 void cpu_disable_ticks(void)
 {
+    /* Here, the really thing protected by seqlock is cpu_clock. */
+    seqlock_write_lock(&timers_state.clock_seqlock);
     if (timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset = cpu_get_ticks();
         timers_state.cpu_clock_offset = cpu_get_clock();
         timers_state.cpu_ticks_enabled = 0;
     }
+    seqlock_write_unlock(&timers_state.clock_seqlock);
 }
 
 /* Correlation between real and virtual time is always going to be
@@ -364,6 +383,9 @@  static const VMStateDescription vmstate_timers = {
 
 void configure_icount(const char *option)
 {
+    QemuMutex *mutex = g_malloc0(sizeof(QemuMutex));
+    qemu_mutex_init(mutex);
+    seqlock_init(&timers_state.clock_seqlock, mutex);
     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
     if (!option) {
         return;