diff mbox series

[RFC,13/26] cpus: only take BQL for sleeping threads

Message ID 20171031112610.10516.78685.stgit@pasha-VirtualBox
State New
Headers show
Series replay additions | expand

Commit Message

Pavel Dovgalyuk Oct. 31, 2017, 11:26 a.m. UTC
From: Alex Bennée <alex.bennee@linaro.org>

Now the only real need to hold the BQL is for when we sleep on the
cpu->halt conditional. The lock is actually dropped while the thread
sleeps, so the actual window for contention is pretty small. This also
means we can remove the special case hack for exclusive work and
simply declare that work no longer has an implicit BQL held. This
isn't a major problem, as async work is generally only changing things
in the context of its own vCPU. If it needs to work across vCPUs it
should be using the exclusive mechanism or possibly taking the lock
itself.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>

---
 cpus-common.c |   13 +++++--------
 cpus.c        |   10 ++++------
 2 files changed, 9 insertions(+), 14 deletions(-)

Comments

Paolo Bonzini Nov. 2, 2017, 11:08 a.m. UTC | #1
On 31/10/2017 12:26, Pavel Dovgalyuk wrote:
> From: Alex Bennée <alex.bennee@linaro.org>
> 
> Now the only real need to hold the BQL is for when we sleep on the
> cpu->halt conditional. The lock is actually dropped while the thread
> sleeps so the actual window for contention is pretty small. This also
> means we can remove the special case hack for exclusive work and
> simply declare that work no longer has an implicit BQL held. This
> isn't a major problem async work is generally only changing things in
> the context of its own vCPU. If it needs to work across vCPUs it
> should be using the exclusive mechanism or possibly taking the lock
> itself.
> 
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Tested-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>

At least cpu_throttle_thread would fail with this patch.

Also I am not sure if the s390 SIGP handlers are ready for this.

Paolo

> 
> ---
>  cpus-common.c |   13 +++++--------
>  cpus.c        |   10 ++++------
>  2 files changed, 9 insertions(+), 14 deletions(-)
> 
> diff --git a/cpus-common.c b/cpus-common.c
> index 59f751e..64661c3 100644
> --- a/cpus-common.c
> +++ b/cpus-common.c
> @@ -310,6 +310,11 @@ void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
>      queue_work_on_cpu(cpu, wi);
>  }
>  
> +/* Work items run outside of the BQL. This is essential for avoiding a
> + * deadlock for exclusive work but also applies to non-exclusive work.
> + * If the work requires cross-vCPU changes then it should use the
> + * exclusive mechanism.
> + */
>  void process_queued_cpu_work(CPUState *cpu)
>  {
>      struct qemu_work_item *wi;
> @@ -327,17 +332,9 @@ void process_queued_cpu_work(CPUState *cpu)
>          }
>          qemu_mutex_unlock(&cpu->work_mutex);
>          if (wi->exclusive) {
> -            /* Running work items outside the BQL avoids the following deadlock:
> -             * 1) start_exclusive() is called with the BQL taken while another
> -             * CPU is running; 2) cpu_exec in the other CPU tries to takes the
> -             * BQL, so it goes to sleep; start_exclusive() is sleeping too, so
> -             * neither CPU can proceed.
> -             */
> -            qemu_mutex_unlock_iothread();
>              start_exclusive();
>              wi->func(cpu, wi->data);
>              end_exclusive();
> -            qemu_mutex_lock_iothread();
>          } else {
>              wi->func(cpu, wi->data);
>          }
> diff --git a/cpus.c b/cpus.c
> index efde5c1..de6dfce 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1127,31 +1127,29 @@ static bool qemu_tcg_should_sleep(CPUState *cpu)
>  
>  static void qemu_tcg_wait_io_event(CPUState *cpu)
>  {
> -    qemu_mutex_lock_iothread();
>  
>      while (qemu_tcg_should_sleep(cpu)) {
> +        qemu_mutex_lock_iothread();
>          stop_tcg_kick_timer();
>          qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
> +        qemu_mutex_unlock_iothread();
>      }
>  
>      start_tcg_kick_timer();
>  
>      qemu_wait_io_event_common(cpu);
> -
> -    qemu_mutex_unlock_iothread();
>  }
>  
>  static void qemu_kvm_wait_io_event(CPUState *cpu)
>  {
> -    qemu_mutex_lock_iothread();
>  
>      while (cpu_thread_is_idle(cpu)) {
> +        qemu_mutex_lock_iothread();
>          qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
> +        qemu_mutex_unlock_iothread();
>      }
>  
>      qemu_wait_io_event_common(cpu);
> -
> -    qemu_mutex_unlock_iothread();
>  }
>  
>  static void *qemu_kvm_cpu_thread_fn(void *arg)
>
David Hildenbrand Nov. 2, 2017, 6:39 p.m. UTC | #2
On 02.11.2017 12:08, Paolo Bonzini wrote:
> On 31/10/2017 12:26, Pavel Dovgalyuk wrote:
>> From: Alex Bennée <alex.bennee@linaro.org>
>>
>> Now the only real need to hold the BQL is for when we sleep on the
>> cpu->halt conditional. The lock is actually dropped while the thread
>> sleeps so the actual window for contention is pretty small. This also
>> means we can remove the special case hack for exclusive work and
>> simply declare that work no longer has an implicit BQL held. This
>> isn't a major problem async work is generally only changing things in
>> the context of its own vCPU. If it needs to work across vCPUs it
>> should be using the exclusive mechanism or possibly taking the lock
>> itself.
>>
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> Tested-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
> 
> At least cpu_throttle_thread would fail with this patch.
> 
> Also I am not sure if the s390 SIGP handlers are ready for this.
> 

We have a global lock to the SIGP "facility". However we need the BQL in
order to inject interrupts into CPUs (otherwise it would trigger an
assert when injecting).

We inject Restart and Stop interrupts from run_on_cpu. This requires the
BQL. So Paolo should be right, this change would break s390x.


> Paolo
Paolo Bonzini Nov. 2, 2017, 8:03 p.m. UTC | #3
----- Original Message -----
> From: "David Hildenbrand" <david@redhat.com>
> To: "Paolo Bonzini" <pbonzini@redhat.com>, "Pavel Dovgalyuk" <Pavel.Dovgaluk@ispras.ru>, qemu-devel@nongnu.org
> Cc: kwolf@redhat.com, "peter maydell" <peter.maydell@linaro.org>, "boost lists" <boost.lists@gmail.com>,
> quintela@redhat.com, jasowang@redhat.com, mst@redhat.com, zuban32s@gmail.com, "maria klimushenkova"
> <maria.klimushenkova@ispras.ru>, dovgaluk@ispras.ru, kraxel@redhat.com, "alex bennee" <alex.bennee@linaro.org>
> Sent: Thursday, November 2, 2017 7:39:22 PM
> Subject: Re: [RFC PATCH 13/26] cpus: only take BQL for sleeping threads
> 
> On 02.11.2017 12:08, Paolo Bonzini wrote:
> > On 31/10/2017 12:26, Pavel Dovgalyuk wrote:
> >> From: Alex Bennée <alex.bennee@linaro.org>
> >>
> >> Now the only real need to hold the BQL is for when we sleep on the
> >> cpu->halt conditional. The lock is actually dropped while the thread
> >> sleeps so the actual window for contention is pretty small. This also
> >> means we can remove the special case hack for exclusive work and
> >> simply declare that work no longer has an implicit BQL held. This
> >> isn't a major problem async work is generally only changing things in
> >> the context of its own vCPU. If it needs to work across vCPUs it
> >> should be using the exclusive mechanism or possibly taking the lock
> >> itself.
> >>
> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> >> Tested-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
> > 
> > At least cpu_throttle_thread would fail with this patch.
> > 
> > Also I am not sure if the s390 SIGP handlers are ready for this.
> > 
> 
> We have a global lock to the SIGP "facility". However we need the BQL in
> order to inject interrupts into CPUs (otherwise it would trigger an
> assert when injecting).
> 
> We inject Restart and Stop interrupts from run_on_cpu. This requires the
> BQL. So Paolo should be right, this change would break s390x.

I had some patches to access interrupt_request with the atomic builtins.  If
Pavel can first extract the other changes to the icount mechanism, I can
update them.

Paolo
Pavel Dovgalyuk Nov. 13, 2017, 8:52 a.m. UTC | #4
> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> > From: "David Hildenbrand" <david@redhat.com>
> > On 02.11.2017 12:08, Paolo Bonzini wrote:
> > > On 31/10/2017 12:26, Pavel Dovgalyuk wrote:
> > >> From: Alex Bennée <alex.bennee@linaro.org>
> > >>
> > >> Now the only real need to hold the BQL is for when we sleep on the
> > >> cpu->halt conditional. The lock is actually dropped while the thread
> > >> sleeps so the actual window for contention is pretty small. This also
> > >> means we can remove the special case hack for exclusive work and
> > >> simply declare that work no longer has an implicit BQL held. This
> > >> isn't a major problem async work is generally only changing things in
> > >> the context of its own vCPU. If it needs to work across vCPUs it
> > >> should be using the exclusive mechanism or possibly taking the lock
> > >> itself.
> > >>
> > >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> > >> Tested-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
> > >
> > > At least cpu_throttle_thread would fail with this patch.
> > >
> > > Also I am not sure if the s390 SIGP handlers are ready for this.
> > >
> >
> > We have a global lock to the SIGP "facility". However we need the BQL in
> > order to inject interrupts into CPUs (otherwise it would trigger an
> > assert when injecting).
> >
> > We inject Restart and Stop interrupts from run_on_cpu. This requires the
> > BQL. So Paolo should be right, this change would break s390x.
> 
> I had some patches to access interrupt_request with the atomic builtins.  If
> Pavel can first extract the other changes to the icount mechanism, I can
> update them.

What changes do you mean here?
I'm not sure that I understand clearly how threads interact with BQL.
These patches were authored by Alex and we'll have to get him into the discussion.

Pavel Dovgalyuk
Alex Bennée Nov. 13, 2017, 10:14 a.m. UTC | #5
Pavel Dovgalyuk <dovgaluk@ispras.ru> writes:

>> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
>> > From: "David Hildenbrand" <david@redhat.com>
>> > On 02.11.2017 12:08, Paolo Bonzini wrote:
>> > > On 31/10/2017 12:26, Pavel Dovgalyuk wrote:
>> > >> From: Alex Bennée <alex.bennee@linaro.org>
>> > >>
>> > >> Now the only real need to hold the BQL is for when we sleep on the
>> > >> cpu->halt conditional. The lock is actually dropped while the thread
>> > >> sleeps so the actual window for contention is pretty small. This also
>> > >> means we can remove the special case hack for exclusive work and
>> > >> simply declare that work no longer has an implicit BQL held. This
>> > >> isn't a major problem async work is generally only changing things in
>> > >> the context of its own vCPU. If it needs to work across vCPUs it
>> > >> should be using the exclusive mechanism or possibly taking the lock
>> > >> itself.
>> > >>
>> > >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> > >> Tested-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
>> > >
>> > > At least cpu_throttle_thread would fail with this patch.
>> > >
>> > > Also I am not sure if the s390 SIGP handlers are ready for this.
>> > >
>> >
>> > We have a global lock to the SIGP "facility". However we need the BQL in
>> > order to inject interrupts into CPUs (otherwise it would trigger an
>> > assert when injecting).
>> >
>> > We inject Restart and Stop interrupts from run_on_cpu. This requires the
>> > BQL. So Paolo should be right, this change would break s390x.
>> 
>> I had some patches to access interrupt_request with the atomic builtins.  If
>> Pavel can first extract the other changes to the icount mechanism, I can
>> update them.
>
> What changes do you mean here?
> I'm not sure that I understand clearly how threads interact with BQL.
> These patches were authored by Alex and we'll have to get him into the
> discussion.

Do you want me to re-spin my sub-set of the patches as a new base?
Paolo Bonzini Nov. 13, 2017, 10:58 a.m. UTC | #6
On 13/11/2017 11:14, Alex Bennée wrote:
> 
> Pavel Dovgalyuk <dovgaluk@ispras.ru> writes:
> 
>>> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
>>>> From: "David Hildenbrand" <david@redhat.com>
>>>> On 02.11.2017 12:08, Paolo Bonzini wrote:
>>>>> On 31/10/2017 12:26, Pavel Dovgalyuk wrote:
>>>>>> From: Alex Bennée <alex.bennee@linaro.org>
>>>>>>
>>>>>> Now the only real need to hold the BQL is for when we sleep on the
>>>>>> cpu->halt conditional. The lock is actually dropped while the thread
>>>>>> sleeps so the actual window for contention is pretty small. This also
>>>>>> means we can remove the special case hack for exclusive work and
>>>>>> simply declare that work no longer has an implicit BQL held. This
>>>>>> isn't a major problem async work is generally only changing things in
>>>>>> the context of its own vCPU. If it needs to work across vCPUs it
>>>>>> should be using the exclusive mechanism or possibly taking the lock
>>>>>> itself.
>>>>>>
>>>>>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>>>>>> Tested-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
>>>>>
>>>>> At least cpu_throttle_thread would fail with this patch.
>>>>>
>>>>> Also I am not sure if the s390 SIGP handlers are ready for this.
>>>>>
>>>>
>>>> We have a global lock to the SIGP "facility". However we need the BQL in
>>>> order to inject interrupts into CPUs (otherwise it would trigger an
>>>> assert when injecting).
>>>>
>>>> We inject Restart and Stop interrupts from run_on_cpu. This requires the
>>>> BQL. So Paolo should be right, this change would break s390x.
>>>
>>> I had some patches to access interrupt_request with the atomic builtins.  If
>>> Pavel can first extract the other changes to the icount mechanism, I can
>>> update them.
>>
>> What changes do you mean here?
>> I'm not sure that I understand clearly how threads interact with BQL.
>> These patches were authored by Alex and we'll have to get him into the
>> discussion.
> 
> Do you want me to re-spin my sub-set of the patches as a new base?

I think the first part to be merged is changes to cpu-exec.c and
friends.  These might even go into 2.11.

Paolo
diff mbox series

Patch

diff --git a/cpus-common.c b/cpus-common.c
index 59f751e..64661c3 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -310,6 +310,11 @@  void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
     queue_work_on_cpu(cpu, wi);
 }
 
+/* Work items run outside of the BQL. This is essential for avoiding a
+ * deadlock for exclusive work but also applies to non-exclusive work.
+ * If the work requires cross-vCPU changes then it should use the
+ * exclusive mechanism.
+ */
 void process_queued_cpu_work(CPUState *cpu)
 {
     struct qemu_work_item *wi;
@@ -327,17 +332,9 @@  void process_queued_cpu_work(CPUState *cpu)
         }
         qemu_mutex_unlock(&cpu->work_mutex);
         if (wi->exclusive) {
-            /* Running work items outside the BQL avoids the following deadlock:
-             * 1) start_exclusive() is called with the BQL taken while another
-             * CPU is running; 2) cpu_exec in the other CPU tries to takes the
-             * BQL, so it goes to sleep; start_exclusive() is sleeping too, so
-             * neither CPU can proceed.
-             */
-            qemu_mutex_unlock_iothread();
             start_exclusive();
             wi->func(cpu, wi->data);
             end_exclusive();
-            qemu_mutex_lock_iothread();
         } else {
             wi->func(cpu, wi->data);
         }
diff --git a/cpus.c b/cpus.c
index efde5c1..de6dfce 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1127,31 +1127,29 @@  static bool qemu_tcg_should_sleep(CPUState *cpu)
 
 static void qemu_tcg_wait_io_event(CPUState *cpu)
 {
-    qemu_mutex_lock_iothread();
 
     while (qemu_tcg_should_sleep(cpu)) {
+        qemu_mutex_lock_iothread();
         stop_tcg_kick_timer();
         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
+        qemu_mutex_unlock_iothread();
     }
 
     start_tcg_kick_timer();
 
     qemu_wait_io_event_common(cpu);
-
-    qemu_mutex_unlock_iothread();
 }
 
 static void qemu_kvm_wait_io_event(CPUState *cpu)
 {
-    qemu_mutex_lock_iothread();
 
     while (cpu_thread_is_idle(cpu)) {
+        qemu_mutex_lock_iothread();
         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
+        qemu_mutex_unlock_iothread();
     }
 
     qemu_wait_io_event_common(cpu);
-
-    qemu_mutex_unlock_iothread();
 }
 
 static void *qemu_kvm_cpu_thread_fn(void *arg)