[RFC,v4,04/71] cpu: make qemu_work_cond per-cpu

Message ID 20181025144644.15464-4-cota@braap.org
State New
Series [RFC,v4,01/71] cpu: convert queued work to a QSIMPLEQ

Commit Message

Emilio Cota Oct. 25, 2018, 2:45 p.m. UTC
This eliminates the need to use the BQL to queue CPU work.

While at it, give the per-cpu field a generic name ("cond") since
it will soon be used for more than just queueing CPU work.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 include/qom/cpu.h |  6 ++--
 cpus-common.c     | 72 ++++++++++++++++++++++++++++++++++++++---------
 cpus.c            |  2 +-
 qom/cpu.c         |  1 +
 4 files changed, 63 insertions(+), 18 deletions(-)
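
To make the new scheme concrete, here is a minimal, self-contained pthreads sketch of the pattern this patch moves to: the requester queues work, kicks the target CPU, and sleeps on that CPU's own condition variable rather than on a global qemu_work_cond under the BQL. This is an editor's analogue, not QEMU code; all toy_* names are hypothetical stand-ins for the QEMU primitives.

/* Build with: cc -pthread toy.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_cpu {
    pthread_mutex_t lock;    /* plays the role of CPUState::lock */
    pthread_cond_t cond;     /* plays the role of the new CPUState::cond */
    bool work_pending;
    bool work_done;
};

/* Target CPU thread: pick up queued work, then wake the requester. */
static void *toy_cpu_thread(void *opaque)
{
    struct toy_cpu *cpu = opaque;

    pthread_mutex_lock(&cpu->lock);
    while (!cpu->work_pending) {
        pthread_cond_wait(&cpu->cond, &cpu->lock);
    }
    cpu->work_pending = false;
    cpu->work_done = true;                  /* "run" the work item */
    pthread_cond_broadcast(&cpu->cond);     /* cf. process_queued_cpu_work() */
    pthread_mutex_unlock(&cpu->lock);
    return NULL;
}

/* Requester: queue, kick, and sleep on this CPU's cond only. */
static void toy_run_on_cpu(struct toy_cpu *cpu)
{
    pthread_mutex_lock(&cpu->lock);
    cpu->work_pending = true;               /* queue the work item */
    pthread_cond_broadcast(&cpu->cond);     /* the "kick" */
    while (!cpu->work_done) {
        pthread_cond_wait(&cpu->cond, &cpu->lock);
    }
    pthread_mutex_unlock(&cpu->lock);
}

int main(void)
{
    struct toy_cpu cpu;
    pthread_t thread;

    pthread_mutex_init(&cpu.lock, NULL);    /* cf. cpu_common_initfn() */
    pthread_cond_init(&cpu.cond, NULL);
    cpu.work_pending = false;
    cpu.work_done = false;

    pthread_create(&thread, NULL, toy_cpu_thread, &cpu);
    toy_run_on_cpu(&cpu);
    pthread_join(thread, NULL);
    printf("work ran on the target CPU thread\n");
    return 0;
}

Because each CPU has its own lock/cond pair, two requesters targeting different CPUs no longer contend on a single global condition variable.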

Comments

Richard Henderson Oct. 26, 2018, 2:45 p.m. UTC | #1
On 10/25/18 3:45 PM, Emilio G. Cota wrote:
> This eliminates the need to use the BQL to queue CPU work.
> 
> While at it, give the per-cpu field a generic name ("cond") since
> it will soon be used for more than just queueing CPU work.
> 
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  include/qom/cpu.h |  6 ++--
>  cpus-common.c     | 72 ++++++++++++++++++++++++++++++++++++++---------
>  cpus.c            |  2 +-
>  qom/cpu.c         |  1 +
>  4 files changed, 63 insertions(+), 18 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
Alex Bennée Oct. 30, 2018, 12:27 p.m. UTC | #2
Emilio G. Cota <cota@braap.org> writes:

> This eliminates the need to use the BQL to queue CPU work.
>
> While at it, give the per-cpu field a generic name ("cond") since
> it will soon be used for more than just queueing CPU work.
>
> Signed-off-by: Emilio G. Cota <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

--
Alex Bennée

Patch

diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 7fdb5a2be0..2fad537a4f 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -316,6 +316,7 @@ struct qemu_work_item;
  * @mem_io_vaddr: Target virtual address at which the memory was accessed.
  * @kvm_fd: vCPU file descriptor for KVM.
  * @lock: Lock to prevent multiple access to per-CPU fields.
+ * @cond: Condition variable for per-CPU events.
  * @work_list: List of pending asynchronous work.
  * @trace_dstate_delayed: Delayed changes to trace_dstate (includes all changes
  *                        to @trace_dstate).
@@ -358,6 +359,7 @@ struct CPUState {
 
     QemuMutex lock;
     /* fields below protected by @lock */
+    QemuCond cond;
     QSIMPLEQ_HEAD(, qemu_work_item) work_list;
 
     CPUAddressSpace *cpu_ases;
@@ -769,12 +771,10 @@ bool cpu_is_stopped(CPUState *cpu);
  * @cpu: The vCPU to run on.
  * @func: The function to be executed.
  * @data: Data to pass to the function.
- * @mutex: Mutex to release while waiting for @func to run.
  *
  * Used internally in the implementation of run_on_cpu.
  */
-void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
-                   QemuMutex *mutex);
+void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data);
 
 /**
  * run_on_cpu:
diff --git a/cpus-common.c b/cpus-common.c
index 2913294cb7..71469c85ce 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -26,7 +26,6 @@
 static QemuMutex qemu_cpu_list_lock;
 static QemuCond exclusive_cond;
 static QemuCond exclusive_resume;
-static QemuCond qemu_work_cond;
 
 /* >= 1 if a thread is inside start_exclusive/end_exclusive.  Written
  * under qemu_cpu_list_lock, read with atomic operations.
@@ -42,7 +41,6 @@ void qemu_init_cpu_list(void)
     qemu_mutex_init(&qemu_cpu_list_lock);
     qemu_cond_init(&exclusive_cond);
     qemu_cond_init(&exclusive_resume);
-    qemu_cond_init(&qemu_work_cond);
 }
 
 void cpu_list_lock(void)
@@ -113,23 +111,37 @@ struct qemu_work_item {
     bool free, exclusive, done;
 };
 
-static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
+/* Called with the CPU's lock held */
+static void queue_work_on_cpu_locked(CPUState *cpu, struct qemu_work_item *wi)
 {
-    qemu_mutex_lock(&cpu->lock);
     QSIMPLEQ_INSERT_TAIL(&cpu->work_list, wi, node);
     wi->done = false;
-    qemu_mutex_unlock(&cpu->lock);
 
     qemu_cpu_kick(cpu);
 }
 
-void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
-                   QemuMutex *mutex)
+static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
+{
+    cpu_mutex_lock(cpu);
+    queue_work_on_cpu_locked(cpu, wi);
+    cpu_mutex_unlock(cpu);
+}
+
+void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
 {
     struct qemu_work_item wi;
+    bool has_bql = qemu_mutex_iothread_locked();
+
+    g_assert(no_cpu_mutex_locked());
 
     if (qemu_cpu_is_self(cpu)) {
-        func(cpu, data);
+        if (has_bql) {
+            func(cpu, data);
+        } else {
+            qemu_mutex_lock_iothread();
+            func(cpu, data);
+            qemu_mutex_unlock_iothread();
+        }
         return;
     }
 
@@ -139,13 +151,34 @@ void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
     wi.free = false;
     wi.exclusive = false;
 
-    queue_work_on_cpu(cpu, &wi);
+    cpu_mutex_lock(cpu);
+    queue_work_on_cpu_locked(cpu, &wi);
+
+    /*
+     * We are going to sleep on the CPU lock, so release the BQL.
+     *
+     * During the transition to per-CPU locks, we release the BQL _after_
+     * having kicked the destination CPU (from queue_work_on_cpu_locked above).
+     * This makes sure that the enqueued work will be seen by the CPU
+     * after being woken up from the kick, since the CPU sleeps on the BQL.
+     * Once we complete the transition to per-CPU locks, we will release
+     * the BQL earlier in this function.
+     */
+    if (has_bql) {
+        qemu_mutex_unlock_iothread();
+    }
+
     while (!atomic_mb_read(&wi.done)) {
         CPUState *self_cpu = current_cpu;
 
-        qemu_cond_wait(&qemu_work_cond, mutex);
+        qemu_cond_wait(&cpu->cond, &cpu->lock);
         current_cpu = self_cpu;
     }
+    cpu_mutex_unlock(cpu);
+
+    if (has_bql) {
+        qemu_mutex_lock_iothread();
+    }
 }
 
 void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
@@ -307,6 +340,7 @@ void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
 void process_queued_cpu_work(CPUState *cpu)
 {
     struct qemu_work_item *wi;
+    bool has_bql = qemu_mutex_iothread_locked();
 
     qemu_mutex_lock(&cpu->lock);
     if (QSIMPLEQ_EMPTY(&cpu->work_list)) {
@@ -324,13 +358,23 @@ void process_queued_cpu_work(CPUState *cpu)
              * BQL, so it goes to sleep; start_exclusive() is sleeping too, so
              * neither CPU can proceed.
              */
-            qemu_mutex_unlock_iothread();
+            if (has_bql) {
+                qemu_mutex_unlock_iothread();
+            }
             start_exclusive();
             wi->func(cpu, wi->data);
             end_exclusive();
-            qemu_mutex_lock_iothread();
+            if (has_bql) {
+                qemu_mutex_lock_iothread();
+            }
         } else {
-            wi->func(cpu, wi->data);
+            if (has_bql) {
+                wi->func(cpu, wi->data);
+            } else {
+                qemu_mutex_lock_iothread();
+                wi->func(cpu, wi->data);
+                qemu_mutex_unlock_iothread();
+            }
         }
         qemu_mutex_lock(&cpu->lock);
         if (wi->free) {
@@ -340,5 +384,5 @@ void process_queued_cpu_work(CPUState *cpu)
         }
     }
     qemu_mutex_unlock(&cpu->lock);
-    qemu_cond_broadcast(&qemu_work_cond);
+    qemu_cond_broadcast(&cpu->cond);
 }
diff --git a/cpus.c b/cpus.c
index 38cc9e1278..d0b7f8e02d 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1236,7 +1236,7 @@ void qemu_init_cpu_loop(void)
 
 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
 {
-    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
+    do_run_on_cpu(cpu, func, data);
 }
 
 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
diff --git a/qom/cpu.c b/qom/cpu.c
index d0758c907d..bb031a3a6a 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -373,6 +373,7 @@ static void cpu_common_initfn(Object *obj)
     cpu->nr_threads = 1;
 
     qemu_mutex_init(&cpu->lock);
+    qemu_cond_init(&cpu->cond);
     QSIMPLEQ_INIT(&cpu->work_list);
     QTAILQ_INIT(&cpu->breakpoints);
     QTAILQ_INIT(&cpu->watchpoints);
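
For completeness, here is a minimal sketch of the has_bql dance that do_run_on_cpu() and process_queued_cpu_work() perform above: record whether the caller holds the big lock, drop it before sleeping on the per-CPU condition variable, and retake it on the way out, so both BQL and non-BQL callers are served. Again plain pthreads rather than QEMU code; the toy_bql* helpers are hypothetical stand-ins for the BQL primitives.

/* Build with: cc -pthread toy_bql.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t toy_bql = PTHREAD_MUTEX_INITIALIZER;
static __thread bool toy_bql_held;   /* cf. qemu_mutex_iothread_locked() */

static void toy_bql_lock(void)
{
    pthread_mutex_lock(&toy_bql);
    toy_bql_held = true;
}

static void toy_bql_unlock(void)
{
    toy_bql_held = false;
    pthread_mutex_unlock(&toy_bql);
}

static pthread_mutex_t cpu_lock = PTHREAD_MUTEX_INITIALIZER;  /* cf. cpu->lock */
static pthread_cond_t cpu_cond = PTHREAD_COND_INITIALIZER;    /* cf. cpu->cond */
static bool work_done;

/* Worker thread: complete the work, wake waiters on the per-CPU cond. */
static void *worker(void *opaque)
{
    (void)opaque;
    pthread_mutex_lock(&cpu_lock);
    work_done = true;
    pthread_cond_broadcast(&cpu_cond);   /* cf. end of process_queued_cpu_work() */
    pthread_mutex_unlock(&cpu_lock);
    return NULL;
}

/* Wait for completion whether or not the caller holds the big lock. */
static void wait_for_work(void)
{
    bool has_bql = toy_bql_held;

    pthread_mutex_lock(&cpu_lock);
    if (has_bql) {
        toy_bql_unlock();                /* never sleep while holding the BQL */
    }
    while (!work_done) {
        pthread_cond_wait(&cpu_cond, &cpu_lock);
    }
    pthread_mutex_unlock(&cpu_lock);
    if (has_bql) {
        toy_bql_lock();                  /* restore the caller's locking state */
    }
}

int main(void)
{
    pthread_t thread;

    toy_bql_lock();                      /* this caller holds the "BQL" */
    pthread_create(&thread, NULL, worker, NULL);
    wait_for_work();                     /* drops and retakes it internally */
    pthread_join(thread, NULL);
    toy_bql_unlock();
    printf("done; caller's BQL state preserved\n");
    return 0;
}

Note the ordering, which mirrors the comment in the patch: the per-CPU lock is taken (and in QEMU the target CPU is kicked) before the big lock is dropped, so the woken CPU cannot miss the queued work during the transition period.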