| Message ID | 1466375313-7562-8-git-send-email-sergey.fedorov@linaro.org |
|---|---|
| State | New |
Sergey Fedorov <sergey.fedorov@linaro.org> writes:

> From: Sergey Fedorov <serge.fdrv@gmail.com>
>
> This patch is based on the ideas found in the work of KONRAD Frederic [1],
> Alex Bennée [2], and Alvise Rigo [3].
>
> This mechanism allows an operation to be performed safely in a quiescent
> state. Quiescent state means: (1) no vCPU is running and (2) the BQL in
> system-mode or 'exclusive_lock' in user-mode emulation is held while the
> operation is performed. This functionality is required e.g. for
> performing a translation buffer flush safely in multi-threaded user-mode
> emulation.
>
> The existing CPU work queue is used to schedule such safe operations. A
> new 'safe' flag is added to struct qemu_work_item to designate the
> special requirements of safe work. An operation in a quiescent state
> can be scheduled with the async_safe_run_on_cpu() function, which is
> the same as async_run_on_cpu() except that it marks the queued work
> item with the 'safe' flag set to true. With this flag set,
> queue_work_on_cpu() atomically increments the 'safe_work_pending' global
> counter and kicks all the CPUs instead of just the target CPU, as in the
> case of normal CPU work. This forces the other CPUs to exit their
> execution loops and wait in wait_safe_cpu_work() for the safe work to
> finish. When a CPU drains its work queue and encounters a work item
> marked as safe, it first waits for the other CPUs to exit their
> execution loops, then calls the work item function, and finally
> decrements the 'safe_work_pending' counter, signalling the other CPUs so
> that they can continue execution as soon as all pending safe work items
> have been processed. 'tcg_pending_cpus', protected by 'exclusive_lock'
> in user-mode or by 'qemu_global_mutex' in system-mode emulation, is used
> to determine whether any CPU is running and to wait for it to exit the
> execution loop. The fairness of all the CPU work queues is ensured by
> draining all the pending safe work items before any CPU can run.
>
> [1] http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg01128.html
> [2] http://lists.nongnu.org/archive/html/qemu-devel/2016-04/msg02531.html
> [3] http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg04792.html
>
> Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
> Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
> ---
>  cpu-exec-common.c       | 45 ++++++++++++++++++++++++++++++++++++++++++++-
>  cpus.c                  | 16 ++++++++++++++++
>  include/exec/exec-all.h |  2 ++
>  include/qom/cpu.h       | 14 ++++++++++++++
>  linux-user/main.c       |  2 +-
>  5 files changed, 77 insertions(+), 2 deletions(-)
>
(snip)
> diff --git a/cpus.c b/cpus.c
> index 98f60f6f98f5..bb6bd8615cfc 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -932,6 +932,18 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
>  {
>  }
>
> +static void tcg_cpu_exec_start(CPUState *cpu)
> +{
> +    tcg_pending_cpus++;
> +}
> +
> +static void tcg_cpu_exec_end(CPUState *cpu)
> +{
> +    if (--tcg_pending_cpus) {
> +        signal_cpu_work();
> +    }
> +}

Don't these need to be atomic?
(snip)

--
Alex Bennée
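For context, the use case named in the commit message — flushing the translation buffer safely — would go through the new API roughly as follows. This is a sketch only: do_tb_flush_safe and schedule_safe_tb_flush are hypothetical names, not part of the series, and the tb_flush() signature is assumed rather than taken from this patch.

```c
/* Hypothetical illustration of the API added by this patch. The work
 * function runs only once every other vCPU has left its execution loop. */
static void do_tb_flush_safe(CPUState *cpu, void *data)
{
    /* All vCPUs are quiescent here, and the BQL (system-mode) or
     * 'exclusive_lock' (user-mode) is held, so no translated code is
     * executing while the buffer is reset. */
    tb_flush(cpu);   /* assumed signature: tb_flush(CPUState *) */
}

static void schedule_safe_tb_flush(CPUState *cpu)
{
    /* Returns immediately; the flush runs asynchronously once a
     * quiescent state is reached. */
    async_safe_run_on_cpu(cpu, do_tb_flush_safe, NULL);
}
```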
On 27/06/16 12:36, Alex Bennée wrote:
> Sergey Fedorov <sergey.fedorov@linaro.org> writes:
>
>> From: Sergey Fedorov <serge.fdrv@gmail.com>
>>
(snip)
>> diff --git a/cpus.c b/cpus.c
>> index 98f60f6f98f5..bb6bd8615cfc 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -932,6 +932,18 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
>>  {
>>  }
>>
>> +static void tcg_cpu_exec_start(CPUState *cpu)
>> +{
>> +    tcg_pending_cpus++;
>> +}
>> +
>> +static void tcg_cpu_exec_end(CPUState *cpu)
>> +{
>> +    if (--tcg_pending_cpus) {
>> +        signal_cpu_work();
>> +    }
>> +}
> Don't these need to be atomic?

'tcg_pending_cpus' is protected by BQL.

(snip)

Thanks,
Sergey
Sergey Fedorov <serge.fdrv@gmail.com> writes:

> On 27/06/16 12:36, Alex Bennée wrote:
>> Sergey Fedorov <sergey.fedorov@linaro.org> writes:
>>
> (snip)
>>> +static void tcg_cpu_exec_start(CPUState *cpu)
>>> +{
>>> +    tcg_pending_cpus++;
>>> +}
>>> +
>>> +static void tcg_cpu_exec_end(CPUState *cpu)
>>> +{
>>> +    if (--tcg_pending_cpus) {
>>> +        signal_cpu_work();
>>> +    }
>>> +}
>> Don't these need to be atomic?
>
> 'tcg_pending_cpus' is protected by BQL.

A quick comment above the function would help then.

(snip)

--
Alex Bennée
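Concretely, the comment being asked for might read like this — a sketch over the functions exactly as posted (the comment text is an assumption about what a later revision could say, not code from the series):

```c
/* Called with the BQL ('qemu_global_mutex') held; that, rather than any
 * atomic operation, is what makes the plain increment/decrement of
 * 'tcg_pending_cpus' safe. */
static void tcg_cpu_exec_start(CPUState *cpu)
{
    tcg_pending_cpus++;
}

/* Also called with the BQL held. */
static void tcg_cpu_exec_end(CPUState *cpu)
{
    if (--tcg_pending_cpus) {
        signal_cpu_work();
    }
}
```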
Hi Sergey,

On Mon, Jun 20, 2016 at 12:28 AM, Sergey Fedorov
<sergey.fedorov@linaro.org> wrote:
>
> From: Sergey Fedorov <serge.fdrv@gmail.com>
>
(snip)
> diff --git a/cpu-exec-common.c b/cpu-exec-common.c
> index 8184e0662cbd..3056324738f8 100644
> --- a/cpu-exec-common.c
> +++ b/cpu-exec-common.c
(snip)
> +static int safe_work_pending;
> +
> +void wait_safe_cpu_work(void)
> +{
> +    while (atomic_mb_read(&safe_work_pending) > 0) {
> +        wait_cpu_work();
> +    }
> +}
> +

Is this piece of code deadlock-safe once we are in mttcg mode? What
happens when two threads call async_safe_run_on_cpu simultaneously?

(snip)

Thank you,
alvise
On 01/07/16 19:29, Alvise Rigo wrote:
> Hi Sergey,
>
> On Mon, Jun 20, 2016 at 12:28 AM, Sergey Fedorov
> <sergey.fedorov@linaro.org> wrote:
(snip)
>> +static int safe_work_pending;
>> +
>> +void wait_safe_cpu_work(void)
>> +{
>> +    while (atomic_mb_read(&safe_work_pending) > 0) {
>> +        wait_cpu_work();
>> +    }
>> +}
>> +
> Is this piece of code deadlock-safe once we are in mttcg mode?

It is supposed to be deadlock-safe.

> What happens when two threads call async_safe_run_on_cpu simultaneously?

In this case each thread will roughly:
 - exit its execution loop;
 - take the BQL;
 - decrement 'tcg_pending_cpus', signalling 'qemu_work_cond' if it reaches zero;
 - start processing its work queue;
 - on encountering safe work, wait on 'qemu_work_cond' for 'tcg_pending_cpus' to become zero;
 - reacquire the BQL;
 - process the safe work;
 - decrement 'safe_work_pending', signalling 'qemu_work_cond' if it reaches zero;
 - when finished processing work, wait on 'qemu_work_cond' for 'safe_work_pending' to become zero;
 - reacquire the BQL;
 - continue execution (releasing the BQL).

Hope this will help.

Kind regards,
Sergey.
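Sergey's walk-through can be exercised with a toy model. The sketch below is standalone pthreads code, not QEMU code: the names mirror the patch ('tcg_pending_cpus', 'safe_work_pending', 'work_cond'), a single mutex stands in for the BQL/'exclusive_lock', and each of two threads schedules one safe work item of its own before draining. Every wait has a matching broadcast when its counter drops to zero, which is why the two threads cannot block each other indefinitely:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for the BQL */
static pthread_cond_t work_cond = PTHREAD_COND_INITIALIZER;
static int tcg_pending_cpus;
static int safe_work_pending;

static void *vcpu_thread(void *arg)
{
    int id = *(int *)arg;

    /* "Enter the execution loop" and "async_safe_run_on_cpu()". */
    pthread_mutex_lock(&lock);
    tcg_pending_cpus++;
    safe_work_pending++;
    pthread_mutex_unlock(&lock);

    pthread_mutex_lock(&lock);
    if (--tcg_pending_cpus == 0) {      /* "exit the execution loop" */
        pthread_cond_broadcast(&work_cond);
    }
    /* Drain own queue: safe work must wait until no CPU is running. */
    while (tcg_pending_cpus > 0) {
        pthread_cond_wait(&work_cond, &lock);
    }
    printf("cpu %d: running safe work in quiescent state\n", id);
    if (--safe_work_pending == 0) {
        pthread_cond_broadcast(&work_cond);
    }
    /* wait_safe_cpu_work(): don't resume until all safe work is done. */
    while (safe_work_pending > 0) {
        pthread_cond_wait(&work_cond, &lock);
    }
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t t[2];
    int ids[2] = {0, 1};
    for (int i = 0; i < 2; i++) {
        pthread_create(&t[i], NULL, vcpu_thread, &ids[i]);
    }
    for (int i = 0; i < 2; i++) {
        pthread_join(t[i], NULL);
    }
    return 0;
}
```

Running it prints one "quiescent state" line per thread and exits; removing either broadcast makes some interleavings hang, which is the kind of deadlock Alvise is asking about.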
diff --git a/cpu-exec-common.c b/cpu-exec-common.c
index 8184e0662cbd..3056324738f8 100644
--- a/cpu-exec-common.c
+++ b/cpu-exec-common.c
@@ -25,6 +25,7 @@
 
 bool exit_request;
 CPUState *tcg_current_cpu;
+int tcg_pending_cpus;
 
 /* exit the current TB, but without causing any exception to be raised */
 void cpu_loop_exit_noexc(CPUState *cpu)
@@ -78,6 +79,15 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
     siglongjmp(cpu->jmp_env, 1);
 }
 
+static int safe_work_pending;
+
+void wait_safe_cpu_work(void)
+{
+    while (atomic_mb_read(&safe_work_pending) > 0) {
+        wait_cpu_work();
+    }
+}
+
 static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
 {
     qemu_mutex_lock(&cpu->work_mutex);
@@ -89,9 +99,18 @@ static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
     cpu->queued_work_last = wi;
     wi->next = NULL;
     wi->done = false;
+    if (wi->safe) {
+        atomic_inc(&safe_work_pending);
+    }
     qemu_mutex_unlock(&cpu->work_mutex);
 
-    qemu_cpu_kick(cpu);
+    if (!wi->safe) {
+        qemu_cpu_kick(cpu);
+    } else {
+        CPU_FOREACH(cpu) {
+            qemu_cpu_kick(cpu);
+        }
+    }
 }
 
 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
@@ -106,6 +125,7 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
     wi.func = func;
     wi.data = data;
     wi.free = false;
+    wi.safe = false;
 
     queue_work_on_cpu(cpu, &wi);
     while (!atomic_mb_read(&wi.done)) {
@@ -129,6 +149,20 @@ void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
     wi->func = func;
     wi->data = data;
     wi->free = true;
+    wi->safe = false;
+
+    queue_work_on_cpu(cpu, wi);
+}
+
+void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+{
+    struct qemu_work_item *wi;
+
+    wi = g_malloc0(sizeof(struct qemu_work_item));
+    wi->func = func;
+    wi->data = data;
+    wi->free = true;
+    wi->safe = true;
 
     queue_work_on_cpu(cpu, wi);
 }
@@ -148,9 +182,18 @@ void flush_queued_work(CPUState *cpu)
         if (!cpu->queued_work_first) {
             cpu->queued_work_last = NULL;
         }
+        if (wi->safe) {
+            while (tcg_pending_cpus) {
+                wait_cpu_work();
+            }
+        }
         qemu_mutex_unlock(&cpu->work_mutex);
         wi->func(cpu, wi->data);
         qemu_mutex_lock(&cpu->work_mutex);
+        if (wi->safe) {
+            atomic_dec(&safe_work_pending);
+            signal_cpu_work();
+        }
         if (wi->free) {
             g_free(wi);
         } else {
diff --git a/cpus.c b/cpus.c
index 98f60f6f98f5..bb6bd8615cfc 100644
--- a/cpus.c
+++ b/cpus.c
@@ -932,6 +932,18 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
 {
 }
 
+static void tcg_cpu_exec_start(CPUState *cpu)
+{
+    tcg_pending_cpus++;
+}
+
+static void tcg_cpu_exec_end(CPUState *cpu)
+{
+    if (--tcg_pending_cpus) {
+        signal_cpu_work();
+    }
+}
+
 static void qemu_wait_io_event_common(CPUState *cpu)
 {
     if (cpu->stop) {
@@ -956,6 +968,8 @@ static void qemu_tcg_wait_io_event(CPUState *cpu)
     CPU_FOREACH(cpu) {
         qemu_wait_io_event_common(cpu);
     }
+
+    wait_safe_cpu_work();
 }
 
 static void qemu_kvm_wait_io_event(CPUState *cpu)
@@ -1491,7 +1505,9 @@ static void tcg_exec_all(void)
                           (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
 
         if (cpu_can_run(cpu)) {
+            tcg_cpu_exec_start(cpu);
             r = tcg_cpu_exec(cpu);
+            tcg_cpu_exec_end(cpu);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
                 break;
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 23b4b50e0a45..3bc44ed81473 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -405,10 +405,12 @@ extern int singlestep;
 
 /* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */
 extern CPUState *tcg_current_cpu;
+extern int tcg_pending_cpus;
 extern bool exit_request;
 
 void wait_cpu_work(void);
 void signal_cpu_work(void);
 void flush_queued_work(CPUState *cpu);
+void wait_safe_cpu_work(void);
 
 #endif
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 4e688f645b4a..5128fcc1745a 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -231,6 +231,7 @@ struct qemu_work_item {
     void *data;
     int done;
     bool free;
+    bool safe;
 };
 
 /**
@@ -625,6 +626,19 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
 void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
 
 /**
+ * async_safe_run_on_cpu:
+ * @cpu: The vCPU to run on.
+ * @func: The function to be executed.
+ * @data: Data to pass to the function.
+ *
+ * Schedules the function @func for execution on the vCPU @cpu asynchronously
+ * and in quiescent state. Quiescent state means: (1) all other vCPUs are
+ * halted and (2) #qemu_global_mutex (a.k.a. BQL) in system-mode or
+ * #exclusive_lock in user-mode emulation is held while @func is executing.
+ */
+void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
+
+/**
  * qemu_get_cpu:
  * @index: The CPUState@cpu_index value of the CPU to obtain.
  *
diff --git a/linux-user/main.c b/linux-user/main.c
index 5a68651159c2..6da3bb32186b 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -113,7 +113,6 @@ static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER;
 static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER;
 static pthread_cond_t work_cond = PTHREAD_COND_INITIALIZER;
 static bool exclusive_pending;
-static int tcg_pending_cpus;
 
 /* Make sure everything is in a consistent state for calling fork(). */
 void fork_start(void)
@@ -219,6 +218,7 @@ static inline void cpu_exec_end(CPUState *cpu)
     }
     exclusive_idle();
     flush_queued_work(cpu);
+    wait_safe_cpu_work();
     pthread_mutex_unlock(&exclusive_lock);
 }