@@ -393,6 +393,57 @@ static inline void cpu_exit_loop_lock_reset(CPUState *cpu)
{ }
#endif
+static inline void cpu_sleep_other(CPUState *cpu, CPUState *curr)
+{
+ assert(cpu->tcg_sleep_owner == NULL);
+ qemu_mutex_lock(cpu->tcg_work_lock);
+ cpu->tcg_sleep_requests++;
+ cpu->tcg_sleep_owner = curr;
+ qemu_mutex_unlock(cpu->tcg_work_lock);
+#ifdef CONFIG_SOFTMMU
+ cpu_exit(cpu);
+#else
+ /* cannot call cpu_exit(); cpu->exit_request is not for usermode */
+ smp_wmb();
+ cpu->tcg_exit_req = 1;
+#endif
+}
+
+/* call with no locks held */
+static inline void cpu_sleep_others(CPUState *curr)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ if (cpu == curr) {
+ continue;
+ }
+ cpu_sleep_other(cpu, curr);
+ }
+ /* wait until all other threads are out of the execution loop */
+ synchronize_rcu();
+}
+
+static inline void cpu_wake_others(CPUState *curr)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ if (cpu == curr) {
+ continue;
+ }
+ if (cpu->tcg_sleep_owner != curr) {
+ assert(!cpu->inited);
+ continue;
+ }
+ qemu_mutex_lock(cpu->tcg_work_lock);
+ cpu->tcg_sleep_requests--;
+ cpu->tcg_sleep_owner = NULL;
+ qemu_cond_signal(cpu->tcg_work_cond);
+ qemu_mutex_unlock(cpu->tcg_work_lock);
+ }
+}
+
/* main execution loop */
int cpu_exec(CPUState *cpu)
@@ -410,6 +461,44 @@ int cpu_exec(CPUState *cpu)
current_cpu = cpu;
+ /*
+ * Prevent threads that were created during a TCG work critical section
+ * (and that therefore didn't have cpu->tcg_sleep_owner set) from executing.
+ * What we do is then to not let them run by sending them out of the CPU
+ * loop until the tcg_work_pending flag goes down.
+ */
+ if (unlikely(!cpu->inited)) {
+ tb_lock();
+ tb_unlock();
+ cpu->inited = true;
+ }
+
+ if (cpu->tcg_work_func) {
+ cpu_sleep_others(cpu);
+ /*
+ * At this point all existing threads are sleeping.
+ * With the check above we make sure that threads that might be
+ * concurrently added at this point won't execute until the end of the
+ * work window, so we can safely call the work function.
+ */
+ cpu->tcg_work_func(cpu->tcg_work_arg);
+ cpu->tcg_work_func = NULL;
+ cpu->tcg_work_arg = NULL;
+
+ /* mark the end of the TCG work critical section */
+ tb_lock_nocheck();
+ tcg_ctx.tb_ctx.work_pending = false;
+ tb_unlock();
+ cpu_wake_others(cpu);
+ }
+
+ qemu_mutex_lock(cpu->tcg_work_lock);
+ assert(cpu->tcg_sleep_requests >= 0);
+ while (unlikely(cpu->tcg_sleep_requests)) {
+ qemu_cond_wait(cpu->tcg_work_cond, cpu->tcg_work_lock);
+ }
+ qemu_mutex_unlock(cpu->tcg_work_lock);
+
#ifndef CONFIG_USER_ONLY
/* FIXME: user-mode emulation probably needs a similar mechanism as well,
* for example for tb_flush.
@@ -579,6 +579,10 @@ void cpu_exec_init(CPUState *cpu, Error **errp)
qemu_mutex_init(&cpu->tb_jmp_cache_lock);
seqlock_init(&cpu->tb_jmp_cache_sequence, &cpu->tb_jmp_cache_lock);
+ cpu->tcg_work_cond = g_malloc(sizeof(*cpu->tcg_work_cond));
+ qemu_cond_init(cpu->tcg_work_cond);
+ cpu->tcg_work_lock = g_malloc(sizeof(*cpu->tcg_work_lock));
+ qemu_mutex_init(cpu->tcg_work_lock);
#ifndef CONFIG_USER_ONLY
cpu->as = &address_space_memory;
cpu->thread_id = qemu_get_thread_id();
@@ -198,6 +198,11 @@ struct TBContext {
int nb_tbs;
/* any access to the tbs or the page table must use this lock */
QemuMutex tb_lock;
+ /*
+ * This ensures that only one thread can perform safe work at a time.
+ * Protected by tb_lock; check the flag right after acquiring the lock.
+ */
+ bool work_pending;
/* statistics */
int tb_flush_count;
@@ -273,6 +273,15 @@ struct CPUState {
bool stop;
bool stopped;
bool cpu_loop_exit_locked;
+ bool inited;
+ /* tcg_work_* protected by tcg_work_lock */
+ QemuCond *tcg_work_cond;
+ QemuMutex *tcg_work_lock;
+ void (*tcg_work_func)(void *arg);
+ void *tcg_work_arg;
+ CPUState *tcg_sleep_owner;
+ int tcg_sleep_requests;
+
volatile sig_atomic_t exit_request;
uint32_t interrupt_request;
int singlestep_enabled;
@@ -582,6 +591,17 @@ void async_run_safe_work_on_cpu(CPUState *cpu, void (*func)(void *data),
bool async_safe_work_pending(void);
/**
+ * cpu_tcg_sched_work:
+ * @cpu: CPU thread to schedule the work on
+ * @func: function to be called when all other CPU threads are asleep
+ * @arg: argument to be passed to @func
+ *
+ * Schedule work to be done while all other CPU threads are put to sleep.
+ * Call with tb_lock held.
+ */
+void cpu_tcg_sched_work(CPUState *cpu, void (*func)(void *arg), void *arg);
+
+/**
* qemu_get_cpu:
* @index: The CPUState@cpu_index value of the CPU to obtain.
*
@@ -596,6 +596,7 @@ void tcg_pool_reset(TCGContext *s);
void tcg_pool_delete(TCGContext *s);
void tb_lock(void);
+void tb_lock_nocheck(void);
void tb_unlock(void);
bool tb_lock_recursive(void);
void tb_lock_reset(void);
@@ -133,13 +133,24 @@ TCGContext tcg_ctx;
/* translation block context */
__thread int have_tb_lock;
-void tb_lock(void)
+/* acquire tb_lock without checking for pending work */
+void tb_lock_nocheck(void)
{
assert(!have_tb_lock);
qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
have_tb_lock++;
}
+void tb_lock(void)
+{
+ tb_lock_nocheck();
+ if (unlikely(tcg_ctx.tb_ctx.work_pending)) {
+ assert(current_cpu);
+ current_cpu->exception_index = EXCP_INTERRUPT;
+ cpu_loop_exit(current_cpu);
+ }
+}
+
void tb_unlock(void)
{
assert(have_tb_lock);
@@ -961,6 +972,16 @@ static void tb_page_check(void)
#endif
+void cpu_tcg_sched_work(CPUState *cpu, void (*func)(void *arg), void *arg)
+{
+ assert(have_tb_lock);
+ tcg_ctx.tb_ctx.work_pending = true;
+ cpu->tcg_work_func = func;
+ cpu->tcg_work_arg = arg;
+ cpu->exception_index = EXCP_INTERRUPT;
+ cpu_loop_exit(cpu);
+}
+
static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
{
TranslationBlock *tb1;
This is similar in intent to the async_safe_work mechanism. The main differences are: - Work is run on a single CPU thread *after* all others are put to sleep - Sleeping threads are woken up by the worker thread upon completing its job - A flag has been added to tcg_ctx so that only one thread can schedule work at a time. The flag is checked every time tb_lock is acquired. - Handles the possibility of CPU threads being created after the existing CPUs are put to sleep. This is easily triggered with many threads on a many-core host in usermode. - Works for both softmmu and usermode Signed-off-by: Emilio G. Cota <cota@braap.org> --- cpu-exec.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++ exec.c | 4 +++ include/exec/exec-all.h | 5 +++ include/qom/cpu.h | 20 +++++++++++ tcg/tcg.h | 1 + translate-all.c | 23 ++++++++++++- 6 files changed, 141 insertions(+), 1 deletion(-)