Patchwork [6/9] linux-user: don't do locking in single-threaded processes

login
register
mail settings
Submitter Ulrich Hecht
Date Oct. 16, 2009, 12:38 p.m.
Message ID <1255696735-21396-7-git-send-email-uli@suse.de>
Download mbox | patch
Permalink /patch/36203/
State New
Headers show

Comments

Ulrich Hecht - Oct. 16, 2009, 12:38 p.m.
Skips taking the tb_lock if a process doesn't have more than one thread,
which is usually the case. Results in about a 20% performance gain (measured
with the s390x target, but the effect should be similar with other targets).

Signed-off-by: Ulrich Hecht <uli@suse.de>
---
 cpu-defs.h           |    8 ++++++++
 cpu-exec.c           |   14 ++++++++++++--
 linux-user/syscall.c |    1 +
 3 files changed, 21 insertions(+), 2 deletions(-)

Patch

diff --git a/cpu-defs.h b/cpu-defs.h
index 95068b5..c50c59e 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -135,6 +135,13 @@  typedef struct CPUWatchpoint {
 } CPUWatchpoint;
 
 #define CPU_TEMP_BUF_NLONGS 128
+
+#ifdef CONFIG_USER_ONLY
+#define MULTITHREAD uint32_t multithreaded;
+#else
+#define MULTITHREAD
+#endif
+
 #define CPU_COMMON                                                      \
     struct TranslationBlock *current_tb; /* currently executing TB  */  \
     /* soft mmu support */                                              \
@@ -149,6 +156,7 @@  typedef struct CPUWatchpoint {
     uint32_t stop;   /* Stop request */                                 \
     uint32_t stopped; /* Artificially stopped */                        \
     uint32_t interrupt_request;                                         \
+    MULTITHREAD /* needs locking when accessing TBs */                  \
     volatile sig_atomic_t exit_request;                                 \
     /* The meaning of the MMU modes is defined in the target code. */   \
     CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];                  \
diff --git a/cpu-exec.c b/cpu-exec.c
index 6b3391c..3fe2725 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -219,6 +219,9 @@  int cpu_exec(CPUState *env1)
     TranslationBlock *tb;
     uint8_t *tc_ptr;
     unsigned long next_tb;
+#ifdef CONFIG_USER_ONLY
+    uint32_t multithreaded;
+#endif
 
     if (cpu_halted(env1) == EXCP_HALTED)
         return EXCP_HALTED;
@@ -576,7 +579,11 @@  int cpu_exec(CPUState *env1)
 #endif
                 }
 #endif
-                spin_lock(&tb_lock);
+#ifdef CONFIG_USER_ONLY
+                multithreaded = env->multithreaded;
+                if (multithreaded)
+#endif
+                    spin_lock(&tb_lock);
                 tb = tb_find_fast();
                 /* Note: we do it here to avoid a gcc bug on Mac OS X when
                    doing it in tb_find_slow */
@@ -600,7 +607,10 @@  int cpu_exec(CPUState *env1)
                     tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb);
                 }
                 }
-                spin_unlock(&tb_lock);
+#ifdef CONFIG_USER_ONLY
+                if (multithreaded)
+#endif
+                    spin_unlock(&tb_lock);
                 env->current_tb = tb;
 
                 /* cpu_interrupt might be called while translating the
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 617e031..f2a53d5 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -3549,6 +3549,7 @@  static int do_fork(CPUState *env, unsigned int flags, abi_ulong newsp,
         ts = qemu_mallocz(sizeof(TaskState) + NEW_STACK_SIZE);
         init_task_state(ts);
         new_stack = ts->stack;
+        env->multithreaded = 1;
         /* we create a new CPU instance. */
         new_env = cpu_copy(env);
         /* Init regs that differ from the parent.  */