Patchwork Make cpu_single_env thread local (Linux only for now)

login
register
mail settings
Submitter Paolo Bonzini
Date Oct. 5, 2011, 7:08 a.m.
Message ID <4E8C0275.50107@redhat.com>
Download mbox | patch
Permalink /patch/117744/
State New
Headers show

Comments

Paolo Bonzini - Oct. 5, 2011, 7:08 a.m.
On 10/04/2011 07:26 PM, Jan Kiszka wrote:
> Looks like a start. But I would avoid macros and go for (static inline)
> functions where possible. And initialization should be explicit (so that
> you can start using TLS already inside constructors).

Here is the patch I wrote to do more or less the same thing, plus 
Windows support.  It's a bit different in that I wrote a macro that can 
be used as lvalue.

It doesn't let you use TLS inside constructors, however, unless you use 
constructor priorities.

Paolo
Jan Kiszka - Oct. 5, 2011, 7:52 a.m.
On 2011-10-05 09:08, Paolo Bonzini wrote:
> On 10/04/2011 07:26 PM, Jan Kiszka wrote:
>> Looks like a start. But I would avoid macros and go for (static inline)
>> functions where possible. And initialization should be explicit (so that
>> you can start using TLS already inside constructors).
> 
> Here is the patch I wrote to do more or less the same thing, plus
> Windows support.  It's a bit different in that I wrote a macro that can
> be used as lvalue.

Yeah, it probably makes sense to build the abstractions around __thread
so that - one day - we can drop the legacy wrappers.

Just do not prepend "tls__" in the gcc model (there is also some
inconsistency with prefixes in patch 3). And avoid leading "_" unless
they are dictated by the platform. And patch 3 needs to update
darwin-user/main.c as well.

> 
> It doesn't let you use TLS inside constructors, however, unless you use
> constructor priorities.

What is the default priority of constructors BTW? You picked the
highest, will others that do not specify one have the same? Then we
could also define a QEMU_CONSTRUCTOR wrapper with a lower priority.

Jan

Patch

From 497ed0672f7fe08d9654a0e5c11b682bea43a59e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 5 Oct 2011 08:29:39 +0200
Subject: [PATCH 0/3] *** SUBJECT HERE ***

*** BLURB HERE ***

Paolo Bonzini (3):
  qemu-threads: add TLS wrappers
  windows
  tls

 configure           |   20 +++++++++++++++++
 coroutine-win32.c   |    7 ++++-
 cpu-all.h           |    4 ++-
 cpus.c              |   13 +++++++---
 exec.c              |    2 +-
 qemu-thread-posix.c |   42 ++++++++++++++++++++++++++++++++---
 qemu-thread-win32.c |   16 +++++++++++++
 qemu-tls-gcc.h      |   25 +++++++++++++++++++++
 qemu-tls-pthread.h  |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu-tls-win32.h    |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 234 insertions(+), 12 deletions(-)
 create mode 100644 qemu-tls-gcc.h
 create mode 100644 qemu-tls-pthread.h
 create mode 100644 qemu-tls-win32.h

-- 
1.7.6

From d8c3c4e789f9b86a66042a9181333e1a096b6b93 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 16 Aug 2011 10:37:44 -0700
Subject: [PATCH 1/3] qemu-threads: add TLS wrappers

Win32 emulated TLS is slow and is not available on all versions of GCC;
some versions of Unix only have pthread_getspecific as a means to access
TLS.

Actually, Win32 does have decent native TLS support, but GCC does not map
__thread to it.  Unlike ELF TLS, however, it is perfectly possible to
declare Win32 TLS variables with simple C code!  For pthread_getspecific
we similarly allocate a memory block; we have to compute all the offsets
at load time, which is also cheaper than doing a pthread_key_create for
each variable.  Not optimal, but it works.

This patch adds wrappers to qemu-thread that will use __thread or
pthread_getspecific on POSIX systems, and the .tls segment on Windows.

It does kinda uglify the declarations, but not too much.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 configure           |   20 +++++++++++++++++
 coroutine-win32.c   |    7 ++++-
 qemu-thread-posix.c |   42 ++++++++++++++++++++++++++++++++---
 qemu-thread-win32.c |   16 +++++++++++++
 qemu-tls-gcc.h      |   25 +++++++++++++++++++++
 qemu-tls-pthread.h  |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu-tls-win32.h    |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 221 insertions(+), 6 deletions(-)
 create mode 100644 qemu-tls-gcc.h
 create mode 100644 qemu-tls-pthread.h
 create mode 100644 qemu-tls-win32.h

diff --git a/configure b/configure
index 59b1494..50d7b54 100755
--- a/configure
+++ b/configure
@@ -1215,6 +1215,23 @@  EOF
 fi
 
 ##########################################
+# __thread check
+
+if test "$mingw32" = "yes" ; then
+    tls_model=win32
+else
+    cat > $TMPC << EOF
+__thread int x;
+int main() { return x; }
+EOF
+    if compile_prog "" "" ; then
+        tls_model=gcc
+    else
+        tls_model=pthread
+    fi
+fi
+
+##########################################
 # zlib check
 
 if test "$zlib" != "no" ; then
@@ -2697,6 +2714,7 @@  echo "Documentation     $docs"
 [ ! -z "$uname_release" ] && \
 echo "uname -r          $uname_release"
 echo "NPTL support      $nptl"
+echo "TLS support       $tls_model"
 echo "GUEST_BASE        $guest_base"
 echo "PIE user targets  $user_pie"
 echo "vde support       $vde"
@@ -3580,6 +3598,8 @@  if test "$target_linux_user" = "yes" -o "$target_bsd_user" = "yes" ; then
   esac
 fi
 
+symlink $source_path/qemu-tls-$tls_model.h qemu-tls.h
+
 # use included Linux headers
 if test "$linux" = "yes" ; then
   includes="-I\$(SRC_PATH)/linux-headers $includes"
diff --git a/coroutine-win32.c b/coroutine-win32.c
index 4179609..708e220 100644
--- a/coroutine-win32.c
+++ b/coroutine-win32.c
@@ -24,6 +24,7 @@ 
 
 #include "qemu-common.h"
 #include "qemu-coroutine-int.h"
+#include "qemu-tls.h"
 
 typedef struct
 {
@@ -33,8 +34,10 @@  typedef struct
     CoroutineAction action;
 } CoroutineWin32;
 
-static __thread CoroutineWin32 leader;
-static __thread Coroutine *current;
+static DEFINE_TLS(CoroutineWin32, tls_leader);
+static DEFINE_TLS(Coroutine *, tls_current);
+#define leader get_tls(tls_leader)
+#define current get_tls(tls_current)
 
 CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
                                       CoroutineAction action)
diff --git a/qemu-thread-posix.c b/qemu-thread-posix.c
index ac3c0c9..acd04ff 100644
--- a/qemu-thread-posix.c
+++ b/qemu-thread-posix.c
@@ -18,6 +18,9 @@ 
 #include <stdint.h>
 #include <string.h>
 #include "qemu-thread.h"
+#include "qemu-common.h"
+#include "qemu-tls.h"
+#include "qemu-barrier.h"
 
 static void error_exit(int err, const char *msg)
 {
@@ -115,18 +118,44 @@  void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
         error_exit(err, __func__);
 }
 
+size_t tls_size;
+pthread_key_t tls_key;
+
+static void __attribute__((constructor(102))) tls_init_thread(void)
+{
+    /* It's easier to always create the key, even if using GCC tls.  */
+    pthread_key_create(&tls_key, g_free);
+    _tls_init_thread();
+}
+
+typedef struct QemuThreadData {
+    void *(*start_routine)(void *);
+    void *arg;
+} QemuThreadData;
+
+static void *start_routine_wrapper(void *arg)
+{
+    QemuThreadData args = *(QemuThreadData *) arg;
+    g_free(arg);
+    _tls_init_thread();
+    return args.start_routine(args.arg);
+}
+
 void qemu_thread_create(QemuThread *thread,
-                       void *(*start_routine)(void*),
+                       void *(*start_routine)(void *),
                        void *arg)
 {
+    sigset_t set, oldset;
+    QemuThreadData *args = g_malloc(sizeof(QemuThreadData));
     int err;
 
-    /* Leave signal handling to the iothread.  */
-    sigset_t set, oldset;
+    args->start_routine = start_routine;
+    args->arg = arg;
 
+    /* Leave signal handling to the iothread.  */
     sigfillset(&set);
     pthread_sigmask(SIG_SETMASK, &set, &oldset);
-    err = pthread_create(&thread->thread, NULL, start_routine, arg);
+    err = pthread_create(&thread->thread, NULL, start_routine_wrapper, args);
     if (err)
         error_exit(err, __func__);
 
diff --git a/qemu-thread-win32.c b/qemu-thread-win32.c
index db8e744..118d92f 100644
--- a/qemu-thread-win32.c
+++ b/qemu-thread-win32.c
@@ -16,6 +16,22 @@ 
 #include <assert.h>
 #include <limits.h>
 
+/* TLS support.  */
+
+int __attribute__((section(".tls$000"))) _tls_start = 0;
+int __attribute__((section(".tls$ZZZ"))) _tls_end = 0;
+int _tls_index = 0;
+
+const IMAGE_TLS_DIRECTORY _tls_used __attribute__((used, section(".rdata$T"))) = {
+ (ULONG)(ULONG_PTR) &_tls_start, /* start of tls data */
+ (ULONG)(ULONG_PTR) &_tls_end,   /* end of tls data */
+ (ULONG)(ULONG_PTR) &_tls_index, /* address of tls_index */
+ (ULONG) 0,                      /* pointer to callbacks */
+ (ULONG) 0,                      /* size of tls zero fill */
+ (ULONG) 0                       /* characteristics */
+};
+
+
 static void error_exit(int err, const char *msg)
 {
     char *pstr;
diff --git a/qemu-tls-gcc.h b/qemu-tls-gcc.h
new file mode 100644
index 0000000..8cff148
--- /dev/null
+++ b/qemu-tls-gcc.h
@@ -0,0 +1,24 @@ 
+/*
+ * TLS with __thread
+ *
+ * Copyright Red Hat, Inc. 2011
+ *
+ * Authors:
+ *  Paolo Bonzini   <pbonzini@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_TLS_GCC_H
+#define QEMU_TLS_GCC_H
+
+#define DECLARE_TLS(type, x) extern __thread type tls__##x
+#define DEFINE_TLS(type, x)  __thread type tls__##x
+#define get_tls(x)           tls__##x
+
+static inline size_t tls_init(size_t size, size_t alignment) { return 0; }
+static inline void _tls_init_thread(void) {}
+
+#endif
diff --git a/qemu-tls-pthread.h b/qemu-tls-pthread.h
new file mode 100644
index 0000000..ef97528
--- /dev/null
+++ b/qemu-tls-pthread.h
@@ -0,0 +1,57 @@ 
+/*
+ * TLS with pthread_getspecific
+ *
+ * Copyright Red Hat, Inc. 2011
+ *
+ * Authors:
+ *  Paolo Bonzini   <pbonzini@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_TLS_PTHREAD_H
+#define QEMU_TLS_PTHREAD_H
+
+#include <pthread.h>
+#include <glib.h>
+
+#define DECLARE_TLS(type, x)                                     \
+  extern size_t tls_offset__##x;                                 \
+  extern type tls_dummy__##x
+
+#define DEFINE_TLS(type, x)                                      \
+  size_t tls_offset__##x;                                        \
+  static void __attribute__((constructor(101))) tls_init__##x(void) \
+  {                                                              \
+    tls_offset__##x = tls_init(sizeof(type), __alignof__(type)); \
+  }                                                              \
+  extern type tls_dummy__##x
+
+extern size_t tls_size;
+extern pthread_key_t tls_key;
+
+static inline size_t tls_init(size_t size, size_t alignment)
+{
+  size_t tls_offset = (tls_size + alignment - 1) & -alignment;
+  tls_size = tls_offset + size;
+  return tls_offset;
+}
+
+static inline void _tls_init_thread(void)
+{
+  void *mem = tls_size == 0 ? NULL : g_malloc0(tls_size);
+  pthread_setspecific(tls_key, mem);
+}
+
+static inline __attribute__((__const__)) void *_get_tls(size_t offset)
+{
+  char *base = pthread_getspecific(tls_key);
+  return &base[offset];
+}
+
+#define get_tls(x) \
+  (*(__typeof__(&tls_dummy__##x)) _get_tls(tls_offset__##x))
+
+#endif
diff --git a/qemu-tls-win32.h b/qemu-tls-win32.h
new file mode 100644
index 0000000..d04d48b
--- /dev/null
+++ b/qemu-tls-win32.h
@@ -0,0 +1,59 @@ 
+/*
+ * TLS with Win32 .tls sections
+ *
+ * Copyright Red Hat, Inc. 2011
+ *
+ * Authors:
+ *  Paolo Bonzini   <pbonzini@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_TLS_WIN32_H
+#define QEMU_TLS_WIN32_H
+
+#include <windows.h>
+#include <winnt.h>
+
+typedef struct _TEB {
+  NT_TIB NtTib;
+  void *EnvironmentPointer;
+  void *x[3];
+  char **ThreadLocalStoragePointer;
+} TEB, *PTEB;
+
+/* 1) The initial contents of TLS variables are placed in the .tls section.  */
+
+#define DECLARE_TLS(type, x)  extern DEFINE_TLS(type, x)
+#define DEFINE_TLS(type, x)   type tls__##x __attribute__((section(".tls$AAA")))
+
+/* 2) _tls_index holds the number of our module.  The executable should be
+   zero, DLLs are numbered 1 and up.  The loader fills it in for us.  */
+
+extern int _tls_index;
+extern int _tls_start;
+static inline void _tls_init_thread(void) {}
+
+/* 3) Thus, Teb->ThreadLocalStoragePointer[_tls_index] is the base of
+   the TLS segment for this (thread, module) pair.  Each segment has
+   the same layout as this module's .tls segment and is initialized
+   with the content of the .tls segment; offset 0 is _tls_start.
+   So, get_tls computes the offset of the requested variable relative
+   to _tls_start, and we return that offset plus the segment base.  */
+
+static inline __attribute__((__const__)) void *_get_tls(size_t offset)
+{
+    PTEB Teb = NtCurrentTeb();
+    return (char *)(Teb->ThreadLocalStoragePointer[_tls_index]) + offset;
+}
+
+/* 4) get_tls, in addition to computing the offset, returns an lvalue.
+      "I got it.  Magic."  */
+
+#define get_tls(x)                                                 \
+  (*(__typeof__(tls__##x) *)                                       \
+    _get_tls((ULONG_PTR)&(tls__##x) - (ULONG_PTR)&_tls_start))
+
+#endif
-- 
1.7.6


From b10531473a833cf5e925f00461134b0bcd2295bb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Aug 2011 17:03:55 +0200
Subject: [PATCH 2/3] Prepare Windows port for thread-local cpu_single_env

Windows does not execute cpu_signal in VCPU-thread context,
so it won't be able to use cpu_single_env there.  However,
it has the CPUState available, so nothing is lost.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cpus.c |   13 +++++++++----
 1 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/cpus.c b/cpus.c
index 8978779..822ce7a 100644
--- a/cpus.c
+++ b/cpus.c
@@ -176,10 +176,10 @@  static void cpu_handle_guest_debug(CPUState *env)
     env->stopped = 1;
 }
 
-static void cpu_signal(int sig)
+static inline void do_cpu_kick(CPUState *env)
 {
-    if (cpu_single_env) {
-        cpu_exit(cpu_single_env);
+    if (env) {
+        cpu_exit(env);
     }
     exit_request = 1;
 }
@@ -437,6 +437,11 @@  static void qemu_kvm_init_cpu_signals(CPUState *env)
     }
 }
 
+static void cpu_signal(int sig)
+{
+    do_cpu_kick(cpu_single_env);
+}
+
 static void qemu_tcg_init_cpu_signals(void)
 {
     sigset_t set;
@@ -708,7 +713,7 @@  static void qemu_cpu_kick_thread(CPUState *env)
 #else /* _WIN32 */
     if (!qemu_cpu_is_self(env)) {
         SuspendThread(env->thread->thread);
-        cpu_signal(0);
+        do_cpu_kick(env);
         ResumeThread(env->thread->thread);
     }
 #endif
-- 
1.7.6


From 497ed0672f7fe08d9654a0e5c11b682bea43a59e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Aug 2011 17:04:01 +0200
Subject: [PATCH 3/3] Make cpu_single_env thread-local

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cpu-all.h |    4 +++-
 exec.c    |    2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index 42a5fa0..da457dc 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -20,6 +20,7 @@ 
 #define CPU_ALL_H
 
 #include "qemu-common.h"
+#include "qemu-tls.h"
 #include "cpu-common.h"
 
 /* some important defines:
@@ -334,7 +335,8 @@  void cpu_dump_statistics(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
 void QEMU_NORETURN cpu_abort(CPUState *env, const char *fmt, ...)
     GCC_FMT_ATTR(2, 3);
 extern CPUState *first_cpu;
-extern CPUState *cpu_single_env;
+DECLARE_TLS(CPUState *,tls_cpu_single_env);
+#define cpu_single_env get_tls(tls_cpu_single_env)
 
 /* Flags for use in ENV->INTERRUPT_PENDING.
 
diff --git a/exec.c b/exec.c
index d0cbf15..66b82db 100644
--- a/exec.c
+++ b/exec.c
@@ -120,7 +120,7 @@  static MemoryRegion *system_io;
 CPUState *first_cpu;
 /* current CPU in the current thread. It is only valid inside
    cpu_exec() */
-CPUState *cpu_single_env;
+DEFINE_TLS(CPUState *,cpu_single_env);
 /* 0 = Do not count executed instructions.
    1 = Precise instruction counting.
    2 = Adaptive rate instruction counting.  */
-- 
1.7.6