diff mbox series

[v3] tcg: Fix execution on Apple Silicon

Message ID 20210113032806.18220-1-r.bolshakov@yadro.com
State New
Headers show
Series [v3] tcg: Fix execution on Apple Silicon | expand

Commit Message

Roman Bolshakov Jan. 13, 2021, 3:28 a.m. UTC
Pages can't be both writable and executable at the same time on Apple
Silicon. macOS provides a public API to switch write protection [1] for
JIT applications, like TCG.

1. https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon

Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
---
v2: https://lists.gnu.org/archive/html/qemu-devel/2021-01/msg00146.html
Changes since v2:
 - Wrapped pthread_jit_write_protect_np() with __builtin_available() [1]
   to allow build with modern SDK while targeting older macOS (Joelle)
 - Dropped redundant calls to pthread_jit_write_protect_supported_np()
   (Alex)

v1: https://lists.gnu.org/archive/html/qemu-devel/2021-01/msg00073.html
Changes since v1:

 - Pruned unneeded fiddling with W^X, dropped symmetry from write
   lock/unlock, and renamed related functions.
   A similar approach is used in JavaScriptCore [2].

 - Moved jit helper functions to util/osdep
   As outlined in osdep.h, this matches (2):
   * In an ideal world this header would contain only:
   *  (1) things which everybody needs
   *  (2) things without which code would work on most platforms but
   *      fail to compile or misbehave on a minority of host OSes

 - Fixed a checkpatch error

 - Limit new behaviour only to macOS 11.0 and above, because of the
   following declarations:

   __API_AVAILABLE(macos(11.0))
   __API_UNAVAILABLE(ios, tvos, watchos)
   void pthread_jit_write_protect_np(int enabled);

   __API_AVAILABLE(macos(11.0))
   __API_UNAVAILABLE(ios, tvos, watchos)
   int pthread_jit_write_protect_supported_np(void);

 1. https://developer.apple.com/videos/play/wwdc2017/411/
 2. https://bugs.webkit.org/attachment.cgi?id=402515&action=prettypatch

 accel/tcg/cpu-exec.c      |  2 ++
 accel/tcg/translate-all.c |  9 +++++++++
 include/qemu/osdep.h      |  7 +++++++
 tcg/tcg.c                 |  1 +
 util/osdep.c              | 20 ++++++++++++++++++++
 5 files changed, 39 insertions(+)

Comments

Richard Henderson Jan. 14, 2021, 1:35 a.m. UTC | #1
On 1/12/21 5:28 PM, Roman Bolshakov wrote:
> Pages can't be both write and executable at the same time on Apple
> Silicon. macOS provides public API to switch write protection [1] for
> JIT applications, like TCG.

So... considering that split w^x is now upstream, can we just call this once
per thread to enable write and leave it write?
Since we have the separate rw mapping.


r~
Joelle van Dyne Jan. 14, 2021, 1:55 a.m. UTC | #2
No, because of macOS weirdness you still have to map with RWX and MAP_JIT
before you can use this. Split w^x is only useful on iOS, where they don’t
provide this functionality.

-j

On Wednesday, January 13, 2021, Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 1/12/21 5:28 PM, Roman Bolshakov wrote:
> > Pages can't be both write and executable at the same time on Apple
> > Silicon. macOS provides public API to switch write protection [1] for
> > JIT applications, like TCG.
>
> So... considering that split w^x is now upstream, can we just call this
> once
> per thread to enable write and leave it write?
> Since we have the separate rw mapping.
>
>
> r~
>
Richard Henderson Jan. 21, 2021, 6:34 p.m. UTC | #3
On 1/12/21 5:28 PM, Roman Bolshakov wrote:
> @@ -1083,6 +1083,12 @@ static bool alloc_code_gen_buffer_anon(size_t size, int prot,
>  {
>      void *buf;
>  
> +#if defined(MAC_OS_VERSION_11_0) && \
> +    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
> +    if (__builtin_available(macOS 11.0, *)) {
> +        flags |= MAP_JIT;
> +    }
> +#endif

This hunk should be in alloc_code_gen_buffer, where we do the other flags
manipulation.

I'll drop this hunk and apply the rest, which is exclusively related to
toggling the jit bit.

> --- a/util/osdep.c
> +++ b/util/osdep.c
> @@ -606,3 +606,23 @@ writev(int fd, const struct iovec *iov, int iov_cnt)
>      return readv_writev(fd, iov, iov_cnt, true);
>  }
>  #endif
> +
> +#if defined(MAC_OS_VERSION_11_0) && \
> +    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
> +void qemu_thread_jit_execute(void)
> +{
> +    if (__builtin_available(macOS 11.0, *)) {
> +        pthread_jit_write_protect_np(true);
> +    }
> +}
> +
> +void qemu_thread_jit_write(void)
> +{
> +    if (__builtin_available(macOS 11.0, *)) {
> +        pthread_jit_write_protect_np(false);
> +    }
> +}
> +#else
> +void qemu_thread_jit_write(void) {}
> +void qemu_thread_jit_execute(void) {}
> +#endif

I will move these inline in osdep.h, because it's either (1) a single function
call or (2) a nop.


r~
Richard Henderson Jan. 29, 2021, 8:18 p.m. UTC | #4
On 1/21/21 8:34 AM, Richard Henderson wrote:
> On 1/12/21 5:28 PM, Roman Bolshakov wrote:
>> @@ -1083,6 +1083,12 @@ static bool alloc_code_gen_buffer_anon(size_t size, int prot,
>>  {
>>      void *buf;
>>  
>> +#if defined(MAC_OS_VERSION_11_0) && \
>> +    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
>> +    if (__builtin_available(macOS 11.0, *)) {
>> +        flags |= MAP_JIT;
>> +    }
>> +#endif
> 
> This hunk should be in alloc_code_gen_buffer, where we do the other flags
> manipulation.
> 
> I'll drop this hunk and apply the rest, which is exclusively related to
> toggling the jit bit.

Ping on this?

I would imagine that the patch would look something like

--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1296,6 +1296,11 @@ static bool alloc_code_gen_buffer
 #ifdef CONFIG_TCG_INTERPRETER
     /* The tcg interpreter does not need execute permission. */
     prot = PROT_READ | PROT_WRITE;
+#elif defined(MAC_OS_VERSION_11_0) && \
+    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
+    if (__builtin_available(macOS 11.0, *)) {
+        flags |= MAP_JIT;
+    }
 #elif defined(CONFIG_DARWIN)
     /* Applicable to both iOS and macOS (Apple Silicon). */
     if (!splitwx) {

But I don't know how CONFIG_DARWIN, iOS, and MAC_OS_VERSION interact, and I'm
not able to even compile-test the patch.
Certainly the final comment there looks suspicious, given the preceding MAC_OS
stanza...


r~
Roman Bolshakov Jan. 29, 2021, 8:50 p.m. UTC | #5
On Fri, Jan 29, 2021 at 10:18:58AM -1000, Richard Henderson wrote:
> On 1/21/21 8:34 AM, Richard Henderson wrote:
> > On 1/12/21 5:28 PM, Roman Bolshakov wrote:
> >> @@ -1083,6 +1083,12 @@ static bool alloc_code_gen_buffer_anon(size_t size, int prot,
> >>  {
> >>      void *buf;
> >>  
> >> +#if defined(MAC_OS_VERSION_11_0) && \
> >> +    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
> >> +    if (__builtin_available(macOS 11.0, *)) {
> >> +        flags |= MAP_JIT;
> >> +    }
> >> +#endif
> > 
> > This hunk should be in alloc_code_gen_buffer, where we do the other flags
> > manipulation.
> > 
> > I'll drop this hunk and apply the rest, which is exclusively related to
> > toggling the jit bit.
> 
> Ping on this?
> 
Hi Richard,

> I would imagine that the patch would look something like
> 
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1296,6 +1296,11 @@ static bool alloc_code_gen_buffer
>  #ifdef CONFIG_TCG_INTERPRETER
>      /* The tcg interpreter does not need execute permission. */
>      prot = PROT_READ | PROT_WRITE;
> +#elif defined(MAC_OS_VERSION_11_0) && \
> +    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
> +    if (__builtin_available(macOS 11.0, *)) {
> +        flags |= MAP_JIT;
> +    }
>  #elif defined(CONFIG_DARWIN)
>      /* Applicable to both iOS and macOS (Apple Silicon). */
>      if (!splitwx) {
> 
> But I don't know how CONFIG_DARWIN, iOS, and MAC_OS_VERSION interact, and I'm
> not able to even compile-test the patch.
> Certainly the final comment there looks suspicious, given the preceding MAC_OS
> stanza...
> 

I thought you already added MAP_JIT in 6f70ddee19e. It's getting enabled
on my M1 laptop. Was it intended or not?

    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (!splitwx) {
        flags |= MAP_JIT;
    }

TCG from the master branch of QEMU works fine on M1. I'm not sure why we
need to duplicate it.

Thanks,
Roman
Richard Henderson Jan. 30, 2021, 5:27 a.m. UTC | #6
On 1/29/21 10:50 AM, Roman Bolshakov wrote:
> On Fri, Jan 29, 2021 at 10:18:58AM -1000, Richard Henderson wrote:
>> On 1/21/21 8:34 AM, Richard Henderson wrote:
>>> On 1/12/21 5:28 PM, Roman Bolshakov wrote:
>>>> @@ -1083,6 +1083,12 @@ static bool alloc_code_gen_buffer_anon(size_t size, int prot,
>>>>  {
>>>>      void *buf;
>>>>  
>>>> +#if defined(MAC_OS_VERSION_11_0) && \
>>>> +    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
>>>> +    if (__builtin_available(macOS 11.0, *)) {
>>>> +        flags |= MAP_JIT;
>>>> +    }
>>>> +#endif
>>>
>>> This hunk should be in alloc_code_gen_buffer, where we do the other flags
>>> manipulation.
>>>
>>> I'll drop this hunk and apply the rest, which is exclusively related to
>>> toggling the jit bit.
>>
>> Ping on this?
>>
> Hi Richard,
> 
>> I would imagine that the patch would look something like
>>
>> --- a/accel/tcg/translate-all.c
>> +++ b/accel/tcg/translate-all.c
>> @@ -1296,6 +1296,11 @@ static bool alloc_code_gen_buffer
>>  #ifdef CONFIG_TCG_INTERPRETER
>>      /* The tcg interpreter does not need execute permission. */
>>      prot = PROT_READ | PROT_WRITE;
>> +#elif defined(MAC_OS_VERSION_11_0) && \
>> +    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
>> +    if (__builtin_available(macOS 11.0, *)) {
>> +        flags |= MAP_JIT;
>> +    }
>>  #elif defined(CONFIG_DARWIN)
>>      /* Applicable to both iOS and macOS (Apple Silicon). */
>>      if (!splitwx) {
>>
>> But I don't know how CONFIG_DARWIN, iOS, and MAC_OS_VERSION interact, and I'm
>> not able to even compile-test the patch.
>> Certainly the final comment there looks suspicious, given the preceding MAC_OS
>> stanza...
>>
> 
> I thought you already added MAP_JIT in 6f70ddee19e. It's getting enabled
> on my M1 laptop. Was it intended or not?
> 
>     /* Applicable to both iOS and macOS (Apple Silicon). */
>     if (!splitwx) {
>         flags |= MAP_JIT;
>     }
> 
> TCG from master branch of QEMU works fine on M1. I'm not sure why do we
> need to duplicate it.

I thought there was something about abi/api build issues.  If there's nothing
that needs doing, great!


r~
Roman Bolshakov Feb. 2, 2021, 8:54 a.m. UTC | #7
On Fri, Jan 29, 2021 at 07:27:57PM -1000, Richard Henderson wrote:
> On 1/29/21 10:50 AM, Roman Bolshakov wrote:
> > 
> > I thought you already added MAP_JIT in 6f70ddee19e. It's getting enabled
> > on my M1 laptop. Was it intended or not?
> > 
> >     /* Applicable to both iOS and macOS (Apple Silicon). */
> >     if (!splitwx) {
> >         flags |= MAP_JIT;
> >     }
> > 
> > TCG from master branch of QEMU works fine on M1. I'm not sure why do we
> > need to duplicate it.
> 
> I thought there was something about abi/api build issues.  If there's nothing
> that needs doing, great!
> 

Hi Richard,

You're correct that older versions of OS X/macOS might not have the MAP_JIT
definition, so simply wrapping the hunk with #ifdef MAP_JIT might be
sufficient (or guard it for Big Sur and above):

  #if defined(MAC_OS_VERSION_11_0) && \
      MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
      if (!splitwx && __builtin_available(macOS 11.0, *)) {
          flags |= MAP_JIT;
      }
  #endif

But I'm not sure if we want to support hosts older than 10.14.

Regards,
Roman
diff mbox series

Patch

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index e0df9b6a1d..014810bf0a 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -185,6 +185,7 @@  cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
     }
 #endif /* DEBUG_DISAS */
 
+    qemu_thread_jit_execute();
     ret = tcg_qemu_tb_exec(env, tb_ptr);
     cpu->can_do_io = 1;
     /*
@@ -405,6 +406,7 @@  static inline void tb_add_jump(TranslationBlock *tb, int n,
 {
     uintptr_t old;
 
+    qemu_thread_jit_write();
     assert(n < ARRAY_SIZE(tb->jmp_list_next));
     qemu_spin_lock(&tb_next->jmp_lock);
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index e9de6ff9dd..f5f4c7cc17 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1083,6 +1083,12 @@  static bool alloc_code_gen_buffer_anon(size_t size, int prot,
 {
     void *buf;
 
+#if defined(MAC_OS_VERSION_11_0) && \
+    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
+    if (__builtin_available(macOS 11.0, *)) {
+        flags |= MAP_JIT;
+    }
+#endif
     buf = mmap(NULL, size, prot, flags, -1, 0);
     if (buf == MAP_FAILED) {
         error_setg_errno(errp, errno,
@@ -1669,7 +1675,9 @@  static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
 
 static void tb_phys_invalidate__locked(TranslationBlock *tb)
 {
+    qemu_thread_jit_write();
     do_tb_phys_invalidate(tb, true);
+    qemu_thread_jit_execute();
 }
 
 /* invalidate one TB
@@ -1871,6 +1879,7 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
 #endif
 
     assert_memory_lock();
+    qemu_thread_jit_write();
 
     phys_pc = get_page_addr_code(env, pc);
 
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index f9ec8c84e9..929e970b0e 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -123,6 +123,10 @@  extern int daemon(int, int);
 #include "sysemu/os-posix.h"
 #endif
 
+#ifdef __APPLE__
+#include <AvailabilityMacros.h>
+#endif
+
 #include "glib-compat.h"
 #include "qemu/typedefs.h"
 
@@ -686,4 +690,7 @@  char *qemu_get_host_name(Error **errp);
  */
 size_t qemu_get_host_physmem(void);
 
+void qemu_thread_jit_write(void);
+void qemu_thread_jit_execute(void);
+
 #endif
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 472bf1755b..16b044eae7 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1112,6 +1112,7 @@  void tcg_prologue_init(TCGContext *s)
     s->pool_labels = NULL;
 #endif
 
+    qemu_thread_jit_write();
     /* Generate the prologue.  */
     tcg_target_qemu_prologue(s);
 
diff --git a/util/osdep.c b/util/osdep.c
index 66d01b9160..e211939a0c 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -606,3 +606,23 @@  writev(int fd, const struct iovec *iov, int iov_cnt)
     return readv_writev(fd, iov, iov_cnt, true);
 }
 #endif
+
+#if defined(MAC_OS_VERSION_11_0) && \
+    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
+void qemu_thread_jit_execute(void)
+{
+    if (__builtin_available(macOS 11.0, *)) {
+        pthread_jit_write_protect_np(true);
+    }
+}
+
+void qemu_thread_jit_write(void)
+{
+    if (__builtin_available(macOS 11.0, *)) {
+        pthread_jit_write_protect_np(false);
+    }
+}
+#else
+void qemu_thread_jit_write(void) {}
+void qemu_thread_jit_execute(void) {}
+#endif