diff mbox

[05/11] tcg/i386: Make direct jump patching thread-safe

Message ID 1460044433-19282-6-git-send-email-sergey.fedorov@linaro.org
State New
Headers show

Commit Message

sergey.fedorov@linaro.org April 7, 2016, 3:53 p.m. UTC
From: Sergey Fedorov <serge.fdrv@gmail.com>

Ensure direct jump patching in i386 is atomic by:
 * naturally aligning a location of direct jump address;
 * using atomic_read()/atomic_set() for code patching.

Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
---
 include/exec/exec-all.h   |  2 +-
 tcg/i386/tcg-target.inc.c | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

Comments

Alex Bennée April 20, 2016, 9:55 a.m. UTC | #1
Sergey Fedorov <sergey.fedorov@linaro.org> writes:

> From: Sergey Fedorov <serge.fdrv@gmail.com>
>
> Ensure direct jump patching in i386 is atomic by:
>  * naturally aligning a location of direct jump address;
>  * using atomic_read()/atomic_set() for code patching.
>
> Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
> Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
> ---
>  include/exec/exec-all.h   |  2 +-
>  tcg/i386/tcg-target.inc.c | 17 +++++++++++++++++
>  2 files changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 59709c9dd5c9..82399175fe80 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -312,7 +312,7 @@ void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
>  static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
>  {
>      /* patch the branch destination */
> -    stl_le_p((void*)jmp_addr, addr - (jmp_addr + 4));
> +    atomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4));
>      /* no need to flush icache explicitly */
>  }
>  #elif defined(__s390x__)
> diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
> index 9187d34caf6d..3ffb7b3124d8 100644
> --- a/tcg/i386/tcg-target.inc.c
> +++ b/tcg/i386/tcg-target.inc.c
> @@ -1123,6 +1123,19 @@ static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
>      tcg_out_branch(s, 0, dest);
>  }
>
> +static void tcg_out_nopn(TCGContext *s, int n)
> +{
> +    static const uint8_t nop1[] = { 0x90 };
> +    static const uint8_t nop2[] = { 0x66, 0x90 };
> +    static const uint8_t nop3[] = { 0x8d, 0x76, 0x00 };
> +    static const uint8_t *const nopn[] = { nop1, nop2, nop3 };
> +    int i;
> +    assert(n <= ARRAY_SIZE(nopn));
> +    for (i = 0; i < n; ++i) {
> +        tcg_out8(s, nopn[n - 1][i]);
> +    }
> +}

*shudder* I recall x86 instruction encoding is weird. Maybe a comment
 for the function to describe the 3 forms of NOP we have here?

> +
>  #if defined(CONFIG_SOFTMMU)
>  /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
>   *                                     int mmu_idx, uintptr_t ra)
> @@ -1777,6 +1790,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_goto_tb:
>          if (s->tb_jmp_offset) {
>              /* direct jump method */
> +            /* align jump displacement for atomic pathing */

s/pathing/patching/

> +            if (((uintptr_t)s->code_ptr & 3) != 3) {
> +                tcg_out_nopn(s, 3 - ((uintptr_t)s->code_ptr & 3));
> +            }

apropos my previous comments. I think the intention could be made
clearer the use of well named helper functions to check alignment and
calculate number elements until next alignment.

>              tcg_out8(s, OPC_JMP_long); /* jmp im */
>              s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
>              tcg_out32(s, 0);


--
Alex Bennée
Sergey Fedorov April 20, 2016, 11:43 a.m. UTC | #2
On 20/04/16 12:55, Alex Bennée wrote:
> Sergey Fedorov <sergey.fedorov@linaro.org> writes:
>> diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
>> index 9187d34caf6d..3ffb7b3124d8 100644
>> --- a/tcg/i386/tcg-target.inc.c
>> +++ b/tcg/i386/tcg-target.inc.c
>> @@ -1123,6 +1123,19 @@ static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
>>      tcg_out_branch(s, 0, dest);
>>  }
>>
>> +static void tcg_out_nopn(TCGContext *s, int n)
>> +{
>> +    static const uint8_t nop1[] = { 0x90 };
>> +    static const uint8_t nop2[] = { 0x66, 0x90 };
>> +    static const uint8_t nop3[] = { 0x8d, 0x76, 0x00 };
>> +    static const uint8_t *const nopn[] = { nop1, nop2, nop3 };
>> +    int i;
>> +    assert(n <= ARRAY_SIZE(nopn));
>> +    for (i = 0; i < n; ++i) {
>> +        tcg_out8(s, nopn[n - 1][i]);
>> +    }
>> +}
> *shudder* I recall x86 instruction encoding is weird. Maybe a comment
>  for the function to describe the 3 forms of NOP we have here?

Okay.

>
>> +
>>  #if defined(CONFIG_SOFTMMU)
>>  /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
>>   *                                     int mmu_idx, uintptr_t ra)
>> @@ -1777,6 +1790,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>>      case INDEX_op_goto_tb:
>>          if (s->tb_jmp_offset) {
>>              /* direct jump method */
>> +            /* align jump displacement for atomic pathing */
> s/pathing/patching/

Nice catch, thanks :)
 
Kind regards,
Sergey
Richard Henderson April 20, 2016, 3:04 p.m. UTC | #3
On 04/20/2016 02:55 AM, Alex Bennée wrote:
>> +static void tcg_out_nopn(TCGContext *s, int n)
>> +{
>> +    static const uint8_t nop1[] = { 0x90 };
>> +    static const uint8_t nop2[] = { 0x66, 0x90 };
>> +    static const uint8_t nop3[] = { 0x8d, 0x76, 0x00 };
>> +    static const uint8_t *const nopn[] = { nop1, nop2, nop3 };
>> +    int i;
>> +    assert(n <= ARRAY_SIZE(nopn));
>> +    for (i = 0; i < n; ++i) {
>> +        tcg_out8(s, nopn[n - 1][i]);
>> +    }
>> +}
>
> *shudder* I recall x86 instruction encoding is weird. Maybe a comment
>   for the function to describe the 3 forms of NOP we have here?

I think I'd prefer to drop the tables and do

   /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
      xchg %eax,%eax, forming xchg %ax,%ax.  All cores accept the
      duplicate prefix, and all of the interesting recent cores can
      decode and discard the duplicates in a single cycle.  */
   for (i = 1; i < n; ++i) {
     tcg_out8(s, 0x66);
   }
   tcg_out8(s, 0x90);


r~
Sergey Fedorov April 20, 2016, 4:15 p.m. UTC | #4
On 20/04/16 18:04, Richard Henderson wrote:
> On 04/20/2016 02:55 AM, Alex Bennée wrote:
>>> +static void tcg_out_nopn(TCGContext *s, int n)
>>> +{
>>> +    static const uint8_t nop1[] = { 0x90 };
>>> +    static const uint8_t nop2[] = { 0x66, 0x90 };
>>> +    static const uint8_t nop3[] = { 0x8d, 0x76, 0x00 };
>>> +    static const uint8_t *const nopn[] = { nop1, nop2, nop3 };
>>> +    int i;
>>> +    assert(n <= ARRAY_SIZE(nopn));
>>> +    for (i = 0; i < n; ++i) {
>>> +        tcg_out8(s, nopn[n - 1][i]);
>>> +    }
>>> +}
>>
>> *shudder* I recall x86 instruction encoding is weird. Maybe a comment
>>   for the function to describe the 3 forms of NOP we have here?
>
> I think I'd prefer to drop the tables and do
>
>   /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
>      xchg %eax,%eax, forming xchg %ax,%ax.  All cores accept the
>      duplicate prefix, and all of the interesting recent cores can
>      decode and discard the duplicates in a single cycle.  */
>   for (i = 1; i < n; ++i) {
>     tcg_out8(s, 0x66);
>   }
>   tcg_out8(s, 0x90);

It's fine if you are sure about that :)

Kind regards,
Sergey
diff mbox

Patch

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 59709c9dd5c9..82399175fe80 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -312,7 +312,7 @@  void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
 static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
 {
     /* patch the branch destination */
-    stl_le_p((void*)jmp_addr, addr - (jmp_addr + 4));
+    atomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4));
     /* no need to flush icache explicitly */
 }
 #elif defined(__s390x__)
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 9187d34caf6d..3ffb7b3124d8 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -1123,6 +1123,19 @@  static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
     tcg_out_branch(s, 0, dest);
 }
 
+static void tcg_out_nopn(TCGContext *s, int n)
+{
+    static const uint8_t nop1[] = { 0x90 };
+    static const uint8_t nop2[] = { 0x66, 0x90 };
+    static const uint8_t nop3[] = { 0x8d, 0x76, 0x00 };
+    static const uint8_t *const nopn[] = { nop1, nop2, nop3 };
+    int i;
+    assert(n <= ARRAY_SIZE(nopn));
+    for (i = 0; i < n; ++i) {
+        tcg_out8(s, nopn[n - 1][i]);
+    }
+}
+
 #if defined(CONFIG_SOFTMMU)
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  *                                     int mmu_idx, uintptr_t ra)
@@ -1777,6 +1790,10 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* direct jump method */
+            /* align jump displacement for atomic pathing */
+            if (((uintptr_t)s->code_ptr & 3) != 3) {
+                tcg_out_nopn(s, 3 - ((uintptr_t)s->code_ptr & 3));
+            }
             tcg_out8(s, OPC_JMP_long); /* jmp im */
             s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
             tcg_out32(s, 0);