diff mbox series

[RFC,v2,3/6] target/i386: Add native library calls

Message ID 20230607164750.829586-4-fufuyqqqqqq@gmail.com
State New
Headers show
Series Native Library Calls | expand

Commit Message

Yeqi Fu June 7, 2023, 4:47 p.m. UTC
Signed-off-by: Yeqi Fu <fufuyqqqqqq@gmail.com>
---
 target/i386/helper.h                 |  6 +++
 target/i386/tcg/translate.c          | 20 +++++++++
 target/i386/tcg/user/meson.build     |  1 +
 target/i386/tcg/user/native_helper.c | 65 ++++++++++++++++++++++++++++
 4 files changed, 92 insertions(+)
 create mode 100644 target/i386/tcg/user/native_helper.c

Comments

Richard Henderson June 7, 2023, 7:08 p.m. UTC | #1
On 6/7/23 09:47, Yeqi Fu wrote:
> +    arg0 = *(target_ulong *)g2h(cs, env->regs[R_ESP] + 4); \
> +    arg1 = *(target_ulong *)g2h(cs, env->regs[R_ESP] + 8); \
> +    arg2 = *(target_ulong *)g2h(cs, env->regs[R_ESP] + 12);

This is not correct, and will fail on big-endian hosts.

You need to use

     uintptr_t ra = GETPC();
     cpu_ldl_data_ra(env, guest_pointer, ra);

which will (amongst other things) take care of the byte swapping.

> +void helper_native_memcpy(CPUX86State *env)
> +{
> +    CPUState *cs = env_cpu(env);
> +    NATIVE_FN_W_3W();
> +    void *ret;
> +    void *dest = g2h(cs, arg0);
> +    void *src = g2h(cs, arg1);
> +    size_t n = (size_t)arg2;
> +    ret = memcpy(dest, src, n);
> +    env->regs[R_EAX] = (target_ulong)h2g(ret);
> +}

You need to do something for the case in which either src or dst is not accessible.

Routines like cpu_ldl_data_ra handle this for you, but you don't want to use that for memcpy.

There are several ways of doing this.  None of the existing helpers are ideal.

(A) void *dest = probe_write(env, arg0, arg2, MMU_USER_IDX, ra);
     void *src = probe_read(env, arg1, arg2, MMU_USER_IDX, ra);

which will raise SIGSEGV in case any byte of either region is not correctly mapped, and
also perform the guest-to-host address remapping.  However, the probe_* functions are
written to probe no more than one page at a time, which means you'd need a loop that
processes the remaining page fractions.

(B) There is page_check_range(), which can check a large region, but doesn't handle 
address translation.  And you still wind up with a race condition if another thread 
changes page mappings at the same time.

(C) Perform the address translation etc yourself, and then protect the actual host memory 
operation in the same way as exec/cpu_ldst.h functions:

     set_helper_retaddr(ra);
     memcpy(dest, src, n);
     clear_helper_retaddr();

In this case you must also validate that 'n' is representable in the host's size_t.  This
is only an issue for a 32-bit host with a 64-bit guest.  A check like (arg2 > SIZE_MAX) is
likely to generate a silly warning about an always-false comparison on 64-bit hosts.
Therefore I suggest

     if (n != arg2) {
         /*
          * Overflow of size_t means that sequential pointer access would wrap.
          * We know that NULL is unmapped, so at least that one byte would fault.
          * There is nothing in the specification of memcpy that requires bytes
          * to be accessed in order, so we are allowed to fault early.
          */
         cpu_loop_exit_sigsegv(env_cpu(env), 0, MMU_DATA_LOAD, true, ra);
     }

Finally, you know the return value from the specification of memcpy: arg0.
There is no need to remap the return value back from host to guest space.


r~
Richard Henderson June 7, 2023, 7:19 p.m. UTC | #2
On 6/7/23 09:47, Yeqi Fu wrote:
> +    /* One unknown opcode for native call */
> +#if defined(CONFIG_USER_ONLY)  && defined(CONFIG_USER_NATIVE_CALL)
> +    case 0x1ff:
> +        uint16_t sig = x86_lduw_code(env, s);
> +        switch (sig) {
> +        case NATIVE_MEMCPY:
> +            gen_helper_native_memcpy(cpu_env);
> +            break;
> +        case NATIVE_MEMSET:
> +            gen_helper_native_memset(cpu_env);
> +            break;
> +        case NATIVE_MEMCMP:
> +            gen_helper_native_memcmp(cpu_env);
> +            break;
> +        default:
> +            goto unknown_op;
> +        }
> +        break;
> +#endif

This bit of code must be protected by native_calls_enabled() or some such, as we do with 
semihosting_enabled().

Which means that patch 6 should come before this, so that native_calls_enabled() can be 
true if and only if "-native-bypass" is given.


r~
diff mbox series

Patch

diff --git a/target/i386/helper.h b/target/i386/helper.h
index e627a93107..6c91655887 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -221,3 +221,9 @@  DEF_HELPER_3(rcrq, tl, env, tl, tl)
 #endif
 
 DEF_HELPER_1(rdrand, tl, env)
+
+#if defined(CONFIG_USER_ONLY)  && defined(CONFIG_USER_NATIVE_CALL)
+DEF_HELPER_1(native_memcpy, void, env)
+DEF_HELPER_1(native_memcmp, void, env)
+DEF_HELPER_1(native_memset, void, env)
+#endif
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 91c9c0c478..eb0c1e9566 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -33,6 +33,7 @@ 
 #include "helper-tcg.h"
 
 #include "exec/log.h"
+#include "native/native-func.h"
 
 #define PREFIX_REPZ   0x01
 #define PREFIX_REPNZ  0x02
@@ -6806,6 +6807,25 @@  static bool disas_insn(DisasContext *s, CPUState *cpu)
     case 0x1d0 ... 0x1fe:
         disas_insn_new(s, cpu, b);
         break;
+    /* One unknown opcode for native call */
+#if defined(CONFIG_USER_ONLY)  && defined(CONFIG_USER_NATIVE_CALL)
+    case 0x1ff:
+        uint16_t sig = x86_lduw_code(env, s);
+        switch (sig) {
+        case NATIVE_MEMCPY:
+            gen_helper_native_memcpy(cpu_env);
+            break;
+        case NATIVE_MEMSET:
+            gen_helper_native_memset(cpu_env);
+            break;
+        case NATIVE_MEMCMP:
+            gen_helper_native_memcmp(cpu_env);
+            break;
+        default:
+            goto unknown_op;
+        }
+        break;
+#endif
     default:
         goto unknown_op;
     }
diff --git a/target/i386/tcg/user/meson.build b/target/i386/tcg/user/meson.build
index 1df6bc4343..490808bd65 100644
--- a/target/i386/tcg/user/meson.build
+++ b/target/i386/tcg/user/meson.build
@@ -1,4 +1,5 @@ 
 i386_user_ss.add(when: ['CONFIG_TCG', 'CONFIG_USER_ONLY'], if_true: files(
   'excp_helper.c',
   'seg_helper.c',
+  'native_helper.c',
 ))
diff --git a/target/i386/tcg/user/native_helper.c b/target/i386/tcg/user/native_helper.c
new file mode 100644
index 0000000000..4a9b98eee2
--- /dev/null
+++ b/target/i386/tcg/user/native_helper.c
@@ -0,0 +1,65 @@ 
+/*
+ *  native function call helpers
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/helper-proto.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+#include "tcg/helper-tcg.h"
+#include "tcg/seg_helper.h"
+
+#ifdef TARGET_X86_64
+#define NATIVE_FN_W_3W()           \
+    target_ulong arg0, arg1, arg2; \
+    arg0 = env->regs[R_EDI];       \
+    arg1 = env->regs[R_ESI];       \
+    arg2 = env->regs[R_EDX];
+#else
+/*
+ *  Linux x86 has several calling conventions; this variant implements the
+ *  most commonly used one, cdecl, reading arg0..arg2 at ESP+4, +8 and +12.
+ */
+#define NATIVE_FN_W_3W()                                   \
+    target_ulong arg0, arg1, arg2;                         \
+    arg0 = *(target_ulong *)g2h(cs, env->regs[R_ESP] + 4); \
+    arg1 = *(target_ulong *)g2h(cs, env->regs[R_ESP] + 8); \
+    arg2 = *(target_ulong *)g2h(cs, env->regs[R_ESP] + 12);
+#endif
+
+void helper_native_memcpy(CPUX86State *env) /* guest memcpy() run natively */
+{
+    CPUState *cs = env_cpu(env);
+    NATIVE_FN_W_3W();           /* declares arg0..arg2 and loads them */
+    void *ret;
+    void *dest = g2h(cs, arg0); /* g2h presumably maps guest addr to host */
+    void *src = g2h(cs, arg1);
+    size_t n = (size_t)arg2;    /* NOTE(review): cast may narrow arg2 */
+    ret = memcpy(dest, src, n); /* NOTE(review): regions are not probed */
+    env->regs[R_EAX] = (target_ulong)h2g(ret); /* result placed in EAX slot */
+}
+
+void helper_native_memcmp(CPUX86State *env) /* guest memcmp() run natively */
+{
+    CPUState *cs = env_cpu(env);
+    NATIVE_FN_W_3W();           /* declares arg0..arg2 and loads them */
+    int ret;
+    void *s1 = g2h(cs, arg0);   /* g2h presumably maps guest addr to host */
+    void *s2 = g2h(cs, arg1);
+    size_t n = (size_t)arg2;    /* NOTE(review): cast may narrow arg2 */
+    ret = memcmp(s1, s2, n);    /* NOTE(review): regions are not probed */
+    env->regs[R_EAX] = ret;     /* comparison result placed in EAX slot */
+}
+
+void helper_native_memset(CPUX86State *env) /* guest memset() run natively */
+{
+    CPUState *cs = env_cpu(env);
+    NATIVE_FN_W_3W();           /* declares arg0..arg2 and loads them */
+    void *ret;
+    void *s = g2h(cs, arg0);    /* g2h presumably maps guest addr to host */
+    int c = (int)arg1;          /* fill value, passed to memset() as int */
+    size_t n = (size_t)arg2;    /* NOTE(review): cast may narrow arg2 */
+    ret = memset(s, c, n);      /* NOTE(review): region is not probed */
+    env->regs[R_EAX] = (target_ulong)h2g(ret); /* result placed in EAX slot */
+}