Patchwork tcg-i386: Improve cmov detection

login
register
mail settings
Submitter Richard Henderson
Date Nov. 24, 2012, 5:39 p.m.
Message ID <1353778775-7477-1-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/201473/
State New
Headers show

Comments

Richard Henderson - Nov. 24, 2012, 5:39 p.m.
In addition to better compile-time detection, perform runtime detection.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c | 34 +++++++++++++++++++++++++++++++++-
 tcg/i386/tcg-target.h |  5 -----
 2 files changed, 33 insertions(+), 6 deletions(-)

Y'all are right that there's no particularly good method with which
to detect i686 *or later*, and thus cmov support, in gcc.  If one
uses -march=native with any processor made in the last 5 years,
one will have at least SSE1 support.  So we can reasonably use that
as a clue.

To fill in the holes, we can do the check at runtime.  That does
involve a tiny amount of runtime overhead, testing a global variable.
I suspect that this overhead is unmeasurable.


r~
Peter Maydell - Nov. 24, 2012, 6:12 p.m.
On 24 November 2012 17:39, Richard Henderson <rth@twiddle.net> wrote:
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -97,6 +97,20 @@ static const int tcg_target_call_oarg_regs[] = {
>  # define TCG_REG_L1 TCG_REG_EDX
>  #endif
>
> +/* Attempt to determine at compile-time whether the compiler assumes that
> +   cmov is available.  We get 64-bit for free.  P6 (i686) and later include
> +   support for cmov, but there is no one preprocessor define that determines
> +   this.  Assume that all processors that include sse also support cmov, so
> +   that we sorta future-proof this test against new preprocessor defines.  */
> +#include <cpuid.h>

MacOS gcc objects to this:
In file included from /Users/pm215/src/qemu/tcg/tcg.c:174:
/Users/pm215/src/qemu/tcg/i386/tcg-target.c:105:19: warning: cpuid.h:
No such file or directory

(though for some reason not as a fatal error).

-- PMM
Aurelien Jarno - Nov. 25, 2012, 1:44 p.m.
On Sat, Nov 24, 2012 at 09:39:35AM -0800, Richard Henderson wrote:
> In addition to better compile-time detection, perform runtime detection.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/i386/tcg-target.c | 34 +++++++++++++++++++++++++++++++++-
>  tcg/i386/tcg-target.h |  5 -----
>  2 files changed, 33 insertions(+), 6 deletions(-)
> 
> Yall are right that there's no particularly good method with which
> to detect i686 *or later*, and thus cmov support, in gcc.  If one
> uses -march=native with any processor made in the last 5 years,
> one will have at least SSE1 support.  So we can reasonably use that
> as a clue.
> 
> To fill in the holes, we can do the check at runtime.  That does
> involve a tiny amount of runtime overhead, testing a global variable.
> I suspect that this is overhead is unmeasurable.

If this overhead is unmeasurable, and I think that is true, I
think it would be better to just always use that on i386 (but not on
x86_64) instead of having a complex compile time detection that could
fail.

Otherwise the patch looks fine.


> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 6f3ad3c..b333b46 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -97,6 +97,20 @@ static const int tcg_target_call_oarg_regs[] = {
>  # define TCG_REG_L1 TCG_REG_EDX
>  #endif
>  
> +/* Attempt to determine at compile-time whether the compiler assumes that
> +   cmov is available.  We get 64-bit for free.  P6 (i686) and later include
> +   support for cmov, but there is no one preprocessor define that determines
> +   this.  Assume that all processors that include sse also support cmov, so
> +   that we sorta future-proof this test against new preprocessor defines.  */
> +#include <cpuid.h>
> +#if (TCG_TARGET_REG_BITS == 64 \
> +     || defined(__i686__) || defined(__pentium4__) \
> +     || defined(__athlon__) || defined(__SSE__))
> +# define have_cmov 1
> +#else
> +static bool have_cmov;
> +#endif
> +
>  static uint8_t *tb_ret_addr;
>  
>  static void patch_reloc(uint8_t *code_ptr, int type,
> @@ -943,7 +957,14 @@ static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
>                                TCGArg v1)
>  {
>      tcg_out_cmp(s, c1, c2, const_c2, 0);
> -    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
> +    if (have_cmov) {
> +        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
> +    } else {
> +        int over = gen_new_label();
> +        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
> +        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
> +        tcg_out_label(s, over, s->code_ptr);
> +    }
>  }
>  
>  #if TCG_TARGET_REG_BITS == 64
> @@ -2243,6 +2264,17 @@ static void tcg_target_qemu_prologue(TCGContext *s)
>  
>  static void tcg_target_init(TCGContext *s)
>  {
> +    /* If we could not determine cmov availablity at compile time, perform
> +       the check at runtime.  99% certainty that we're running on hardware
> +       that supports cmov, but we still need to check.  In case cmov is not
> +       available, we'll use a small forward branch.  */
> +#ifndef have_cmov
> +    {
> +        unsigned a, b, c, d;
> +        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
> +    }
> +#endif
> +
>  #if !defined(CONFIG_USER_ONLY)
>      /* fail safe */
>      if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index dbc6756..450078b 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -90,12 +90,7 @@ typedef enum {
>  #define TCG_TARGET_HAS_nand_i32         0
>  #define TCG_TARGET_HAS_nor_i32          0
>  #define TCG_TARGET_HAS_deposit_i32      1
> -#if defined(__x86_64__) || defined(__i686__)
> -/* Use cmov only if the compiler is already doing so.  */
>  #define TCG_TARGET_HAS_movcond_i32      1
> -#else
> -#define TCG_TARGET_HAS_movcond_i32      0
> -#endif
>  
>  #if TCG_TARGET_REG_BITS == 64
>  #define TCG_TARGET_HAS_div2_i64         1
> -- 
> 1.7.11.7
> 
>
Richard Henderson - Nov. 26, 2012, 4:23 p.m.
On 11/24/2012 10:12 AM, Peter Maydell wrote:
> MacOS gcc objects to this:
> In file included from /Users/pm215/src/qemu/tcg/tcg.c:174:
> /Users/pm215/src/qemu/tcg/i386/tcg-target.c:105:19: warning: cpuid.h:
> No such file or directory
> 
> (though for some reason not as a fatal error).

Bizarre.

Out of curiosity, does llvm ship a cpuid.h?  Or am I going to be
better off not relying on that header at all?


r~
陳韋任 - Dec. 10, 2012, 3:42 p.m.
On Mon, Nov 26, 2012 at 08:23:10AM -0800, Richard Henderson wrote:
> On 11/24/2012 10:12 AM, Peter Maydell wrote:
> > MacOS gcc objects to this:
> > In file included from /Users/pm215/src/qemu/tcg/tcg.c:174:
> > /Users/pm215/src/qemu/tcg/i386/tcg-target.c:105:19: warning: cpuid.h:
> > No such file or directory
> > 
> > (though for some reason not as a fatal error).
> 
> Bizzare.
> 
> Out of curiosity, does llvm ship a cpuid.h?  Or am I going to be
> better off not relying on that header at all?

  I don't think LLVM ships cpuid.h.

Patch

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 6f3ad3c..b333b46 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -97,6 +97,20 @@  static const int tcg_target_call_oarg_regs[] = {
 # define TCG_REG_L1 TCG_REG_EDX
 #endif
 
+/* Attempt to determine at compile-time whether the compiler assumes that
+   cmov is available.  We get 64-bit for free.  P6 (i686) and later include
+   support for cmov, but there is no one preprocessor define that determines
+   this.  Assume that all processors that include sse also support cmov, so
+   that we sorta future-proof this test against new preprocessor defines.  */
+#include <cpuid.h>
+#if (TCG_TARGET_REG_BITS == 64 \
+     || defined(__i686__) || defined(__pentium4__) \
+     || defined(__athlon__) || defined(__SSE__))
+# define have_cmov 1
+#else
+static bool have_cmov;
+#endif
+
 static uint8_t *tb_ret_addr;
 
 static void patch_reloc(uint8_t *code_ptr, int type,
@@ -943,7 +957,14 @@  static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                               TCGArg v1)
 {
     tcg_out_cmp(s, c1, c2, const_c2, 0);
-    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+    if (have_cmov) {
+        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+    } else {
+        int over = gen_new_label();
+        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
+        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
+        tcg_out_label(s, over, s->code_ptr);
+    }
 }
 
 #if TCG_TARGET_REG_BITS == 64
@@ -2243,6 +2264,17 @@  static void tcg_target_qemu_prologue(TCGContext *s)
 
 static void tcg_target_init(TCGContext *s)
 {
+    /* If we could not determine cmov availability at compile time, perform
+       the check at runtime.  99% certainty that we're running on hardware
+       that supports cmov, but we still need to check.  In case cmov is not
+       available, we'll use a small forward branch.  */
+#ifndef have_cmov
+    {
+        unsigned a, b, c, d;
+        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
+    }
+#endif
+
 #if !defined(CONFIG_USER_ONLY)
     /* fail safe */
     if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index dbc6756..450078b 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -90,12 +90,7 @@  typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
-#if defined(__x86_64__) || defined(__i686__)
-/* Use cmov only if the compiler is already doing so.  */
 #define TCG_TARGET_HAS_movcond_i32      1
-#else
-#define TCG_TARGET_HAS_movcond_i32      0
-#endif
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1