Patchwork [54/57] target-i386: Implement ADX extension

login
register
mail settings
Submitter Richard Henderson
Date Feb. 19, 2013, 5:40 p.m.
Message ID <1361295631-21316-55-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/221871/
State New
Headers show

Comments

Richard Henderson - Feb. 19, 2013, 5:40 p.m.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-i386/cc_helper.c |  30 +++++++++++++
 target-i386/cpu.c       |   4 +-
 target-i386/cpu.h       |   4 ++
 target-i386/helper.c    |   4 ++
 target-i386/translate.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 146 insertions(+), 5 deletions(-)
Richard Henderson - Feb. 20, 2013, 7:22 a.m.
On 2013-02-19 09:40, Richard Henderson wrote:
> +                        /* Otherwise compute the carry-out in two steps.  */
> +                        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_regs[reg]);
> +                        tcg_gen_setcond_tl(TCG_COND_LTU, cpu_tmp4,
> +                                           cpu_T[0], cpu_regs[reg]);
> +                        tcg_gen_add_tl(cpu_regs[reg], cpu_T[0], carry_in);
> +                        tcg_gen_setcond_tl(TCG_COND_LTU, carry_out,
> +                                           cpu_T[0], carry_in);

Typo in the carry_out computation here: the second setcond should compare the final sum in cpu_regs[reg], not cpu_T[0], against carry_in.  Fixed for the next revision.


r~

Patch

diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index 5ea6a0a..6cf57a7 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -75,6 +75,24 @@  const uint8_t parity_table[256] = {
 
 #endif
 
+static target_ulong compute_all_adcx(target_ulong dst, target_ulong src1,
+                                     target_ulong src2)
+{
+    return (dst * CC_C) | (src1 & ~CC_C); /* C from dst, rest from src1 */
+}
+
+static target_ulong compute_all_adox(target_ulong dst, target_ulong src1,
+                                     target_ulong src2)
+{
+    return (src2 * CC_O) | (src1 & ~CC_O); /* O from src2, rest from src1 */
+}
+
+static target_ulong compute_all_adcox(target_ulong dst, target_ulong src1,
+                                      target_ulong src2)
+{
+    return (dst * CC_C) | (src2 * CC_O) | (src1 & ~(CC_C | CC_O));
+}
+
 target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
                                    target_ulong src2, int op)
 {
@@ -162,6 +180,13 @@  target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
     case CC_OP_BMILGL:
         return compute_all_bmilgl(dst, src1);
 
+    case CC_OP_ADCX:
+        return compute_all_adcx(dst, src1, src2);
+    case CC_OP_ADOX:
+        return compute_all_adox(dst, src1, src2);
+    case CC_OP_ADCOX:
+        return compute_all_adcox(dst, src1, src2);
+
 #ifdef TARGET_X86_64
     case CC_OP_MULQ:
         return compute_all_mulq(dst, src1);
@@ -210,6 +235,7 @@  target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
     case CC_OP_SARW:
     case CC_OP_SARL:
     case CC_OP_SARQ:
+    case CC_OP_ADOX:
         return src1 & 1;
 
     case CC_OP_INCB:
@@ -228,6 +254,10 @@  target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
     case CC_OP_MULQ:
         return src1 != 0;
 
+    case CC_OP_ADCX:
+    case CC_OP_ADCOX:
+        return dst;
+
     case CC_OP_ADDB:
         return compute_c_addb(dst, src1);
     case CC_OP_ADDW:
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0cb64ab..5582e5f 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -407,11 +407,11 @@  typedef struct x86_def_t {
           CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
 #define TCG_SVM_FEATURES 0
 #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP \
-          CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2)
+          CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX)
           /* missing:
           CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
           CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
-          CPUID_7_0_EBX_RDSEED, CPUID_7_0_EBX_ADX */
+          CPUID_7_0_EBX_RDSEED */
 
 /* built-in CPU model definitions
  */
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 960676b..e0443d8 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -641,6 +641,10 @@  typedef enum {
     CC_OP_BMILGL,
     CC_OP_BMILGQ,
 
+    CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest.  */
+    CC_OP_ADOX, /* CC_SRC2 = O, CC_SRC = rest.  */
+    CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest.  */
+
     CC_OP_NB,
 } CCOp;
 
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 74d600f..66c3624 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -113,6 +113,10 @@  static const char *cc_op_str[CC_OP_NB] = {
     "BMILGW",
     "BMILGL",
     "BMILGQ",
+
+    "ADCX",
+    "ADOX",
+    "ADCOX",
 };
 
 static void
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 68e30e6..7edfb55 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -210,6 +210,9 @@  static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
+    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
 };
 
 static void set_cc_op(DisasContext *s, CCOp op)
@@ -994,6 +997,11 @@  static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
         return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
 
+    case CC_OP_ADCX:
+    case CC_OP_ADCOX:
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
+                             .mask = -1, .no_setcond = true };
+
     case CC_OP_EFLAGS:
     case CC_OP_SARB ... CC_OP_SARQ:
         /* CC_SRC & 1 */
@@ -1027,6 +1035,9 @@  static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
         gen_compute_eflags(s);
         /* FALLTHRU */
     case CC_OP_EFLAGS:
+    case CC_OP_ADCX:
+    case CC_OP_ADOX:
+    case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                              .mask = CC_S };
     default:
@@ -1041,9 +1052,17 @@  static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
 /* compute eflags.O to reg */
 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s);
-    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                         .mask = CC_O };
+    switch (s->cc_op) {
+    case CC_OP_ADOX:
+    case CC_OP_ADCOX: /* these two modes keep O by itself in CC_SRC2 */
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
+                             .mask = -1, .no_setcond = true };
+
+    default: /* otherwise test the CC_O bit after gen_compute_eflags() */
+        gen_compute_eflags(s);
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+                             .mask = CC_O };
+    }
 }
 
 /* compute eflags.Z to reg */
@@ -1054,6 +1073,9 @@  static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
         gen_compute_eflags(s);
         /* FALLTHRU */
     case CC_OP_EFLAGS:
+    case CC_OP_ADCX:
+    case CC_OP_ADOX:
+    case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                              .mask = CC_Z };
     default:
@@ -4174,6 +4196,87 @@  static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]);
                 break;
 
+            case 0x1f6: /* adcx Gy, Ey */
+            case 0x2f6: /* adox Gy, Ey */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
+                    goto illegal_op;
+                } else {
+                    TCGv carry_in, carry_out;
+                    int end_op;
+
+                    ot = (s->dflag == 2 ? OT_QUAD : OT_LONG);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+
+                    /* Re-use the carry-out of a preceding adcx/adox.  */
+                    TCGV_UNUSED(carry_in);
+                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
+                    switch (s->cc_op) {
+                    case CC_OP_ADCX:
+                        if (b == 0x1f6) {
+                            carry_in = cpu_cc_dst;
+                            end_op = CC_OP_ADCX;
+                        } else {
+                            end_op = CC_OP_ADCOX; /* C stays in CC_DST */
+                        }
+                        break;
+                    case CC_OP_ADOX:
+                        if (b == 0x1f6) {
+                            end_op = CC_OP_ADCOX; /* O stays in CC_SRC2 */
+                        } else {
+                            carry_in = cpu_cc_src2;
+                            end_op = CC_OP_ADOX;
+                        }
+                        break;
+                    case CC_OP_ADCOX:
+                        end_op = CC_OP_ADCOX;
+                        carry_in = carry_out;
+                        break;
+                    default: /* only the flag we produce is split out */
+                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
+                        break;
+                    }
+                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
+                    if (TCGV_IS_UNUSED(carry_in)) {
+                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
+                            gen_compute_eflags(s);
+                        }
+                        carry_in = cpu_tmp0;
+                        tcg_gen_shri_tl(carry_in, cpu_cc_src,
+                                        ctz32(b == 0x1f6 ? CC_C : CC_O));
+                        tcg_gen_andi_tl(carry_in, carry_in, 1);
+                    }
+
+                    switch (ot) {
+#ifdef TARGET_X86_64
+                    case OT_LONG:
+                        /* If we know TL is 64-bit, and we want a 32-bit
+                           result, just do everything in 64-bit arithmetic.  */
+                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
+                        tcg_gen_ext32u_i64(cpu_T[0], cpu_T[0]);
+                        tcg_gen_add_i64(cpu_T[0], cpu_T[0], cpu_regs[reg]);
+                        tcg_gen_add_i64(cpu_T[0], cpu_T[0], carry_in);
+                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T[0]);
+                        tcg_gen_shri_i64(carry_out, cpu_T[0], 32);
+                        break;
+#endif
+                    default:
+                        /* Otherwise compute the carry-out in two steps.  */
+                        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_regs[reg]);
+                        tcg_gen_setcond_tl(TCG_COND_LTU, cpu_tmp4,
+                                           cpu_T[0], cpu_regs[reg]);
+                        tcg_gen_add_tl(cpu_regs[reg], cpu_T[0], carry_in);
+                        tcg_gen_setcond_tl(TCG_COND_LTU, carry_out,
+                                           cpu_regs[reg], carry_in);
+                        tcg_gen_or_tl(carry_out, carry_out, cpu_tmp4);
+                        break;
+                    }
+                    /* We began with all flags computed to CC_SRC, and we
+                       have now placed the carry-out in CC_DST (adcx) or
+                       CC_SRC2 (adox).  All that is left is the CC_OP.  */
+                    set_cc_op(s, end_op);
+                }
+                break;
+
             case 0x1f7: /* shlx Gy, Ey, By */
             case 0x2f7: /* sarx Gy, Ey, By */
             case 0x3f7: /* shrx Gy, Ey, By */