Patchwork [13/13] alpha: Implement fp branch/cmov inline.

login
register
mail settings
Submitter Richard Henderson
Date Dec. 11, 2009, 11:07 p.m.
Message ID <002372539c19bfda9d58db793816872dada67d7e.1260580414.git.rth@twiddle.net>
Download mbox | patch
Permalink /patch/40943/
State New
Headers show

Comments

Richard Henderson - Dec. 11, 2009, 11:07 p.m.
The old fcmov implementation had a typo:
-        tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[ra]);
which copied the condition register (ra), not the second source (rb), into the destination.

Rather than only fixing the typo, implement the simplified fp comparison against zero inline, which also allows the cmpf* helpers to be removed.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-alpha/helper.h    |    7 --
 target-alpha/op_helper.c |   31 -------
 target-alpha/translate.c |  197 ++++++++++++++++++++++++++--------------------
 3 files changed, 110 insertions(+), 125 deletions(-)

Patch

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index 4eb3b6f..bedd3c0 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -77,13 +77,6 @@  DEF_HELPER_2(cmpgeq, i64, i64, i64)
 DEF_HELPER_2(cmpgle, i64, i64, i64)
 DEF_HELPER_2(cmpglt, i64, i64, i64)
 
-DEF_HELPER_1(cmpfeq, i64, i64)
-DEF_HELPER_1(cmpfne, i64, i64)
-DEF_HELPER_1(cmpflt, i64, i64)
-DEF_HELPER_1(cmpfle, i64, i64)
-DEF_HELPER_1(cmpfgt, i64, i64)
-DEF_HELPER_1(cmpfge, i64, i64)
-
 DEF_HELPER_2(cpys, i64, i64, i64)
 DEF_HELPER_2(cpysn, i64, i64, i64)
 DEF_HELPER_2(cpyse, i64, i64, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index d7f4fb2..8eba5ec 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -884,37 +884,6 @@  uint64_t helper_cmpglt(uint64_t a, uint64_t b)
         return 0;
 }
 
-uint64_t helper_cmpfeq (uint64_t a)
-{
-    return !(a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfne (uint64_t a)
-{
-    return (a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpflt (uint64_t a)
-{
-    return (a & 0x8000000000000000ULL) && (a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfle (uint64_t a)
-{
-    return (a & 0x8000000000000000ULL) || !(a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfgt (uint64_t a)
-{
-    return !(a & 0x8000000000000000ULL) && (a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfge (uint64_t a)
-{
-    return !(a & 0x8000000000000000ULL) || !(a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-
 /* Floating point format conversion */
 uint64_t helper_cvtts (uint64_t a)
 {
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index e426677..5b34fc6 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -294,77 +294,98 @@  static inline void gen_store_mem(DisasContext *ctx,
     tcg_temp_free(addr);
 }
 
-static inline void gen_bcond(DisasContext *ctx, TCGCond cond, int ra,
-                             int32_t disp, int mask)
+static void gen_bcond_pcload(DisasContext *ctx, int32_t disp, int lab_true)
 {
-    int l1, l2;
+    int lab_over = gen_new_label();
+
+    tcg_gen_movi_i64(cpu_pc, ctx->pc);
+    tcg_gen_br(lab_over);
+    gen_set_label(lab_true);
+    tcg_gen_movi_i64(cpu_pc, ctx->pc + (int64_t)(disp << 2));
+    gen_set_label(lab_over);
+}
+
+static void gen_bcond(DisasContext *ctx, TCGCond cond, int ra,
+                      int32_t disp, int mask)
+{
+    int lab_true = gen_new_label();
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
     if (likely(ra != 31)) {
         if (mask) {
             TCGv tmp = tcg_temp_new();
             tcg_gen_andi_i64(tmp, cpu_ir[ra], 1);
-            tcg_gen_brcondi_i64(cond, tmp, 0, l1);
+            tcg_gen_brcondi_i64(cond, tmp, 0, lab_true);
             tcg_temp_free(tmp);
-        } else
-            tcg_gen_brcondi_i64(cond, cpu_ir[ra], 0, l1);
+        } else {
+            tcg_gen_brcondi_i64(cond, cpu_ir[ra], 0, lab_true);
+        }
     } else {
         /* Very uncommon case - Do not bother to optimize.  */
         TCGv tmp = tcg_const_i64(0);
-        tcg_gen_brcondi_i64(cond, tmp, 0, l1);
+        tcg_gen_brcondi_i64(cond, tmp, 0, lab_true);
         tcg_temp_free(tmp);
     }
-    tcg_gen_movi_i64(cpu_pc, ctx->pc);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(cpu_pc, ctx->pc + (int64_t)(disp << 2));
-    gen_set_label(l2);
+    gen_bcond_pcload(ctx, disp, lab_true);
 }
 
-static inline void gen_fbcond(DisasContext *ctx, int opc, int ra, int32_t disp)
+/* Generate a forward TCG branch to LAB_TRUE if SRC cmp 0.0.
+   This is complicated by the fact that -0.0 compares the same as +0.0.  */
+
+static void gen_fbcond_internal(TCGCond cond, TCGv src, int lab_true)
 {
-    int l1, l2;
+    int lab_false = -1;
+    uint64_t mzero = 1ull << 63;
     TCGv tmp;
-    TCGv src;
-
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-    if (ra != 31) {
+    
+    switch (cond) {
+    case TCG_COND_LE:
+    case TCG_COND_GT:
+        /* For <= or >, the -0.0 value directly compares the way we want.  */
+        tcg_gen_brcondi_i64(cond, src, 0, lab_true);
+        break;
+
+    case TCG_COND_EQ:
+    case TCG_COND_NE:
+        /* For == or !=, we can simply mask off the sign bit and compare.  */
+        /* ??? Assume that the temporary is reclaimed at the branch.  */
         tmp = tcg_temp_new();
-        src = cpu_fir[ra];
-    } else  {
-        tmp = tcg_const_i64(0);
-        src = tmp;
-    }
-    switch (opc) {
-    case 0x31: /* FBEQ */
-        gen_helper_cmpfeq(tmp, src);
-        break;
-    case 0x32: /* FBLT */
-        gen_helper_cmpflt(tmp, src);
-        break;
-    case 0x33: /* FBLE */
-        gen_helper_cmpfle(tmp, src);
-        break;
-    case 0x35: /* FBNE */
-        gen_helper_cmpfne(tmp, src);
+        tcg_gen_andi_i64(tmp, src, mzero - 1);
+        tcg_gen_brcondi_i64(cond, tmp, 0, lab_true);
         break;
-    case 0x36: /* FBGE */
-        gen_helper_cmpfge(tmp, src);
+
+    case TCG_COND_GE:
+        /* For >=, emit two branches to the destination.  */
+        tcg_gen_brcondi_i64(cond, src, 0, lab_true);
+        tcg_gen_brcondi_i64(TCG_COND_EQ, src, mzero, lab_true);
         break;
-    case 0x37: /* FBGT */
-        gen_helper_cmpfgt(tmp, src);
+
+    case TCG_COND_LT:
+        /* For <, first filter out -0.0 to what will be the fallthru.  */
+        lab_false = gen_new_label();
+        tcg_gen_brcondi_i64(TCG_COND_EQ, src, mzero, lab_false);
+        tcg_gen_brcondi_i64(cond, src, 0, lab_true);
+        gen_set_label(lab_false);
         break;
+
     default:
         abort();
     }
-    tcg_gen_brcondi_i64(TCG_COND_NE, tmp, 0, l1);
-    tcg_gen_movi_i64(cpu_pc, ctx->pc);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(cpu_pc, ctx->pc + (int64_t)(disp << 2));
-    gen_set_label(l2);
+}
+
+static void gen_fbcond(DisasContext *ctx, TCGCond cond, int ra, int32_t disp)
+{
+    int lab_true;
+
+    if (unlikely(ra == 31)) {
+        /* Very uncommon case, but easier to optimize it to an integer
+           comparison than continuing with the floating point comparison.  */
+        gen_bcond(ctx, cond, ra, disp, 0);
+        return;
+    }
+
+    lab_true = gen_new_label();
+    gen_fbcond_internal(cond, cpu_fir[ra], lab_true);
+    gen_bcond_pcload(ctx, disp, lab_true);
 }
 
 static inline void gen_cmov(TCGCond inv_cond, int ra, int rb, int rc,
@@ -399,6 +420,28 @@  static inline void gen_cmov(TCGCond inv_cond, int ra, int rb, int rc,
     gen_set_label(l1);
 }
 
+static void gen_fcmov(TCGCond inv_cond, int ra, int rb, int rc)
+{
+    TCGv va = cpu_fir[ra];
+    int l1;
+
+    if (unlikely(rc == 31))
+        return;
+    if (unlikely(ra == 31)) {
+        /* ??? Assume that the temporary is reclaimed at the branch.  */
+        va = tcg_const_i64(0);
+    }
+
+    l1 = gen_new_label();
+    gen_fbcond_internal(inv_cond, va, l1);
+
+    if (rb != 31)
+        tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[rb]);
+    else
+        tcg_gen_movi_i64(cpu_fir[rc], 0);
+    gen_set_label(l1);
+}
+
 #define FARITH2(name)                                       \
 static inline void glue(gen_f, name)(int rb, int rc)        \
 {                                                           \
@@ -482,38 +525,6 @@  FARITH3(cpys)
 FARITH3(cpysn)
 FARITH3(cpyse)
 
-#define FCMOV(name)                                                   \
-static inline void glue(gen_f, name)(int ra, int rb, int rc)          \
-{                                                                     \
-    int l1;                                                           \
-    TCGv tmp;                                                         \
-                                                                      \
-    if (unlikely(rc == 31))                                           \
-        return;                                                       \
-                                                                      \
-    l1 = gen_new_label();                                             \
-    tmp = tcg_temp_new();                                 \
-    if (ra != 31) {                                                   \
-        tmp = tcg_temp_new();                             \
-        gen_helper_ ## name (tmp, cpu_fir[ra]);                       \
-    } else  {                                                         \
-        tmp = tcg_const_i64(0);                                       \
-        gen_helper_ ## name (tmp, tmp);                               \
-    }                                                                 \
-    tcg_gen_brcondi_i64(TCG_COND_EQ, tmp, 0, l1);                     \
-    if (rb != 31)                                                     \
-        tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[ra]);                    \
-    else                                                              \
-        tcg_gen_movi_i64(cpu_fir[rc], 0);                             \
-    gen_set_label(l1);                                                \
-}
-FCMOV(cmpfeq)
-FCMOV(cmpfne)
-FCMOV(cmpflt)
-FCMOV(cmpfge)
-FCMOV(cmpfle)
-FCMOV(cmpfgt)
-
 static inline uint64_t zapnot_mask(uint8_t lit)
 {
     uint64_t mask = 0;
@@ -1871,27 +1882,27 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
             break;
         case 0x02A:
             /* FCMOVEQ */
-            gen_fcmpfeq(ra, rb, rc);
+            gen_fcmov(TCG_COND_NE, ra, rb, rc);
             break;
         case 0x02B:
             /* FCMOVNE */
-            gen_fcmpfne(ra, rb, rc);
+            gen_fcmov(TCG_COND_EQ, ra, rb, rc);
             break;
         case 0x02C:
             /* FCMOVLT */
-            gen_fcmpflt(ra, rb, rc);
+            gen_fcmov(TCG_COND_GE, ra, rb, rc);
             break;
         case 0x02D:
             /* FCMOVGE */
-            gen_fcmpfge(ra, rb, rc);
+            gen_fcmov(TCG_COND_LT, ra, rb, rc);
             break;
         case 0x02E:
             /* FCMOVLE */
-            gen_fcmpfle(ra, rb, rc);
+            gen_fcmov(TCG_COND_GT, ra, rb, rc);
             break;
         case 0x02F:
             /* FCMOVGT */
-            gen_fcmpfgt(ra, rb, rc);
+            gen_fcmov(TCG_COND_LE, ra, rb, rc);
             break;
         case 0x030:
             /* CVTQL */
@@ -2482,9 +2493,15 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
         ret = 1;
         break;
     case 0x31: /* FBEQ */
+        gen_fbcond(ctx, TCG_COND_EQ, ra, disp21);
+        ret = 1;
+        break;
     case 0x32: /* FBLT */
+        gen_fbcond(ctx, TCG_COND_LT, ra, disp21);
+        ret = 1;
+        break;
     case 0x33: /* FBLE */
-        gen_fbcond(ctx, opc, ra, disp21);
+        gen_fbcond(ctx, TCG_COND_LE, ra, disp21);
         ret = 1;
         break;
     case 0x34:
@@ -2495,9 +2512,15 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
         ret = 1;
         break;
     case 0x35: /* FBNE */
+        gen_fbcond(ctx, TCG_COND_NE, ra, disp21);
+        ret = 1;
+        break;
     case 0x36: /* FBGE */
+        gen_fbcond(ctx, TCG_COND_GE, ra, disp21);
+        ret = 1;
+        break;
     case 0x37: /* FBGT */
-        gen_fbcond(ctx, opc, ra, disp21);
+        gen_fbcond(ctx, TCG_COND_GT, ra, disp21);
         ret = 1;
         break;
     case 0x38: