Patchwork [2/3] tcg-mips: Implement setcond, setcond2.

login
register
mail settings
Submitter Richard Henderson
Date Dec. 19, 2009, 10:32 p.m.
Message ID <8f9e39de79e59163fb9a31b9e8433adf5c97d6ea.1261260692.git.rth@twiddle.net>
Download mbox | patch
Permalink /patch/41478/
State New
Headers show

Comments

Richard Henderson - Dec. 19, 2009, 10:32 p.m.
---
 tcg/mips/tcg-target.c |  177 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 177 insertions(+), 0 deletions(-)
Aurelien Jarno - Dec. 20, 2009, midnight
On Sat, Dec 19, 2009 at 10:38:01PM +0000, Richard Henderson wrote:
> ---
>  tcg/mips/tcg-target.c |  177 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 177 insertions(+), 0 deletions(-)

While this code is surely highly optimized, it is not easily readable. I
think dropping support for constant arguments, as is currently done in
brcond/brcond2, would help to make it more readable.

I'll work on that in the next few days.

> diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
> index 8fcb5c9..2a2913d 100644
> --- a/tcg/mips/tcg-target.c
> +++ b/tcg/mips/tcg-target.c
> @@ -274,6 +274,8 @@ enum {
>      OPC_BEQ      = 0x04 << 26,
>      OPC_BNE      = 0x05 << 26,
>      OPC_ADDIU    = 0x09 << 26,
> +    OPC_SLTI     = 0x0A << 26,
> +    OPC_SLTIU    = 0x0B << 26,
>      OPC_ANDI     = 0x0C << 26,
>      OPC_ORI      = 0x0D << 26,
>      OPC_XORI     = 0x0E << 26,
> @@ -583,6 +585,170 @@ static void tcg_out_brcond2(TCGContext *s, int cond, int arg1,
>      reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr);
>  }
>  
> +/* Emit code that sets register arg0 to the truth value of
> +   "arg1 COND arg2"; arg2 is a constant iff const_arg2 is set.
> +   Each condition is first rewritten as EQ/NE against zero or as LT/LTU,
> +   swapping the operands and/or inverting the result; may clobber AT.  */
> +static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
> +                            TCGArg arg1, TCGArg arg2, int const_arg2)
> +{
> +    int do_swap = 0, do_inv = 0;
> +
> +    switch (cond) {
> +    case TCG_COND_EQ:
> +    case TCG_COND_NE:
> +        /* Both forms compare against zero, so reduce to arg1 ^ arg2.
> +           NOTE(review): XORI zero-extends; verify const arg2 <= 0xffff.  */
> +        if (arg2 != 0) {
> +            if (const_arg2)
> +                tcg_out_opc_imm(s, OPC_XORI, arg0, arg1, arg2);
> +            else
> +                tcg_out_opc_reg(s, OPC_XOR, arg0, arg1, arg2);
> +            arg1 = arg0;
> +        }
> +        break;
> +
> +    case TCG_COND_GT:
> +    case TCG_COND_GTU:
> +        /* A > B  --> B < A */
> +        do_swap = 1;
> +        break;
> +
> +    case TCG_COND_GE:
> +    case TCG_COND_GEU:
> +        /* A >= B  --> !(A < B) */
> +        cond = tcg_invert_cond(cond);
> +        do_inv = 1;
> +        break;
> +
> +    case TCG_COND_LE:
> +    case TCG_COND_LEU:
> +        if (const_arg2 && arg2 < 32767) {
> +            /* A <= B  --> A < B+1, given that B+1 doesn't overflow.  */
> +            arg2++;
> +            cond = (cond == TCG_COND_LE ? TCG_COND_LT : TCG_COND_LTU);
> +        } else {
> +            /* A <= B  --> B >= A  --> !(B < A) */
> +            do_swap = do_inv = 1;
> +        }
> +        break;
> +    }
> +
> +    if (do_swap) {
> +        TCGArg t = arg1;
> +        /* A non-zero constant must first be loaded into AT.  */
> +        if (const_arg2 && arg2 != 0) {
> +            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, arg2);
> +            arg2 = TCG_REG_AT;
> +        }
> +        arg1 = arg2;
> +        arg2 = t;
> +        const_arg2 = 0;
> +        cond = tcg_swap_cond(cond);
> +    }
> +
> +    switch (cond) {
> +    case TCG_COND_EQ:
> +        /* X == 0 --> (unsigned)X < 1.  */
> +        tcg_out_opc_imm(s, OPC_SLTIU, arg0, arg1, 1);
> +        break;
> +    case TCG_COND_NE:
> +        /* X != 0 --> 0 < (unsigned)X.  */
> +        tcg_out_opc_reg(s, OPC_SLTU, arg0, TCG_REG_ZERO, arg1);
> +        break;
> +    case TCG_COND_LT:
> +        if (const_arg2)
> +            tcg_out_opc_imm(s, OPC_SLTI, arg0, arg1, arg2);
> +        else
> +            tcg_out_opc_reg(s, OPC_SLT, arg0, arg1, arg2);
> +        break;
> +    case TCG_COND_LTU:
> +        if (const_arg2)
> +            tcg_out_opc_imm(s, OPC_SLTIU, arg0, arg1, arg2);
> +        else
> +            tcg_out_opc_reg(s, OPC_SLTU, arg0, arg1, arg2);
> +        break;
> +    default:
> +        tcg_abort ();
> +    }
> +
> +    if (do_inv) {
> +        tcg_out_opc_imm(s, OPC_XORI, arg0, arg0, 1);
> +    }
> +}
> +
> +/* Emit code setting dest to the truth value of the double-word comparison
> +   (ah:al) COND (bh:bl).  bl and bh are constants when blconst and bhconst
> +   are set.  The result is combined from tcg_out_setcond values with AND/OR
> +   instead of going through brcond2; TCG_REG_AT and TCG_REG_T0 serve as
> +   scratch registers.  */
> +static void tcg_out_setcond2(TCGContext *s, int cond, int dest,
> +                             int al, int ah, int bl, int bh,
> +                             int blconst, int bhconst)
> +{
> +    /* Conditions for the low and the high halves; hi_cond == -1 means
> +       that only the equality of the high halves matters.  */
> +    int lo_cond, hi_cond;
> +
> +    if (cond == TCG_COND_NE) {
> +        /* (ah != bh || al != bl) */
> +        tcg_out_setcond(s, TCG_COND_NE, TCG_REG_T0, al, bl, blconst);
> +        tcg_out_setcond(s, TCG_COND_NE, dest, ah, bh, bhconst);
> +        tcg_out_opc_reg(s, OPC_OR, dest, dest, TCG_REG_T0);
> +        return;
> +    }
> +
> +    switch (cond) {
> +    case TCG_COND_EQ:
> +        /* (ah == bh && al == bl) */
> +        hi_cond = -1;
> +        lo_cond = TCG_COND_EQ;
> +        break;
> +
> +    case TCG_COND_LT:
> +    case TCG_COND_LE:
> +        /* (ah < bh || (ah == bh && al < bl)), or al <= bl for LE */
> +        hi_cond = TCG_COND_LT;
> +        lo_cond = (cond == TCG_COND_LT ? TCG_COND_LTU : TCG_COND_LEU);
> +        break;
> +
> +    case TCG_COND_LTU:
> +    case TCG_COND_LEU:
> +        /* With bh == 0 only (ah == bh && al COND bl) is evaluated.  */
> +        hi_cond = (bh == 0 ? -1 : TCG_COND_LTU);
> +        lo_cond = cond;
> +        break;
> +
> +    case TCG_COND_GT:
> +    case TCG_COND_GTU:
> +        /* (ah > bh || (ah == bh && al > bl)) */
> +        hi_cond = cond;
> +        lo_cond = TCG_COND_GTU;
> +        break;
> +
> +    case TCG_COND_GE:
> +    case TCG_COND_GEU:
> +        /* (ah > bh || (ah == bh && al >= bl)) */
> +        hi_cond = (cond == TCG_COND_GE ? TCG_COND_GT : TCG_COND_GTU);
> +        lo_cond = TCG_COND_GEU;
> +        break;
> +
> +    default:
> +        tcg_abort ();
> +    }
> +
> +    /* AT = low-part condition, T0 = (ah == bh) */
> +    tcg_out_setcond(s, lo_cond, TCG_REG_AT, al, bl, blconst);
> +    tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_T0, ah, bh, bhconst);
> +    if (hi_cond == -1) {
> +        tcg_out_opc_reg(s, OPC_AND, dest, TCG_REG_T0, TCG_REG_AT);
> +        return;
> +    }
> +    tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, TCG_REG_AT);
> +    tcg_out_setcond(s, hi_cond, dest, ah, bh, bhconst);
> +    tcg_out_opc_reg(s, OPC_OR, dest, dest, TCG_REG_T0);
> +}
> +
>  #if defined(CONFIG_SOFTMMU)
>  
>  #include "../../softmmu_defs.h"
> @@ -1155,6 +1321,14 @@ static inline void tcg_out_op(TCGContext *s, int opc,
>          tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]);
>          break;
>  
> +    case INDEX_op_setcond_i32:
> +        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
> +        break;
> +    case INDEX_op_setcond2_i32:
> +        tcg_out_setcond2(s, args[5], args[0], args[1], args[2],
> +                         args[3], args[4], const_args[3], const_args[4]);
> +        break;
> +
>      case INDEX_op_qemu_ld8u:
>          tcg_out_qemu_ld(s, args, 0);
>          break;
> @@ -1233,6 +1407,9 @@ static const TCGTargetOpDef mips_op_defs[] = {
>      { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } },
>      { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } },
>  
> +    { INDEX_op_setcond_i32, { "r", "r", "rJ" } },
> +    { INDEX_op_setcond2_i32, { "r", "r", "r", "rJ", "rJ" } },
> +
>  #if TARGET_LONG_BITS == 32
>      { INDEX_op_qemu_ld8u, { "L", "lZ" } },
>      { INDEX_op_qemu_ld8s, { "L", "lZ" } },
> -- 
> 1.6.5.2
> 
> 
> 
>

Patch

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 8fcb5c9..2a2913d 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -274,6 +274,8 @@  enum {
     OPC_BEQ      = 0x04 << 26,
     OPC_BNE      = 0x05 << 26,
     OPC_ADDIU    = 0x09 << 26,
+    OPC_SLTI     = 0x0A << 26,
+    OPC_SLTIU    = 0x0B << 26,
     OPC_ANDI     = 0x0C << 26,
     OPC_ORI      = 0x0D << 26,
     OPC_XORI     = 0x0E << 26,
@@ -583,6 +585,170 @@  static void tcg_out_brcond2(TCGContext *s, int cond, int arg1,
     reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr);
 }
 
+/* Emit code that sets register arg0 to the truth value of
+   "arg1 COND arg2"; arg2 is a constant iff const_arg2 is set.
+   Each condition is first rewritten as EQ/NE against zero or as LT/LTU,
+   swapping the operands and/or inverting the result; may clobber AT.  */
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
+                            TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+    int do_swap = 0, do_inv = 0;
+
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_NE:
+        /* Both forms compare against zero, so reduce to arg1 ^ arg2.
+           NOTE(review): XORI zero-extends; verify const arg2 <= 0xffff.  */
+        if (arg2 != 0) {
+            if (const_arg2)
+                tcg_out_opc_imm(s, OPC_XORI, arg0, arg1, arg2);
+            else
+                tcg_out_opc_reg(s, OPC_XOR, arg0, arg1, arg2);
+            arg1 = arg0;
+        }
+        break;
+
+    case TCG_COND_GT:
+    case TCG_COND_GTU:
+        /* A > B  --> B < A */
+        do_swap = 1;
+        break;
+
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+        /* A >= B  --> !(A < B) */
+        cond = tcg_invert_cond(cond);
+        do_inv = 1;
+        break;
+
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+        if (const_arg2 && arg2 < 32767) {
+            /* A <= B  --> A < B+1, given that B+1 doesn't overflow.  */
+            arg2++;
+            cond = (cond == TCG_COND_LE ? TCG_COND_LT : TCG_COND_LTU);
+        } else {
+            /* A <= B  --> B >= A  --> !(B < A) */
+            do_swap = do_inv = 1;
+        }
+        break;
+    }
+
+    if (do_swap) {
+        TCGArg t = arg1;
+        /* A non-zero constant must first be loaded into AT.  */
+        if (const_arg2 && arg2 != 0) {
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, arg2);
+            arg2 = TCG_REG_AT;
+        }
+        arg1 = arg2;
+        arg2 = t;
+        const_arg2 = 0;
+        cond = tcg_swap_cond(cond);
+    }
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        /* X == 0 --> (unsigned)X < 1.  */
+        tcg_out_opc_imm(s, OPC_SLTIU, arg0, arg1, 1);
+        break;
+    case TCG_COND_NE:
+        /* X != 0 --> 0 < (unsigned)X.  */
+        tcg_out_opc_reg(s, OPC_SLTU, arg0, TCG_REG_ZERO, arg1);
+        break;
+    case TCG_COND_LT:
+        if (const_arg2)
+            tcg_out_opc_imm(s, OPC_SLTI, arg0, arg1, arg2);
+        else
+            tcg_out_opc_reg(s, OPC_SLT, arg0, arg1, arg2);
+        break;
+    case TCG_COND_LTU:
+        if (const_arg2)
+            tcg_out_opc_imm(s, OPC_SLTIU, arg0, arg1, arg2);
+        else
+            tcg_out_opc_reg(s, OPC_SLTU, arg0, arg1, arg2);
+        break;
+    default:
+        tcg_abort ();
+    }
+
+    if (do_inv) {
+        tcg_out_opc_imm(s, OPC_XORI, arg0, arg0, 1);
+    }
+}
+
+/* Emit code setting dest to the truth value of the double-word comparison
+   (ah:al) COND (bh:bl).  bl and bh are constants when blconst and bhconst
+   are set.  The result is combined from tcg_out_setcond values with AND/OR
+   instead of going through brcond2; TCG_REG_AT and TCG_REG_T0 serve as
+   scratch registers.  */
+static void tcg_out_setcond2(TCGContext *s, int cond, int dest,
+                             int al, int ah, int bl, int bh,
+                             int blconst, int bhconst)
+{
+    /* Conditions for the low and the high halves; hi_cond == -1 means
+       that only the equality of the high halves matters.  */
+    int lo_cond, hi_cond;
+
+    if (cond == TCG_COND_NE) {
+        /* (ah != bh || al != bl) */
+        tcg_out_setcond(s, TCG_COND_NE, TCG_REG_T0, al, bl, blconst);
+        tcg_out_setcond(s, TCG_COND_NE, dest, ah, bh, bhconst);
+        tcg_out_opc_reg(s, OPC_OR, dest, dest, TCG_REG_T0);
+        return;
+    }
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        /* (ah == bh && al == bl) */
+        hi_cond = -1;
+        lo_cond = TCG_COND_EQ;
+        break;
+
+    case TCG_COND_LT:
+    case TCG_COND_LE:
+        /* (ah < bh || (ah == bh && al < bl)), or al <= bl for LE */
+        hi_cond = TCG_COND_LT;
+        lo_cond = (cond == TCG_COND_LT ? TCG_COND_LTU : TCG_COND_LEU);
+        break;
+
+    case TCG_COND_LTU:
+    case TCG_COND_LEU:
+        /* With bh == 0 only (ah == bh && al COND bl) is evaluated.  */
+        hi_cond = (bh == 0 ? -1 : TCG_COND_LTU);
+        lo_cond = cond;
+        break;
+
+    case TCG_COND_GT:
+    case TCG_COND_GTU:
+        /* (ah > bh || (ah == bh && al > bl)) */
+        hi_cond = cond;
+        lo_cond = TCG_COND_GTU;
+        break;
+
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+        /* (ah > bh || (ah == bh && al >= bl)) */
+        hi_cond = (cond == TCG_COND_GE ? TCG_COND_GT : TCG_COND_GTU);
+        lo_cond = TCG_COND_GEU;
+        break;
+
+    default:
+        tcg_abort ();
+    }
+
+    /* AT = low-part condition, T0 = (ah == bh) */
+    tcg_out_setcond(s, lo_cond, TCG_REG_AT, al, bl, blconst);
+    tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_T0, ah, bh, bhconst);
+    if (hi_cond == -1) {
+        tcg_out_opc_reg(s, OPC_AND, dest, TCG_REG_T0, TCG_REG_AT);
+        return;
+    }
+    tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, TCG_REG_AT);
+    tcg_out_setcond(s, hi_cond, dest, ah, bh, bhconst);
+    tcg_out_opc_reg(s, OPC_OR, dest, dest, TCG_REG_T0);
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -1155,6 +1321,14 @@  static inline void tcg_out_op(TCGContext *s, int opc,
         tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]);
         break;
 
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args[5], args[0], args[1], args[2],
+                         args[3], args[4], const_args[3], const_args[4]);
+        break;
+
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
         break;
@@ -1233,6 +1407,9 @@  static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } },
     { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } },
 
+    { INDEX_op_setcond_i32, { "r", "r", "rJ" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "rJ", "rJ" } },
+
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "L", "lZ" } },
     { INDEX_op_qemu_ld8s, { "L", "lZ" } },