Patchwork [2/2] tcg-ia64: implement add2_i32/i64 and sub2_i32/i64

login
register
mail settings
Submitter Aurelien Jarno
Date March 30, 2013, 3:33 p.m.
Message ID <1364657589-16123-2-git-send-email-aurelien@aurel32.net>
Download mbox | patch
Permalink /patch/232525/
State New
Headers show

Comments

Aurelien Jarno - March 30, 2013, 3:33 p.m.
Add 32-bit and 64-bit add2 and sub2 TCG ops.

On IA64, 32-bit ops should just ignore the 32 most significant bits of
registers, and can leave them with non-zero values. This means a 32-bit
comparison instruction should be used for add2_i32 and sub2_i32.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/ia64/tcg-target.c |   55 +++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.h |    8 +++----
 2 files changed, 59 insertions(+), 4 deletions(-)
Richard Henderson - March 30, 2013, 3:54 p.m.
On 03/30/2013 08:33 AM, Aurelien Jarno wrote:
> +static inline void tcg_out_add2(TCGContext *s, TCGArg retl, TCGArg reth,
> +                                TCGArg arg1l, TCGArg arg1h,
> +                                TCGArg arg2l, TCGArg arg2h,
> +                                int cmp4)
> +{
> +    tcg_out_bundle(s, MmI,
> +                   tcg_opc_a1(TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, arg1l, arg2l),
> +                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
> +                                 TCG_REG_R2, arg1l, cmp4));

I seem to recall a 1-cycle cross-unit delay, going between M to I units?
Maybe that was just the itanic1, it's been so long...?

Anyway, in this case it's easy to avoid by putting the nop first and using
an mII bundle.

That said, the code looks correct.

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~
Aurelien Jarno - March 30, 2013, 10:05 p.m.
On Sat, Mar 30, 2013 at 08:54:03AM -0700, Richard Henderson wrote:
> On 03/30/2013 08:33 AM, Aurelien Jarno wrote:
> > +static inline void tcg_out_add2(TCGContext *s, TCGArg retl, TCGArg reth,
> > +                                TCGArg arg1l, TCGArg arg1h,
> > +                                TCGArg arg2l, TCGArg arg2h,
> > +                                int cmp4)
> > +{
> > +    tcg_out_bundle(s, MmI,
> > +                   tcg_opc_a1(TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, arg1l, arg2l),
> > +                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> > +                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
> > +                                 TCG_REG_R2, arg1l, cmp4));
> 
> I seem to recall a 1-cycle cross-unit delay, going between M to I units?
> Maybe that was just the itanic1, it's been so long...?
> 
> Anyway, in this case it's easy to avoid by putting the nop first and using
> an mII bundle.

I'll do that in the next version, thanks for the review.

> That said, the code looks correct.
> 
> Reviewed-by: Richard Henderson <rth@twiddle.net>
> 
> 
> r~
> 
> 
> 
>

Patch

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index a46234d..552992e 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -211,6 +211,7 @@  enum {
 
 enum {
     OPC_ADD_A1                = 0x10000000000ull,
+    OPC_ADD1_A1               = 0x10008000000ull,
     OPC_AND_A1                = 0x10060000000ull,
     OPC_AND_A3                = 0x10160000000ull,
     OPC_ANDCM_A1              = 0x10068000000ull,
@@ -276,6 +277,7 @@  enum {
     OPC_ST8_M4                = 0x08cc0000000ull,
     OPC_SUB_A1                = 0x10028000000ull,
     OPC_SUB_A3                = 0x10128000000ull,
+    OPC_SUB1_A1               = 0x10020000000ull,
     OPC_UNPACK4_L_I2          = 0x0f860000000ull,
     OPC_XMA_L_F2              = 0x1d000000000ull,
     OPC_XMA_H_F2              = 0x1dc00000000ull,
@@ -1564,6 +1566,38 @@  static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
                    opc2);
 }
 
+static inline void tcg_out_add2(TCGContext *s, TCGArg retl, TCGArg reth,
+                                TCGArg arg1l, TCGArg arg1h,
+                                TCGArg arg2l, TCGArg arg2h,
+                                int cmp4)
+{
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_a1(TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, arg1l, arg2l),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
+                                 TCG_REG_R2, arg1l, cmp4));
+    tcg_out_bundle(s, mII,
+                   tcg_opc_a1(TCG_REG_P6, OPC_ADD1_A1, reth, arg1h, arg2h),
+                   tcg_opc_a1(TCG_REG_P7, OPC_ADD_A1, reth, arg1h, arg2h),
+                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, retl, 0, TCG_REG_R2));
+}
+
+static inline void tcg_out_sub2(TCGContext *s, TCGArg retl, TCGArg reth,
+                                TCGArg arg1l, TCGArg arg1h,
+                                TCGArg arg2l, TCGArg arg2h,
+                                int cmp4)
+{
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_a1(TCG_REG_P0, OPC_SUB_A1, TCG_REG_R2, arg1l, arg2l),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
+                                 arg1l, TCG_REG_R2, cmp4));
+    tcg_out_bundle(s, mII,
+                   tcg_opc_a1(TCG_REG_P6, OPC_SUB1_A1, reth, arg1h, arg2h),
+                   tcg_opc_a1(TCG_REG_P7, OPC_SUB_A1, reth, arg1h, arg2h),
+                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, retl, 0, TCG_REG_R2));
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "exec/softmmu_defs.h"
@@ -2131,6 +2165,23 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_alu(s, OPC_ADD_A1, args[0], args[1], const_args[1],
                     args[2], const_args[2]);
         break;
+    case INDEX_op_add2_i32:
+        tcg_out_add2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 1);
+        break;
+    case INDEX_op_add2_i64:
+        tcg_out_add2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 0);
+        break;
+    case INDEX_op_sub2_i32:
+        tcg_out_sub2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 1);
+        break;
+    case INDEX_op_sub2_i64:
+        tcg_out_sub2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 0);
+        break;
+
     case INDEX_op_sub_i32:
     case INDEX_op_sub_i64:
         tcg_out_alu(s, OPC_SUB_A1, args[0], args[1], const_args[1],
@@ -2352,7 +2403,9 @@  static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_st_i32, { "rZ", "r" } },
 
     { INDEX_op_add_i32, { "r", "rI", "rI" } },
+    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
     { INDEX_op_sub_i32, { "r", "rI", "rI" } },
+    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
 
     { INDEX_op_and_i32, { "r", "rI", "rI" } },
     { INDEX_op_andc_i32, { "r", "rI", "rI" } },
@@ -2401,7 +2454,9 @@  static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_st_i64, { "rZ", "r" } },
 
     { INDEX_op_add_i64, { "r", "rI", "rI" } },
+    { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
     { INDEX_op_sub_i64, { "r", "rI", "rI" } },
+    { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
 
     { INDEX_op_and_i64, { "r", "rI", "rI" } },
     { INDEX_op_andc_i64, { "r", "rI", "rI" } },
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 75e357e..6c70e41 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -136,10 +136,10 @@  typedef enum {
 #define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_deposit_i64      1
-#define TCG_TARGET_HAS_add2_i32         0
-#define TCG_TARGET_HAS_add2_i64         0
-#define TCG_TARGET_HAS_sub2_i32         0
-#define TCG_TARGET_HAS_sub2_i64         0
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_add2_i64         1
+#define TCG_TARGET_HAS_sub2_i32         1
+#define TCG_TARGET_HAS_sub2_i64         1
 #define TCG_TARGET_HAS_mulu2_i32        1
 #define TCG_TARGET_HAS_mulu2_i64        1
 #define TCG_TARGET_HAS_muls2_i32        1