[2/2] tcg-ia64: implement add2_i32/i64 and sub2_i32/i64

Submitted by Aurelien Jarno on March 30, 2013, 3:33 p.m.

Details

Message ID 1364657589-16123-2-git-send-email-aurelien@aurel32.net
State New
Headers show

Commit Message

Aurelien Jarno March 30, 2013, 3:33 p.m.
Add 32-bit and 64-bit add2 and sub2 TCG ops.

On IA64, 32-bit ops should just ignore the 32 most significant bits of
registers, and can leave them with non-zero values. This means a 32-bit
comparison instruction should be used for add2_i32 and sub2_i32.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/ia64/tcg-target.c |   55 +++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.h |    8 +++----
 2 files changed, 59 insertions(+), 4 deletions(-)

Comments

Richard Henderson March 30, 2013, 3:54 p.m.
On 03/30/2013 08:33 AM, Aurelien Jarno wrote:
> +static inline void tcg_out_add2(TCGContext *s, TCGArg retl, TCGArg reth,
> +                                TCGArg arg1l, TCGArg arg1h,
> +                                TCGArg arg2l, TCGArg arg2h,
> +                                int cmp4)
> +{
> +    tcg_out_bundle(s, MmI,
> +                   tcg_opc_a1(TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, arg1l, arg2l),
> +                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
> +                                 TCG_REG_R2, arg1l, cmp4));

I seem to recall a 1-cycle cross-unit delay, going between M to I units?
Maybe that was just the itanic1, it's been so long...?

Anyway, in this case it's easy to avoid by putting the nop first and using
an mII bundle.

That said, the code looks correct.

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~
Aurelien Jarno March 30, 2013, 10:05 p.m.
On Sat, Mar 30, 2013 at 08:54:03AM -0700, Richard Henderson wrote:
> On 03/30/2013 08:33 AM, Aurelien Jarno wrote:
> > +static inline void tcg_out_add2(TCGContext *s, TCGArg retl, TCGArg reth,
> > +                                TCGArg arg1l, TCGArg arg1h,
> > +                                TCGArg arg2l, TCGArg arg2h,
> > +                                int cmp4)
> > +{
> > +    tcg_out_bundle(s, MmI,
> > +                   tcg_opc_a1(TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, arg1l, arg2l),
> > +                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> > +                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
> > +                                 TCG_REG_R2, arg1l, cmp4));
> 
> I seem to recall a 1-cycle cross-unit delay, going between M to I units?
> Maybe that was just the itanic1, it's been so long...?
> 
> Anyway, in this case it's easy to avoid by putting the nop first and using
> an mII bundle.

I'll do that in the next version, thanks for the review.

> That said, the code looks correct.
> 
> Reviewed-by: Richard Henderson <rth@twiddle.net>
> 
> 
> r~
> 
> 
> 
>

Patch hide | download patch | download mbox

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index a46234d..552992e 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -211,6 +211,7 @@  enum {
 
 enum {
     OPC_ADD_A1                = 0x10000000000ull,
+    OPC_ADD1_A1               = 0x10008000000ull,
     OPC_AND_A1                = 0x10060000000ull,
     OPC_AND_A3                = 0x10160000000ull,
     OPC_ANDCM_A1              = 0x10068000000ull,
@@ -276,6 +277,7 @@  enum {
     OPC_ST8_M4                = 0x08cc0000000ull,
     OPC_SUB_A1                = 0x10028000000ull,
     OPC_SUB_A3                = 0x10128000000ull,
+    OPC_SUB1_A1               = 0x10020000000ull,
     OPC_UNPACK4_L_I2          = 0x0f860000000ull,
     OPC_XMA_L_F2              = 0x1d000000000ull,
     OPC_XMA_H_F2              = 0x1dc00000000ull,
@@ -1564,6 +1566,38 @@  static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
                    opc2);
 }
 
+static inline void tcg_out_add2(TCGContext *s, TCGArg retl, TCGArg reth,
+                                TCGArg arg1l, TCGArg arg1h,
+                                TCGArg arg2l, TCGArg arg2h,
+                                int cmp4)
+{
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_a1(TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, arg1l, arg2l),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
+                                 TCG_REG_R2, arg1l, cmp4));
+    tcg_out_bundle(s, mII,
+                   tcg_opc_a1(TCG_REG_P6, OPC_ADD1_A1, reth, arg1h, arg2h),
+                   tcg_opc_a1(TCG_REG_P7, OPC_ADD_A1, reth, arg1h, arg2h),
+                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, retl, 0, TCG_REG_R2));
+}
+
+static inline void tcg_out_sub2(TCGContext *s, TCGArg retl, TCGArg reth,
+                                TCGArg arg1l, TCGArg arg1h,
+                                TCGArg arg2l, TCGArg arg2h,
+                                int cmp4)
+{
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_a1(TCG_REG_P0, OPC_SUB_A1, TCG_REG_R2, arg1l, arg2l),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
+                                 arg1l, TCG_REG_R2, cmp4));
+    tcg_out_bundle(s, mII,
+                   tcg_opc_a1(TCG_REG_P6, OPC_SUB1_A1, reth, arg1h, arg2h),
+                   tcg_opc_a1(TCG_REG_P7, OPC_SUB_A1, reth, arg1h, arg2h),
+                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, retl, 0, TCG_REG_R2));
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "exec/softmmu_defs.h"
@@ -2131,6 +2165,23 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_alu(s, OPC_ADD_A1, args[0], args[1], const_args[1],
                     args[2], const_args[2]);
         break;
+    case INDEX_op_add2_i32:
+        tcg_out_add2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 1);
+        break;
+    case INDEX_op_add2_i64:
+        tcg_out_add2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 0);
+        break;
+    case INDEX_op_sub2_i32:
+        tcg_out_sub2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 1);
+        break;
+    case INDEX_op_sub2_i64:
+        tcg_out_sub2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 0);
+        break;
+
     case INDEX_op_sub_i32:
     case INDEX_op_sub_i64:
         tcg_out_alu(s, OPC_SUB_A1, args[0], args[1], const_args[1],
@@ -2352,7 +2403,9 @@  static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_st_i32, { "rZ", "r" } },
 
     { INDEX_op_add_i32, { "r", "rI", "rI" } },
+    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
     { INDEX_op_sub_i32, { "r", "rI", "rI" } },
+    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
 
     { INDEX_op_and_i32, { "r", "rI", "rI" } },
     { INDEX_op_andc_i32, { "r", "rI", "rI" } },
@@ -2401,7 +2454,9 @@  static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_st_i64, { "rZ", "r" } },
 
     { INDEX_op_add_i64, { "r", "rI", "rI" } },
+    { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
     { INDEX_op_sub_i64, { "r", "rI", "rI" } },
+    { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
 
     { INDEX_op_and_i64, { "r", "rI", "rI" } },
     { INDEX_op_andc_i64, { "r", "rI", "rI" } },
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 75e357e..6c70e41 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -136,10 +136,10 @@  typedef enum {
 #define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_deposit_i64      1
-#define TCG_TARGET_HAS_add2_i32         0
-#define TCG_TARGET_HAS_add2_i64         0
-#define TCG_TARGET_HAS_sub2_i32         0
-#define TCG_TARGET_HAS_sub2_i64         0
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_add2_i64         1
+#define TCG_TARGET_HAS_sub2_i32         1
+#define TCG_TARGET_HAS_sub2_i64         1
 #define TCG_TARGET_HAS_mulu2_i32        1
 #define TCG_TARGET_HAS_mulu2_i64        1
 #define TCG_TARGET_HAS_muls2_i32        1