diff mbox

[1/2] tcg-ia64: implement mulu2_i32/i64 and muls2_i32/i64

Message ID 1364657589-16123-1-git-send-email-aurelien@aurel32.net
State New
Headers show

Commit Message

Aurelien Jarno March 30, 2013, 3:33 p.m. UTC
Add 32-bit and 64-bit mulu2 and muls2 TCG ops.

On IA64, 32-bit ops should just ignore the 32 most significant bits of
registers, and can leave them with non-zero values. This means registers
should be zero/sign extended before doing the actual multiplying. This
leave some slots in the bundle to possibly load a constant.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/ia64/tcg-target.c |   93 +++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.h |    8 ++---
 2 files changed, 97 insertions(+), 4 deletions(-)

Comments

Richard Henderson March 30, 2013, 4 p.m. UTC | #1
On 03/30/2013 08:33 AM, Aurelien Jarno wrote:
> +    if (const_arg1 && arg1 != 0) {
> +        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> +                          TCG_REG_R2, arg1, TCG_REG_R0);
> +    } else if (is_signed) {
> +        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1);
> +    } else {
> +        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1);
> +    }
> +    if (const_arg2 && arg2 != 0) {
> +        opc3 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> +                          TCG_REG_R3, arg2, TCG_REG_R0);
> +    } else if (is_signed) {
> +        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R3, arg2);
> +    } else {
> +        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R3, arg2);
> +    }
> +

Why the check form arg[12] != 0?  Seems like a wasted test, especially if
the optimizer is enabled and we ought never see it.

I would have said you shouldn't bother implementing the _i32 versions for
ia64, because the generic fallback would produce the same code.  But in
this case it's more about doing the job with fewer bundles.

Otherwise, the code looks correct,

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~
Aurelien Jarno March 30, 2013, 10:04 p.m. UTC | #2
On Sat, Mar 30, 2013 at 09:00:40AM -0700, Richard Henderson wrote:
> On 03/30/2013 08:33 AM, Aurelien Jarno wrote:
> > +    if (const_arg1 && arg1 != 0) {
> > +        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> > +                          TCG_REG_R2, arg1, TCG_REG_R0);
> > +    } else if (is_signed) {
> > +        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1);
> > +    } else {
> > +        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1);
> > +    }
> > +    if (const_arg2 && arg2 != 0) {
> > +        opc3 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> > +                          TCG_REG_R3, arg2, TCG_REG_R0);
> > +    } else if (is_signed) {
> > +        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R3, arg2);
> > +    } else {
> > +        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R3, arg2);
> > +    }
> > +
> 
> Why the check form arg[12] != 0?  Seems like a wasted test, especially if
> the optimizer is enabled and we ought never see it.

It comes from copy and paste from other code, dating from before the
optimizer pass. It looks like some code can be removed.

> I would have said you shouldn't bother implementing the _i32 versions for
> ia64, because the generic fallback would produce the same code.  But in
> this case it's more about doing the job with fewer bundles.

Yes, the code would be basically the same, but with more bundles, so I
still think it make sense to implement it.

> Otherwise, the code looks correct,
> 
> Reviewed-by: Richard Henderson <rth@twiddle.net>
diff mbox

Patch

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 2373d9e..a46234d 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -278,6 +278,8 @@  enum {
     OPC_SUB_A3                = 0x10128000000ull,
     OPC_UNPACK4_L_I2          = 0x0f860000000ull,
     OPC_XMA_L_F2              = 0x1d000000000ull,
+    OPC_XMA_H_F2              = 0x1dc00000000ull,
+    OPC_XMA_HU_F2             = 0x1d800000000ull,
     OPC_XOR_A1                = 0x10078000000ull,
     OPC_ZXT1_I29              = 0x00080000000ull,
     OPC_ZXT2_I29              = 0x00088000000ull,
@@ -1098,6 +1100,79 @@  static inline void tcg_out_mul(TCGContext *s, TCGArg ret,
                    tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
 }
 
+static inline void tcg_out_mul2_i32(TCGContext *s, TCGArg retl, TCGArg reth,
+                                    TCGArg arg1, int const_arg1,
+                                    TCGArg arg2, int const_arg2,
+                                    int is_signed)
+{
+    uint64_t opc2, opc3;
+
+    if (const_arg1 && arg1 != 0) {
+        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+                          TCG_REG_R2, arg1, TCG_REG_R0);
+    } else if (is_signed) {
+        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1);
+    } else {
+        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1);
+    }
+    if (const_arg2 && arg2 != 0) {
+        opc3 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+                          TCG_REG_R3, arg2, TCG_REG_R0);
+    } else if (is_signed) {
+        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R3, arg2);
+    } else {
+        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R3, arg2);
+    }
+
+    tcg_out_bundle(s, miI,
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   opc2,
+                   opc3);
+    tcg_out_bundle(s, mmI,
+                   tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18,
+                               TCG_REG_F6, TCG_REG_R2),
+                   tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18,
+                               TCG_REG_F7, TCG_REG_R3),
+                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+    tcg_out_bundle(s, mmF,
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6,
+                               TCG_REG_F7, TCG_REG_F0));
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, retl, TCG_REG_F6),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, reth, retl, 32, 31));
+}
+
+static inline void tcg_out_mul2_i64(TCGContext *s, TCGArg retl,
+                                    TCGArg reth, TCGArg arg1, TCGArg arg2,
+                                    int is_signed)
+{
+    uint64_t opc_xma_h_f2;
+
+    opc_xma_h_f2 = is_signed ? OPC_XMA_H_F2 : OPC_XMA_HU_F2;
+
+    tcg_out_bundle(s, mmI,
+                   tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F8, arg1),
+                   tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F9, arg2),
+                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+    tcg_out_bundle(s, mmf,
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F8,
+                               TCG_REG_F9, TCG_REG_F0));
+    tcg_out_bundle(s, mmF,
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_f2 (TCG_REG_P0, opc_xma_h_f2, TCG_REG_F7, TCG_REG_F8,
+                               TCG_REG_F9, TCG_REG_F0));
+    tcg_out_bundle(s, mmI,
+                   tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, retl, TCG_REG_F6),
+                   tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, reth, TCG_REG_F7),
+                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+}
+
 static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
                                    TCGArg arg2, int const_arg2)
 {
@@ -2107,6 +2182,20 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_mul_i64:
         tcg_out_mul(s, args[0], args[1], args[2]);
         break;
+    case INDEX_op_mulu2_i32:
+        tcg_out_mul2_i32(s, args[0], args[1], args[2], const_args[2],
+                         args[3], const_args[3], 0);
+        break;
+    case INDEX_op_mulu2_i64:
+        tcg_out_mul2_i64(s, args[0], args[1], args[2], args[3], 0);
+        break;
+    case INDEX_op_muls2_i32:
+        tcg_out_mul2_i32(s, args[0], args[1], args[2], const_args[2],
+                         args[3], const_args[3], 1);
+        break;
+    case INDEX_op_muls2_i64:
+        tcg_out_mul2_i64(s, args[0], args[1], args[2], args[3], 1);
+        break;
 
     case INDEX_op_sar_i32:
         tcg_out_sar_i32(s, args[0], args[1], args[2], const_args[2]);
@@ -2275,6 +2364,8 @@  static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_xor_i32, { "r", "rI", "rI" } },
 
     { INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
+    { INDEX_op_mulu2_i32, { "r", "r", "rI", "rI" } },
+    { INDEX_op_muls2_i32, { "r", "r", "rI", "rI" } },
 
     { INDEX_op_sar_i32, { "r", "rZ", "ri" } },
     { INDEX_op_shl_i32, { "r", "rZ", "ri" } },
@@ -2322,6 +2413,8 @@  static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_xor_i64, { "r", "rI", "rI" } },
 
     { INDEX_op_mul_i64, { "r", "rZ", "rZ" } },
+    { INDEX_op_mulu2_i64, { "r", "r", "rZ", "rZ" } },
+    { INDEX_op_muls2_i64, { "r", "r", "rZ", "rZ" } },
 
     { INDEX_op_sar_i64, { "r", "rZ", "ri" } },
     { INDEX_op_shl_i64, { "r", "rZ", "ri" } },
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index e3d72ea..75e357e 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -140,10 +140,10 @@  typedef enum {
 #define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i32         0
 #define TCG_TARGET_HAS_sub2_i64         0
-#define TCG_TARGET_HAS_mulu2_i32        0
-#define TCG_TARGET_HAS_mulu2_i64        0
-#define TCG_TARGET_HAS_muls2_i32        0
-#define TCG_TARGET_HAS_muls2_i64        0
+#define TCG_TARGET_HAS_mulu2_i32        1
+#define TCG_TARGET_HAS_mulu2_i64        1
+#define TCG_TARGET_HAS_muls2_i32        1
+#define TCG_TARGET_HAS_muls2_i64        1
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
 #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)