diff mbox

[v2.1,13/21] tcg/i386: support remaining vector addition operations

Message ID 1486046099-17726-14-git-send-email-batuzovk@ispras.ru
State New
Headers show

Commit Message

Kirill Batuzov Feb. 2, 2017, 2:34 p.m. UTC
Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---

I believe the checkpatch warning here is a false positive.

---
 tcg/i386/tcg-target.h     | 10 +++++++++
 tcg/i386/tcg-target.inc.c | 54 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 62 insertions(+), 2 deletions(-)

Comments

Kirill Batuzov Feb. 21, 2017, 1:29 p.m. UTC | #1
On Tue, 21 Feb 2017, Philippe Mathieu-Daudé wrote:

> Hi Kirill,
> 
> could you check my previous comment?
>

Hi Philippe,

thank you for your comments. I've seen them and I'll apply changes you
suggested in the next version of the series. I was just hoping to get
a bit more feedback before I proceed to v3.
Alex Bennée Feb. 21, 2017, 4:21 p.m. UTC | #2
Kirill Batuzov <batuzovk@ispras.ru> writes:

> On Tue, 21 Feb 2017, Philippe Mathieu-Daudé wrote:
>
>> Hi Kirill,
>>
>> could you check my previous comment?
>>
>
> Hi Philippe,
>
> thank you for your comments. I've seen them and I'll apply changes you
> suggested in the next version of the series. I was just hoping to get
> a bit more feedback before I proceed to v3.

It is on my list to look at - however I'm in a bit of a crunch getting
the MTTCG stuff prepared before code freeze as well as preparing for a
company conference. Once that's out of the way I'll have a bit more
review time!

--
Alex Bennée
diff mbox

Patch

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 755ebaa..bd6cfe1 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -172,7 +172,17 @@  extern bool have_popcnt;
 #endif
 
 #ifdef TCG_TARGET_HAS_REG128
+#define TCG_TARGET_HAS_add_i8x16        1
+#define TCG_TARGET_HAS_add_i16x8        1
 #define TCG_TARGET_HAS_add_i32x4        1
+#define TCG_TARGET_HAS_add_i64x2        1
+#endif
+
+#ifdef TCG_TARGET_HAS_REGV64
+#define TCG_TARGET_HAS_add_i8x8         1
+#define TCG_TARGET_HAS_add_i16x4        1
+#define TCG_TARGET_HAS_add_i32x2        1
+#define TCG_TARGET_HAS_add_i64x1        1
 #endif
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 208bb81..d8f0d81 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -168,6 +168,11 @@  static bool have_lzcnt;
 #else
 # define have_lzcnt 0
 #endif
+#if defined(CONFIG_CPUID_H) && defined(bit_AVX) && defined(bit_OSXSAVE)
+static bool have_avx;
+#else
+# define have_avx 0
+#endif
 
 static tcg_insn_unit *tb_ret_addr;
 
@@ -393,7 +398,10 @@  static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define OPC_MOVQ_M2R    (0x7e | P_SSE_F30F)
 #define OPC_MOVQ_R2M    (0xd6 | P_SSE_660F)
 #define OPC_MOVQ_R2R    (0x7e | P_SSE_F30F)
+#define OPC_PADDB       (0xfc | P_SSE_660F)
+#define OPC_PADDW       (0xfd | P_SSE_660F)
 #define OPC_PADDD       (0xfe | P_SSE_660F)
+#define OPC_PADDQ       (0xd4 | P_SSE_660F)
 
 /* Group 1 opcode extensions for 0x80-0x83.
    These are also used as modifiers for OPC_ARITH.  */
@@ -1963,6 +1971,19 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     TCGArg a0, a1, a2;
     int c, const_a2, vexop, rexw = 0;
 
+    static const int vect_binop[] = {
+        [INDEX_op_add_i8x16] = OPC_PADDB,
+        [INDEX_op_add_i16x8] = OPC_PADDW,
+        [INDEX_op_add_i32x4] = OPC_PADDD,
+        [INDEX_op_add_i64x2] = OPC_PADDQ,
+
+        [INDEX_op_add_i8x8]  = OPC_PADDB,
+        [INDEX_op_add_i16x4] = OPC_PADDW,
+        [INDEX_op_add_i32x2] = OPC_PADDD,
+        [INDEX_op_add_i64x1] = OPC_PADDQ,
+    };
+
+
 #if TCG_TARGET_REG_BITS == 64
 # define OP_32_64(x) \
         case glue(glue(INDEX_op_, x), _i64): \
@@ -1972,6 +1993,17 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 # define OP_32_64(x) \
         case glue(glue(INDEX_op_, x), _i32)
 #endif
+#define OP_V128_ALL(x) \
+        case glue(glue(INDEX_op_, x), _i8x16): \
+        case glue(glue(INDEX_op_, x), _i16x8): \
+        case glue(glue(INDEX_op_, x), _i32x4): \
+        case glue(glue(INDEX_op_, x), _i64x2)
+
+#define OP_V64_ALL(x) \
+        case glue(glue(INDEX_op_, x), _i8x8):  \
+        case glue(glue(INDEX_op_, x), _i16x4): \
+        case glue(glue(INDEX_op_, x), _i32x2): \
+        case glue(glue(INDEX_op_, x), _i64x1)
 
     /* Hoist the loads of the most common arguments.  */
     a0 = args[0];
@@ -2369,8 +2401,13 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_mb(s, a0);
         break;
 
-    case INDEX_op_add_i32x4:
-        tcg_out_modrm(s, OPC_PADDD, args[0], args[2]);
+    OP_V128_ALL(add):
+    OP_V64_ALL(add):
+        if (have_avx) {
+            tcg_out_vex_modrm(s, vect_binop[opc], args[0], args[1], args[2]);
+        } else {
+            tcg_out_modrm(s, vect_binop[opc], args[0], args[2]);
+        }
         break;
 
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
@@ -2383,6 +2420,8 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     }
 
 #undef OP_32_64
+#undef OP_V128_ALL
+#undef OP_V64_ALL
 }
 
 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
@@ -2613,7 +2652,14 @@  static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
             return &s2;
         }
 
+    case INDEX_op_add_i8x16:
+    case INDEX_op_add_i16x8:
     case INDEX_op_add_i32x4:
+    case INDEX_op_add_i64x2:
+    case INDEX_op_add_i8x8:
+    case INDEX_op_add_i16x4:
+    case INDEX_op_add_i32x2:
+    case INDEX_op_add_i64x1:
         return &V_0_V;
 
     default:
@@ -2728,6 +2774,10 @@  static void tcg_target_init(TCGContext *s)
 #ifdef bit_POPCNT
         have_popcnt = (c & bit_POPCNT) != 0;
 #endif
+#if defined(bit_AVX) && defined(bit_OSXSAVE)
+        have_avx = (c & (bit_AVX | bit_OSXSAVE)) == (bit_AVX | bit_OSXSAVE);
+#endif
+
     }
 
     if (max >= 7) {