diff mbox series

[v2,12/15] tcg/arm: Implement TCG_TARGET_HAS_bitsel_vec

Message ID 20210208024625.271018-13-richard.henderson@linaro.org
State New
Headers show
Series tcg/arm: host neon support | expand

Commit Message

Richard Henderson Feb. 8, 2021, 2:46 a.m. UTC
NEON has 3 instructions (VBSL, VBIT, VBIF) implementing this 4-argument
operation (bitsel_vec), with each insn overlapping a different logical
input onto the destination register.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target-con-set.h |  1 +
 tcg/arm/tcg-target.h         |  2 +-
 tcg/arm/tcg-target.c.inc     | 22 ++++++++++++++++++++--
 3 files changed, 22 insertions(+), 3 deletions(-)

Comments

Peter Maydell Feb. 8, 2021, 7:55 p.m. UTC | #1
On Mon, 8 Feb 2021 at 04:02, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> NEON has 3 instructions implementing this 4 argument operation,
> with each insn overlapping a different logical input onto the
> destination register.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> @@ -2899,6 +2904,18 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
>          }
>          return;
>
> +    case INDEX_op_bitsel_vec:
> +        a3 = args[3];
> +        if (a0 == a3) {
> +            tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
> +        } else if (a0 == a2) {
> +            tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
> +        } else {
> +            tcg_out_mov(s, type, a0, a1);

Side note: aarch64 tcg guards this tcg_out_mov with "if (a0 != a1)",
which, if I understand correctly, is superfluous and could be removed.

> +            tcg_out_vreg3(s, INSN_VBSL, q, 0, a0, a2, a3);
> +        }
> +        return;
> +

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM
Richard Henderson Feb. 8, 2021, 10:27 p.m. UTC | #2
On 2/8/21 11:55 AM, Peter Maydell wrote:
> On Mon, 8 Feb 2021 at 04:02, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> NEON has 3 instructions implementing this 4 argument operation,
>> with each insn overlapping a different logical input onto the
>> destination register.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> @@ -2899,6 +2904,18 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
>>          }
>>          return;
>>
>> +    case INDEX_op_bitsel_vec:
>> +        a3 = args[3];
>> +        if (a0 == a3) {
>> +            tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
>> +        } else if (a0 == a2) {
>> +            tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
>> +        } else {
>> +            tcg_out_mov(s, type, a0, a1);
> 
> Side note: aarch64 tcg guards this tcg_out_mov with "if (a0 != a1)",
> which if I understand correctly is superfluous and could be removed.

Yep, tcg_out_mov already does that test.


r~
diff mbox series

Patch

diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
index cc006f99cd..d02797cbf4 100644
--- a/tcg/arm/tcg-target-con-set.h
+++ b/tcg/arm/tcg-target-con-set.h
@@ -34,6 +34,7 @@  C_O1_I2(w, w, w)
 C_O1_I2(w, w, wO)
 C_O1_I2(w, w, wV)
 C_O1_I2(w, w, wZ)
+C_O1_I3(w, w, w, w)
 C_O1_I4(r, r, r, rI, rI)
 C_O1_I4(r, r, rIN, rIK, 0)
 C_O2_I1(r, r, l)
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 4815a34e75..d6222ba2db 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -169,7 +169,7 @@  extern bool use_neon_instructions;
 #define TCG_TARGET_HAS_mul_vec          1
 #define TCG_TARGET_HAS_sat_vec          1
 #define TCG_TARGET_HAS_minmax_vec       1
-#define TCG_TARGET_HAS_bitsel_vec       0
+#define TCG_TARGET_HAS_bitsel_vec       1
 #define TCG_TARGET_HAS_cmpsel_vec       0
 
 #define TCG_TARGET_DEFAULT_MO (0)
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index afd2807c09..875d975d4b 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -216,6 +216,10 @@  typedef enum {
     INSN_VSARI     = 0xf2800010,  /* VSHR.S */
     INSN_VSHRI     = 0xf3800010,  /* VSHR.U */
 
+    INSN_VBSL      = 0xf3100110,
+    INSN_VBIT      = 0xf3200110,
+    INSN_VBIF      = 0xf3300110,
+
     INSN_VTST      = 0xf2000810,
 
     INSN_VDUP_G    = 0xee800b10,  /* VDUP (ARM core register) */
@@ -2427,7 +2431,8 @@  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         return C_O1_I2(w, w, wV);
     case INDEX_op_cmp_vec:
         return C_O1_I2(w, w, wZ);
-
+    case INDEX_op_bitsel_vec:
+        return C_O1_I3(w, w, w, w);
     default:
         g_assert_not_reached();
     }
@@ -2748,7 +2753,7 @@  static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 {
     TCGType type = vecl + TCG_TYPE_V64;
     unsigned q = vecl;
-    TCGArg a0, a1, a2;
+    TCGArg a0, a1, a2, a3;
     int cmode, imm8;
 
     a0 = args[0];
@@ -2899,6 +2904,18 @@  static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         }
         return;
 
+    case INDEX_op_bitsel_vec:
+        a3 = args[3];
+        if (a0 == a3) {
+            tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
+        } else if (a0 == a2) {
+            tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
+        } else {
+            tcg_out_mov(s, type, a0, a1);
+            tcg_out_vreg3(s, INSN_VBSL, q, 0, a0, a2, a3);
+        }
+        return;
+
     case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
     default:
@@ -2924,6 +2941,7 @@  int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_sssub_vec:
     case INDEX_op_usadd_vec:
     case INDEX_op_ussub_vec:
+    case INDEX_op_bitsel_vec:
         return 1;
     case INDEX_op_abs_vec:
     case INDEX_op_cmp_vec: