[arm,5/X] Implement Q-bit-setting SIMD32 intrinsics
diff mbox series

Message ID b27a31a2-faf5-c8b8-5628-318d61302294@foss.arm.com
State New
Headers show
Series
  • [arm,5/X] Implement Q-bit-setting SIMD32 intrinsics
Related show

Commit Message

Kyrill Tkachov Nov. 7, 2019, 10:27 a.m. UTC
Hi all,

This patch implements some more Q-setting intrinsics of the multiply-accumulate
variety. These belong to the SIMD32 family in that they treat their operands
as packed SIMD values, but that distinction is not important at the RTL level.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Committing to trunk.
Thanks,
Kyrill

2019-11-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/arm/arm.md (arm_<simd32_op><add_clobber_q_name>_insn):
     New define_insns.
     (arm_<simd32_op>): New define_expands.
     * config/arm/arm_acle.h (__smlad, __smladx, __smlsd, __smlsdx,
     __smuad, __smuadx): Define.
     * config/arm/arm_acle_builtins.def: Define builtins for the above.
     * config/arm/iterators.md (SIMD32_TERNOP_Q): New int_iterator.
     (SIMD32_BINOP_Q): Likewise.
     (simd32_op): Handle the above.
     * config/arm/unspecs.md: Define unspecs for the above.

2019-11-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * gcc.target/arm/acle/simd32.c: Update test.

Patch
diff mbox series

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 884a224a991102955787600317581e6468463bea..7717f547ab4706183d2727013496c249edbe7abf 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -5865,6 +5865,62 @@ 
   [(set_attr "predicable" "yes")
    (set_attr "type" "alu_sreg")])
 
+(define_insn "arm_<simd32_op><add_clobber_q_name>_insn"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(unspec:SI
+	  [(match_operand:SI 1 "s_register_operand" "r")
+	   (match_operand:SI 2 "s_register_operand" "r")
+	   (match_operand:SI 3 "s_register_operand" "r")] SIMD32_TERNOP_Q))]
+  "TARGET_INT_SIMD && <add_clobber_q_pred>"
+  "<simd32_op>%?\\t%0, %1, %2, %3"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "alu_sreg")])
+
+(define_expand "arm_<simd32_op>"
+  [(set (match_operand:SI 0 "s_register_operand")
+	(unspec:SI
+	  [(match_operand:SI 1 "s_register_operand")
+	   (match_operand:SI 2 "s_register_operand")
+	   (match_operand:SI 3 "s_register_operand")] SIMD32_TERNOP_Q))]
+  "TARGET_INT_SIMD"
+  {
+    if (ARM_Q_BIT_READ)
+      emit_insn (gen_arm_<simd32_op>_setq_insn (operands[0], operands[1],
+						operands[2], operands[3]));
+    else
+      emit_insn (gen_arm_<simd32_op>_insn (operands[0], operands[1],
+					   operands[2], operands[3]));
+    DONE;
+  }
+)
+
+(define_insn "arm_<simd32_op><add_clobber_q_name>_insn"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(unspec:SI
+	  [(match_operand:SI 1 "s_register_operand" "r")
+	   (match_operand:SI 2 "s_register_operand" "r")] SIMD32_BINOP_Q))]
+  "TARGET_INT_SIMD && <add_clobber_q_pred>"
+  "<simd32_op>%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "alu_sreg")])
+
+(define_expand "arm_<simd32_op>"
+  [(set (match_operand:SI 0 "s_register_operand")
+	(unspec:SI
+	  [(match_operand:SI 1 "s_register_operand")
+	   (match_operand:SI 2 "s_register_operand")] SIMD32_BINOP_Q))]
+  "TARGET_INT_SIMD"
+  {
+    if (ARM_Q_BIT_READ)
+      emit_insn (gen_arm_<simd32_op>_setq_insn (operands[0], operands[1],
+						operands[2]));
+    else
+      emit_insn (gen_arm_<simd32_op>_insn (operands[0], operands[1],
+					   operands[2]));
+    DONE;
+  }
+)
+
 (define_insn "arm_sel"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(unspec:SI
diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h
index b8d02a5502f273fcba492bbeba2542b13334a8ea..c30645e3949f84321fb1dfe3afd06167ef859d62 100644
--- a/gcc/config/arm/arm_acle.h
+++ b/gcc/config/arm/arm_acle.h
@@ -522,6 +522,48 @@  __usub16 (uint16x2_t __a, uint16x2_t __b)
   return __builtin_arm_usub16 (__a, __b);
 }
 
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__smlad (int16x2_t __a, int16x2_t __b, int32_t __c)
+{
+  return __builtin_arm_smlad (__a, __b, __c);
+}
+
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__smladx (int16x2_t __a, int16x2_t __b, int32_t __c)
+{
+  return __builtin_arm_smladx (__a, __b, __c);
+}
+
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__smlsd (int16x2_t __a, int16x2_t __b, int32_t __c)
+{
+  return __builtin_arm_smlsd (__a, __b, __c);
+}
+
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__smlsdx (int16x2_t __a, int16x2_t __b, int32_t __c)
+{
+  return __builtin_arm_smlsdx (__a, __b, __c);
+}
+
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__smuad (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_smuad (__a, __b);
+}
+
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__smuadx (int16x2_t __a, int16x2_t __b)
+{
+  return __builtin_arm_smuadx (__a, __b);
+}
+
 #endif
 
 #ifdef __ARM_FEATURE_SAT
diff --git a/gcc/config/arm/arm_acle_builtins.def b/gcc/config/arm/arm_acle_builtins.def
index 715c3c94e8c8f6355e880a36eb275be80d1a3912..018d89682c61a963961515823420f1b986cd40db 100644
--- a/gcc/config/arm/arm_acle_builtins.def
+++ b/gcc/config/arm/arm_acle_builtins.def
@@ -107,3 +107,10 @@  VAR1 (UBINOP, usax, si)
 VAR1 (UBINOP, usub16, si)
 
 VAR1 (UBINOP, sel, si)
+
+VAR1 (TERNOP, smlad, si)
+VAR1 (TERNOP, smladx, si)
+VAR1 (TERNOP, smlsd, si)
+VAR1 (TERNOP, smlsdx, si)
+VAR1 (BINOP, smuad, si)
+VAR1 (BINOP, smuadx, si)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 2394a959d19b6a7f2cff8fb7609da6231dee14d6..72aba5e86fc20216bcba74f5cfa5b9f744497a6e 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -453,6 +453,11 @@ 
 				UNSPEC_SSAX UNSPEC_SSUB16 UNSPEC_UADD16
 				UNSPEC_UASX UNSPEC_USAX UNSPEC_USUB16])
 
+(define_int_iterator SIMD32_TERNOP_Q [UNSPEC_SMLAD UNSPEC_SMLADX UNSPEC_SMLSD
+				      UNSPEC_SMLSDX])
+
+(define_int_iterator SIMD32_BINOP_Q [UNSPEC_SMUAD UNSPEC_SMUADX])
+
 (define_int_iterator VQRDMLH_AS [UNSPEC_VQRDMLAH UNSPEC_VQRDMLSH])
 
 (define_int_iterator VFM_LANE_AS [UNSPEC_VFMA_LANE UNSPEC_VFMS_LANE])
@@ -1075,7 +1080,10 @@ 
 			    (UNSPEC_SASX "sasx") (UNSPEC_SSAX "ssax")
 			    (UNSPEC_SSUB16 "ssub16") (UNSPEC_UADD16 "uadd16")
 			    (UNSPEC_UASX "uasx") (UNSPEC_USAX "usax")
-			    (UNSPEC_USUB16 "usub16")])
+			    (UNSPEC_USUB16 "usub16") (UNSPEC_SMLAD "smlad")
+			    (UNSPEC_SMLADX "smladx") (UNSPEC_SMLSD "smlsd")
+			    (UNSPEC_SMLSDX "smlsdx") (UNSPEC_SMUAD "smuad")
+			    (UNSPEC_SMUADX "smuadx")])
 
 ;; Both kinds of return insn.
 (define_code_iterator RETURNS [return simple_return])
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index dfd6dea3421aaa1e1c597d4e7c6c0ad7a856b9fe..8bf6d9712054808143d308726c5c0f1d613c6ed4 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -146,6 +146,12 @@ 
   UNSPEC_UASX		; Represent the UASX operation.
   UNSPEC_USAX		; Represent the USAX operation.
   UNSPEC_USUB16		; Represent the USUB16 operation.
+  UNSPEC_SMLAD		; Represent the SMLAD operation.
+  UNSPEC_SMLADX		; Represent the SMLADX operation.
+  UNSPEC_SMLSD		; Represent the SMLSD operation.
+  UNSPEC_SMLSDX		; Represent the SMLSDX operation.
+  UNSPEC_SMUAD		; Represent the SMUAD operation.
+  UNSPEC_SMUADX		; Represent the SMUADX operation.
 ])
 
 
diff --git a/gcc/testsuite/gcc.target/arm/acle/simd32.c b/gcc/testsuite/gcc.target/arm/acle/simd32.c
index d4304d867f357085877983ca08cd245e444e1958..0db560c690e98cbf1c9e642a7a626a1a2ff8ece4 100644
--- a/gcc/testsuite/gcc.target/arm/acle/simd32.c
+++ b/gcc/testsuite/gcc.target/arm/acle/simd32.c
@@ -372,3 +372,51 @@  test_usub16 (uint16x2_t a, uint16x2_t b)
 }
 
 /* { dg-final { scan-assembler-times "\tusub16\t...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smlad (int16x2_t a, int16x2_t b, int32_t c)
+{
+  return __smlad (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "\tsmlad\t...?, ...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smladx (int16x2_t a, int16x2_t b, int32_t c)
+{
+  return __smladx (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "\tsmladx\t...?, ...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smlsd (int16x2_t a, int16x2_t b, int32_t c)
+{
+  return __smlsd (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "\tsmlsd\t...?, ...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smlsdx (int16x2_t a, int16x2_t b, int32_t c)
+{
+  return __smlsdx (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "\tsmlsdx\t...?, ...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smuad (int16x2_t a, int16x2_t b)
+{
+  return __smuad (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tsmuad\t...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smuadx (int16x2_t a, int16x2_t b)
+{
+  return __smuadx (a, b);
+}
+
+/* { dg-final { scan-assembler-times "\tsmuadx\t...?, ...?, ...?" 1 } } */