[arm,3/X] Implement __smla* intrinsics (Q-setting)
diff mbox series

Message ID 9c09fdf0-7107-7ab1-6bbd-a49ad73aece9@foss.arm.com
State New
Headers show
Series
  • [arm,3/X] Implement __smla* intrinsics (Q-setting)
Related show

Commit Message

Kyrill Tkachov Nov. 7, 2019, 10:26 a.m. UTC
Hi all,

This patch implements some more Q-setting intrinsics from the SMLA* group.
These can set the saturation bit on overflow in the accumulation step.
Like earlier, these have non-Q-setting RTL forms as well for when the
Q-bit read is not needed.

Bootstrapped and tested on arm-none-linux-gnueabihf.
Committing to trunk.
Thanks,
Kyrill

2019-11-07  Kyrylo Tkachov <kyrylo.tkachov@arm.com>

     * config/arm/arm.md (arm_smlabb_setq): New define_insn.
     (arm_smlabb): New define_expand.
     (*maddhisi4tb): Rename to...
     (maddhisi4tb): ... This.
     (*maddhisi4tt): Rename to...
     (maddhisi4tt): ... This.
     (arm_smlatb_setq): New define_insn.
     (arm_smlatb): New define_expand.
     (arm_smlatt_setq): New define_insn.
     (arm_smlatt): New define_expand.
     (arm_<smlaw_op><add_clobber_q_name>_insn): New define_insn.
     (arm_<smlaw_op>): New define_expand.
     * config/arm/arm_acle.h (__smlabb, __smlatb, __smlabt, __smlatt,
     __smlawb, __smlawt): Define.
     * config/arm/arm_acle_builtins.def: Define builtins for the above.
     * config/arm/iterators.md (SMLAWBT): New int_iterator.
     (smlaw_op): New int_attribute.
     * config/arm/unspecs.md (UNSPEC_SMLAWB, UNSPEC_SMLAWT): Define.

2019-11-07  Kyrylo Tkachov <kyrylo.tkachov@arm.com>

     * gcc.target/arm/acle/dsp_arith.c: Update test.

Patch
diff mbox series

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index db7a4006eb4f354e08f22c666fea8f1e87726085..05c8ca2772d4475a25b037e3e745c9558e1c5742 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -2565,8 +2565,40 @@ 
    (set_attr "predicable" "yes")]
 )
 
+(define_insn "arm_smlabb_setq"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(plus:SI (mult:SI (sign_extend:SI
+			   (match_operand:HI 1 "s_register_operand" "r"))
+			  (sign_extend:SI
+			   (match_operand:HI 2 "s_register_operand" "r")))
+		 (match_operand:SI 3 "s_register_operand" "r")))
+   (set (reg:CC APSRQ_REGNUM)
+	(unspec:CC [(reg:CC APSRQ_REGNUM)] UNSPEC_Q_SET))]
+  "TARGET_DSP_MULTIPLY"
+  "smlabb%?\\t%0, %1, %2, %3"
+  [(set_attr "type" "smlaxy")
+   (set_attr "predicable" "yes")]
+)
+
+(define_expand "arm_smlabb"
+ [(match_operand:SI 0 "s_register_operand")
+  (match_operand:SI 1 "s_register_operand")
+  (match_operand:SI 2 "s_register_operand")
+  (match_operand:SI 3 "s_register_operand")]
+  "TARGET_DSP_MULTIPLY"
+  {
+    rtx mult1 = gen_lowpart (HImode, operands[1]);
+    rtx mult2 = gen_lowpart (HImode, operands[2]);
+    if (ARM_Q_BIT_READ)
+      emit_insn (gen_arm_smlabb_setq (operands[0], mult1, mult2, operands[3]));
+    else
+      emit_insn (gen_maddhisi4 (operands[0], mult1, mult2, operands[3]));
+    DONE;
+  }
+)
+
 ;; Note: there is no maddhisi4ibt because this one is canonical form
-(define_insn "*maddhisi4tb"
+(define_insn "maddhisi4tb"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(plus:SI (mult:SI (ashiftrt:SI
 			   (match_operand:SI 1 "s_register_operand" "r")
@@ -2580,7 +2612,41 @@ 
    (set_attr "predicable" "yes")]
 )
 
-(define_insn "*maddhisi4tt"
+(define_insn "arm_smlatb_setq"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(plus:SI (mult:SI (ashiftrt:SI
+			   (match_operand:SI 1 "s_register_operand" "r")
+			   (const_int 16))
+			  (sign_extend:SI
+			   (match_operand:HI 2 "s_register_operand" "r")))
+		 (match_operand:SI 3 "s_register_operand" "r")))
+   (set (reg:CC APSRQ_REGNUM)
+	(unspec:CC [(reg:CC APSRQ_REGNUM)] UNSPEC_Q_SET))]
+  "TARGET_DSP_MULTIPLY"
+  "smlatb%?\\t%0, %1, %2, %3"
+  [(set_attr "type" "smlaxy")
+   (set_attr "predicable" "yes")]
+)
+
+(define_expand "arm_smlatb"
+ [(match_operand:SI 0 "s_register_operand")
+  (match_operand:SI 1 "s_register_operand")
+  (match_operand:SI 2 "s_register_operand")
+  (match_operand:SI 3 "s_register_operand")]
+  "TARGET_DSP_MULTIPLY"
+  {
+    rtx mult2 = gen_lowpart (HImode, operands[2]);
+    if (ARM_Q_BIT_READ)
+      emit_insn (gen_arm_smlatb_setq (operands[0], operands[1],
+				      mult2, operands[3]));
+    else
+      emit_insn (gen_maddhisi4tb (operands[0], operands[1],
+				  mult2, operands[3]));
+    DONE;
+  }
+)
+
+(define_insn "maddhisi4tt"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(plus:SI (mult:SI (ashiftrt:SI
 			   (match_operand:SI 1 "s_register_operand" "r")
@@ -2595,6 +2661,40 @@ 
    (set_attr "predicable" "yes")]
 )
 
+(define_insn "arm_smlatt_setq"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(plus:SI (mult:SI (ashiftrt:SI
+			   (match_operand:SI 1 "s_register_operand" "r")
+			   (const_int 16))
+			  (ashiftrt:SI
+			   (match_operand:SI 2 "s_register_operand" "r")
+			   (const_int 16)))
+		 (match_operand:SI 3 "s_register_operand" "r")))
+   (set (reg:CC APSRQ_REGNUM)
+	(unspec:CC [(reg:CC APSRQ_REGNUM)] UNSPEC_Q_SET))]
+  "TARGET_DSP_MULTIPLY"
+  "smlatt%?\\t%0, %1, %2, %3"
+  [(set_attr "type" "smlaxy")
+   (set_attr "predicable" "yes")]
+)
+
+(define_expand "arm_smlatt"
+ [(match_operand:SI 0 "s_register_operand")
+  (match_operand:SI 1 "s_register_operand")
+  (match_operand:SI 2 "s_register_operand")
+  (match_operand:SI 3 "s_register_operand")]
+  "TARGET_DSP_MULTIPLY"
+  {
+    if (ARM_Q_BIT_READ)
+      emit_insn (gen_arm_smlatt_setq (operands[0], operands[1],
+				      operands[2], operands[3]));
+    else
+      emit_insn (gen_maddhisi4tt (operands[0], operands[1],
+				  operands[2], operands[3]));
+    DONE;
+  }
+)
+
 (define_insn "maddhidi4"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
 	(plus:DI
@@ -2641,6 +2741,38 @@ 
   [(set_attr "type" "smlalxy")
    (set_attr "predicable" "yes")])
 
+(define_insn "arm_<smlaw_op><add_clobber_q_name>_insn"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(unspec:SI
+	   [(match_operand:SI 1 "s_register_operand" "r")
+	    (match_operand:SI 2 "s_register_operand" "r")
+	    (match_operand:SI 3 "s_register_operand" "r")]
+	   SMLAWBT))]
+  "TARGET_DSP_MULTIPLY && <add_clobber_q_pred>"
+  "<smlaw_op>%?\\t%0, %1, %2, %3"
+  [(set_attr "type" "smlaxy")
+   (set_attr "predicable" "yes")]
+)
+
+(define_expand "arm_<smlaw_op>"
+  [(set (match_operand:SI 0 "s_register_operand")
+	(unspec:SI
+	   [(match_operand:SI 1 "s_register_operand")
+	    (match_operand:SI 2 "s_register_operand")
+	    (match_operand:SI 3 "s_register_operand")]
+	   SMLAWBT))]
+  "TARGET_DSP_MULTIPLY"
+  {
+    if (ARM_Q_BIT_READ)
+      emit_insn (gen_arm_<smlaw_op>_setq_insn (operands[0], operands[1],
+					       operands[2], operands[3]));
+    else
+      emit_insn (gen_arm_<smlaw_op>_insn (operands[0], operands[1],
+					  operands[2], operands[3]));
+    DONE;
+  }
+)
+
 (define_expand "mulsf3"
   [(set (match_operand:SF          0 "s_register_operand")
 	(mult:SF (match_operand:SF 1 "s_register_operand")
diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h
index 397653d3e8bf43cbcb82d98dd704bcd3a66cf782..fb144cf789c87db92d688c2444d1204e0bb14ffe 100644
--- a/gcc/config/arm/arm_acle.h
+++ b/gcc/config/arm/arm_acle.h
@@ -499,6 +499,50 @@  __qdbl (int32_t __x)
 {
   return __qadd (__x, __x);
 }
+
+__extension__ extern __inline int32_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+__smlabb (int32_t __a, int32_t __b, int32_t __c)
+{
+  return __builtin_arm_smlabb (__a, __b, __c);
+}
+
+__extension__ extern __inline int32_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+__smlatb (int32_t __a, int32_t __b, int32_t __c)
+{
+  return __builtin_arm_smlatb (__a, __b, __c);
+}
+
+/* smlatb is equivalent to smlabt with the two multiplication operands
+   swapped around.  */
+__extension__ extern __inline int32_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+__smlabt (int32_t __a, int32_t __b, int32_t __c)
+{
+  return __smlatb (__b, __a, __c);
+}
+
+__extension__ extern __inline int32_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+__smlatt (int32_t __a, int32_t __b, int32_t __c)
+{
+  return __builtin_arm_smlatt (__a, __b, __c);
+}
+
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__smlawb (int32_t __a, int32_t __b, int32_t __c)
+{
+  return __builtin_arm_smlawb (__a, __b, __c);
+}
+
+__extension__ extern __inline int32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__smlawt (int32_t __a, int32_t __b, int32_t __c)
+{
+  return __builtin_arm_smlawt (__a, __b, __c);
+}
 #endif
 
 #pragma GCC push_options
diff --git a/gcc/config/arm/arm_acle_builtins.def b/gcc/config/arm/arm_acle_builtins.def
index def1a569311e67194a323decc309ed92747c4c86..85dd87e9d8e1507b579d511245537aa75c3ce3d8 100644
--- a/gcc/config/arm/arm_acle_builtins.def
+++ b/gcc/config/arm/arm_acle_builtins.def
@@ -86,3 +86,8 @@  VAR1 (SAT_OCCURRED, saturation_occurred, si)
 VAR1 (SET_SAT, set_saturation, void)
 VAR1 (BINOP, qadd, si)
 VAR1 (BINOP, qsub, si)
+VAR1 (TERNOP, smlabb, si)
+VAR1 (TERNOP, smlatb, si)
+VAR1 (TERNOP, smlatt, si)
+VAR1 (TERNOP, smlawb, si)
+VAR1 (TERNOP, smlawt, si)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index ebb8218f265023786730881ef0bc9f818e7235b0..006ffd7fc6c8c69e169a4f2e6321d312b70225d6 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -446,6 +446,8 @@ 
 (define_int_iterator SIMD32_DIMODE [UNSPEC_SMLALD UNSPEC_SMLALDX
 				    UNSPEC_SMLSLD UNSPEC_SMLSLDX])
 
+(define_int_iterator SMLAWBT [UNSPEC_SMLAWB UNSPEC_SMLAWT])
+
 (define_int_iterator VQRDMLH_AS [UNSPEC_VQRDMLAH UNSPEC_VQRDMLSH])
 
 (define_int_iterator VFM_LANE_AS [UNSPEC_VFMA_LANE UNSPEC_VFMS_LANE])
@@ -1127,3 +1129,5 @@ 
 
 (define_int_attr opsuffix [(UNSPEC_DOT_S "s8")
 			   (UNSPEC_DOT_U "u8")])
+
+(define_int_attr smlaw_op [(UNSPEC_SMLAWB "smlawb") (UNSPEC_SMLAWT "smlawt")])
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index a4287949e525688ee5141e4975917537f84466ff..06988abb67eee27e948d8039df6a94eefb187618 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -130,6 +130,8 @@ 
   UNSPEC_SMLALDX	; Represent the SMLALDX operation.
   UNSPEC_SMLSLD		; Represent the SMLSLD operation.
   UNSPEC_SMLSLDX	; Represent the SMLSLDX operation.
+  UNSPEC_SMLAWB		; Represent the SMLAWB operation.
+  UNSPEC_SMLAWT		; Represent the SMLAWT operation.
 ])
 
 
diff --git a/gcc/testsuite/gcc.target/arm/acle/dsp_arith.c b/gcc/testsuite/gcc.target/arm/acle/dsp_arith.c
index f0bf80993beb0007b0eb360878f0fd1811098d9e..9ebd55a12879a198b76f4645912187a4d16c9363 100644
--- a/gcc/testsuite/gcc.target/arm/acle/dsp_arith.c
+++ b/gcc/testsuite/gcc.target/arm/acle/dsp_arith.c
@@ -25,3 +25,49 @@  test_qsub (int32_t a, int32_t b)
 }
 
 /* { dg-final { scan-assembler-times "qsub\t...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smlabb (int32_t a, int32_t b, int32_t c)
+{
+  return __smlabb (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "smlabb\t...?, ...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smlabt (int32_t a, int32_t b, int32_t c)
+{
+  return __smlabt (a, b, c);
+}
+
+int32_t
+test_smlatb (int32_t a, int32_t b, int32_t c)
+{
+  return __smlatb (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "smlatb\t...?, ...?, ...?, ...?" 2 } } */
+
+int32_t
+test_smlatt (int32_t a, int32_t b, int32_t c)
+{
+  return __smlatt (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "smlatt\t...?, ...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smlawb (int32_t a, int32_t b, int32_t c)
+{
+  return __smlawb (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "smlawb\t...?, ...?, ...?, ...?" 1 } } */
+
+int32_t
+test_smlawt (int32_t a, int32_t b, int32_t c)
+{
+  return __smlawt (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "smlawt\t...?, ...?, ...?, ...?" 1 } } */