diff mbox series

[committed,AArch64] Add support for SVE absolute comparisons

Message ID mptlfvwm6wm.fsf@arm.com
State New
Headers show
Series [committed,AArch64] Add support for SVE absolute comparisons | expand

Commit Message

Richard Sandiford Aug. 14, 2019, 9:29 a.m. UTC
This patch adds support for the SVE floating-point absolute comparisons
FACGE and FACGT, together with FACLE and FACLT (which are aliases of
FACGE and FACGT with the operands swapped).

Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Applied as r274443.

Richard


2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (SVE_COND_FP_ABS_CMP): New iterator.
	* config/aarch64/aarch64-sve.md (*aarch64_pred_fac<cmp_op><mode>):
	New pattern.

gcc/testsuite/
	* gcc.target/aarch64/sve/vcond_21.c: New test.
	* gcc.target/aarch64/sve/vcond_21_run.c: Likewise.
diff mbox series

Patch

Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md	2019-08-14 10:14:27.899953691 +0100
+++ gcc/config/aarch64/iterators.md	2019-08-14 10:24:53.211364279 +0100
@@ -1709,6 +1709,11 @@  (define_int_iterator SVE_COND_FP_CMP_I0
 					 UNSPEC_COND_FCMLT
 					 UNSPEC_COND_FCMNE])
 
+(define_int_iterator SVE_COND_FP_ABS_CMP [UNSPEC_COND_FCMGE
+					  UNSPEC_COND_FCMGT
+					  UNSPEC_COND_FCMLE
+					  UNSPEC_COND_FCMLT])	; For the SVE FAC<cmp_op> pattern.
+
 (define_int_iterator FCADD [UNSPEC_FCADD90
 			    UNSPEC_FCADD270])
 
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md	2019-08-14 10:22:19.524492496 +0100
+++ gcc/config/aarch64/aarch64-sve.md	2019-08-14 10:24:53.211364279 +0100
@@ -94,7 +94,8 @@ 
 ;; ---- [INT,FP] Compare and select
 ;; ---- [INT] Comparisons
 ;; ---- [INT] While tests
-;; ---- [FP] Comparisons
+;; ---- [FP] Direct comparisons
+;; ---- [FP] Absolute comparisons
 ;; ---- [PRED] Test bits
 ;;
 ;; == Reductions
@@ -3364,7 +3365,7 @@  (define_insn_and_rewrite "*while_ult<GPI
 )
 
 ;; -------------------------------------------------------------------------
-;; ---- [FP] Comparisons
+;; ---- [FP] Direct comparisons
 ;; -------------------------------------------------------------------------
 ;; Includes:
 ;; - FCMEQ
@@ -3474,6 +3475,45 @@  (define_insn_and_split "*fcmuo<mode>_and
 )
 
 ;; -------------------------------------------------------------------------
+;; ---- [FP] Absolute comparisons
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FACGE
+;; - FACGT
+;; - FACLE
+;; - FACLT
+;; -------------------------------------------------------------------------
+
+;; Predicated floating-point absolute comparisons of |op2| with |op3|.
+(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>"
+  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+	(unspec:<VPRED>
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+	   (unspec:SVE_F
+	     [(match_operand 5)	; Rewritten to a copy of operand 1 below.
+	      (match_operand:SI 6 "aarch64_sve_gp_strictness")
+	      (match_operand:SVE_F 2 "register_operand" "w")]
+	     UNSPEC_COND_FABS)
+	   (unspec:SVE_F
+	     [(match_operand 7)	; Rewritten to a copy of operand 1 below.
+	      (match_operand:SI 8 "aarch64_sve_gp_strictness")
+	      (match_operand:SVE_F 3 "register_operand" "w")]
+	     UNSPEC_COND_FABS)]
+	  SVE_COND_FP_ABS_CMP))]
+  "TARGET_SVE
+   && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
+   && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
+  "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+  "&& (!rtx_equal_p (operands[1], operands[5])
+       || !rtx_equal_p (operands[1], operands[7]))"
+  {
+    operands[5] = copy_rtx (operands[1]);
+    operands[7] = copy_rtx (operands[1]);
+  }
+)
+
+;; -------------------------------------------------------------------------
 ;; ---- [PRED] Test bits
 ;; -------------------------------------------------------------------------
 ;; Includes:
Index: gcc/testsuite/gcc.target/aarch64/sve/vcond_21.c
===================================================================
--- /dev/null	2019-07-30 08:53:31.317691683 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/vcond_21.c	2019-08-14 10:24:53.211364279 +0100
@@ -0,0 +1,34 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* Define test_TYPE_NAME: r[i] = ABS (a[i]) OP ABS (b[i]) ? 1.0 : 0.0.  */
+#define DEF_LOOP(TYPE, ABS, NAME, OP)			\
+  void							\
+  test_##TYPE##_##NAME (TYPE *restrict r,		\
+			TYPE *restrict a,		\
+			TYPE *restrict b, int n)	\
+  {							\
+    for (int i = 0; i < n; ++i)				\
+      r[i] = ABS (a[i]) OP ABS (b[i]) ? 1.0 : 0.0;	\
+  }
+/* TEST_TYPE applies T to <, <=, >= and >; TEST_ALL repeats it per type.  */
+#define TEST_TYPE(T, TYPE, ABS)			\
+  T (TYPE, ABS, lt, <)				\
+  T (TYPE, ABS, le, <=)				\
+  T (TYPE, ABS, ge, >=)				\
+  T (TYPE, ABS, gt, >)
+
+#define TEST_ALL(T)				\
+  TEST_TYPE (T, _Float16, __builtin_fabsf16)	\
+  TEST_TYPE (T, float, __builtin_fabsf)		\
+  TEST_TYPE (T, double, __builtin_fabs)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tfac[lg]t\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfac[lg]e\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfac[lg]t\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfac[lg]e\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfac[lg]t\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfac[lg]e\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/vcond_21_run.c
===================================================================
--- /dev/null	2019-07-30 08:53:31.317691683 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/vcond_21_run.c	2019-08-14 10:24:53.211364279 +0100
@@ -0,0 +1,31 @@ 
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "vcond_21.c"
+
+#define N 97
+/* Run test_TYPE_NAME on N elements and abort on any unexpected r[i].  */
+#define TEST_LOOP(TYPE, ABS, NAME, OP)				\
+  {								\
+    TYPE r[N], a[N], b[N];					\
+    for (int i = 0; i < N; ++i)					\
+      {								\
+	a[i] = i % 5 * (i & 1 ? -1 : 1);			\
+	b[i] = i % 9 * (i & 2 ? -1 : 1);			\
+	asm volatile ("" ::: "memory");				\
+      }								\
+    test_##TYPE##_##NAME (r, a, b, N);				\
+    for (int i = 0; i < N; ++i)					\
+      {								\
+	if (r[i] != (ABS (a[i]) OP ABS (b[i]) ? 1.0 : 0.0))	\
+	  __builtin_abort ();					\
+	asm volatile ("" ::: "memory");				\
+      }								\
+  }
+/* Expand TEST_LOOP for every type and comparison listed by TEST_ALL.  */
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}