[committed,AArch64] Rework SVE REV[BHW] patterns
diff mbox series

Message ID mptblwqizrm.fsf@arm.com
State New
Headers show
Series
  • [committed,AArch64] Rework SVE REV[BHW] patterns
Related show

Commit Message

Richard Sandiford Aug. 15, 2019, 8:44 a.m. UTC
The current SVE REV patterns follow the AArch64 scheme, in which
UNSPEC_REV<NN> reverses elements within an <NN>-bit granule.
E.g. UNSPEC_REV64 on VNx8HI reverses the four 16-bit elements
within each 64-bit granule.

The native SVE scheme is the other way around: UNSPEC_REV64 is seen
as an operation on 64-bit elements, with REVB swapping bytes within
the elements, REVH swapping halfwords, and so on.  This fits SVE more
naturally because the operation can then be predicated per <NN>-bit
granule/element.

Making the patterns use the Advanced SIMD scheme was more natural
when all we cared about were permutes, since we could then use
the source and target of the permute in their original modes.
However, the ACLE does need patterns that follow the native scheme,
treating them as operations on integer elements.  This patch defines
the patterns that way instead and updates the existing uses to match.

This also brings in a couple of helper routines from the ACLE branch.

Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Applied as r274517.

Richard


2019-08-15  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (UNSPEC_REVB, UNSPEC_REVH)
	(UNSPEC_REVW): New constants.
	(elem_bits): New mode attribute.
	(SVE_INT_UNARY): New int iterator.
	(optab): Handle UNSPEC_REV[BHW].
	(sve_int_op): New int attribute.
	(min_elem_bits): Handle VNx16QI and the predicate modes.
	* config/aarch64/aarch64-sve.md (*aarch64_sve_rev64<mode>)
	(*aarch64_sve_rev32<mode>, *aarch64_sve_rev16vnx16qi): Delete.
	(@aarch64_pred_<SVE_INT_UNARY:optab><SVE_I:mode>): New pattern.
	* config/aarch64/aarch64.c (aarch64_sve_data_mode): New function.
	(aarch64_sve_int_mode, aarch64_sve_rev_unspec): Likewise.
	(aarch64_split_sve_subreg_move): Use UNSPEC_REV[BHW] instead of
	unspecs based on the total width of the reversed data.
	(aarch64_evpc_rev_local): Likewise (for SVE only).  Use a
	reinterpret followed by a subreg on big-endian targets.

gcc/testsuite/
	* gcc.target/aarch64/sve/revb_1.c: Restrict to little-endian targets.
	Avoid including stdint.h.
	* gcc.target/aarch64/sve/revh_1.c: Likewise.
	* gcc.target/aarch64/sve/revw_1.c: Likewise.
	* gcc.target/aarch64/sve/revb_2.c: New big-endian test.
	* gcc.target/aarch64/sve/revh_2.c: Likewise.
	* gcc.target/aarch64/sve/revw_2.c: Likewise.

Patch
diff mbox series

Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md	2019-08-15 09:17:59.073360556 +0100
+++ gcc/config/aarch64/iterators.md	2019-08-15 09:41:46.174822592 +0100
@@ -476,6 +476,9 @@  (define_c_enum "unspec"
     UNSPEC_ANDF		; Used in aarch64-sve.md.
     UNSPEC_IORF		; Used in aarch64-sve.md.
     UNSPEC_XORF		; Used in aarch64-sve.md.
+    UNSPEC_REVB		; Used in aarch64-sve.md.
+    UNSPEC_REVH		; Used in aarch64-sve.md.
+    UNSPEC_REVW		; Used in aarch64-sve.md.
     UNSPEC_SMUL_HIGHPART ; Used in aarch64-sve.md.
     UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md.
     UNSPEC_COND_FABS	; Used in aarch64-sve.md.
@@ -638,7 +641,10 @@  (define_mode_attr sizem1 [(QI "#7") (HI
 
 ;; The number of bits in a vector element, or controlled by a predicate
 ;; element.
-(define_mode_attr elem_bits [(VNx8HI "16") (VNx4SI "32") (VNx2DI "64")
+(define_mode_attr elem_bits [(VNx16BI "8") (VNx8BI "16")
+			     (VNx4BI "32") (VNx2BI "64")
+			     (VNx16QI "8") (VNx8HI "16")
+			     (VNx4SI "32") (VNx2DI "64")
 			     (VNx8HF "16") (VNx4SF "32") (VNx2DF "64")])
 
 ;; Attribute to describe constants acceptable in logical operations
@@ -1677,6 +1683,8 @@  (define_int_iterator UNPACK_UNSIGNED [UN
 
 (define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART])
 
+(define_int_iterator SVE_INT_UNARY [UNSPEC_REVB UNSPEC_REVH UNSPEC_REVW])
+
 (define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV
 					UNSPEC_IORV
 					UNSPEC_SMAXV
@@ -1777,6 +1785,9 @@  (define_int_attr optab [(UNSPEC_ANDF "an
 			(UNSPEC_ANDV "and")
 			(UNSPEC_IORV "ior")
 			(UNSPEC_XORV "xor")
+			(UNSPEC_REVB "revb")
+			(UNSPEC_REVH "revh")
+			(UNSPEC_REVW "revw")
 			(UNSPEC_UMAXV "umax")
 			(UNSPEC_UMINV "umin")
 			(UNSPEC_SMAXV "smax")
@@ -2045,7 +2056,10 @@  (define_int_attr sve_int_op [(UNSPEC_AND
 			     (UNSPEC_UMAXV "umaxv")
 			     (UNSPEC_UMINV "uminv")
 			     (UNSPEC_SMAXV "smaxv")
-			     (UNSPEC_SMINV "sminv")])
+			     (UNSPEC_SMINV "sminv")
+			     (UNSPEC_REVB "revb")
+			     (UNSPEC_REVH "revh")
+			     (UNSPEC_REVW "revw")])
 
 (define_int_attr sve_fp_op [(UNSPEC_FADDV "faddv")
 			    (UNSPEC_FMAXNMV "fmaxnmv")
@@ -2118,3 +2132,8 @@  (define_int_attr sve_pred_fp_rhs2_immedi
   [(UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_immediate")
    (UNSPEC_COND_FMINNM "aarch64_sve_float_maxmin_immediate")
    (UNSPEC_COND_FMUL "aarch64_sve_float_mul_immediate")])
+
+;; The minimum number of element bits that an instruction can handle.
+(define_int_attr min_elem_bits [(UNSPEC_REVB "16")
+				(UNSPEC_REVH "32")
+				(UNSPEC_REVW "64")])
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md	2019-08-15 09:39:38.491764697 +0100
+++ gcc/config/aarch64/aarch64-sve.md	2019-08-15 09:41:46.170822619 +0100
@@ -54,6 +54,7 @@ 
 ;;
 ;; == Unary arithmetic
 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
+;; ---- [INT] General unary arithmetic corresponding to unspecs
 ;; ---- [INT] Zero extension
 ;; ---- [INT] Logical inverse
 ;; ---- [FP] General unary arithmetic corresponding to unspecs
@@ -1498,6 +1499,28 @@  (define_insn "*cond_<optab><mode>_any"
 )
 
 ;; -------------------------------------------------------------------------
+;; ---- [INT] General unary arithmetic corresponding to unspecs
+;; -------------------------------------------------------------------------
+;; Includes
+;; - REVB
+;; - REVH
+;; - REVW
+;; -------------------------------------------------------------------------
+
+;; Predicated integer unary operations.
+(define_insn "@aarch64_pred_<optab><mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (unspec:SVE_I
+	     [(match_operand:SVE_I 2 "register_operand" "w")]
+	     SVE_INT_UNARY)]
+	  UNSPEC_PRED_X))]
+  "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
+  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
 ;; ---- [INT] Zero extension
 ;; -------------------------------------------------------------------------
 ;; Includes:
@@ -4619,9 +4642,6 @@  (define_insn "*aarch64_sve_tbl<mode>"
 ;; Includes:
 ;; - DUP
 ;; - REV
-;; - REVB
-;; - REVH
-;; - REVW
 ;; -------------------------------------------------------------------------
 
 ;; Duplicate one element of a vector.
@@ -4644,42 +4664,6 @@  (define_insn "@aarch64_sve_rev<mode>"
   "TARGET_SVE"
   "rev\t%0.<Vetype>, %1.<Vetype>")
 
-;; Reverse the order elements within a 64-bit container.
-(define_insn "*aarch64_sve_rev64<mode>"
-  [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
-	(unspec:SVE_BHS
-	  [(match_operand:VNx2BI 1 "register_operand" "Upl")
-	   (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
-			   UNSPEC_REV64)]
-	  UNSPEC_PRED_X))]
-  "TARGET_SVE"
-  "rev<Vesize>\t%0.d, %1/m, %2.d"
-)
-
-;; Reverse the order elements within a 32-bit container.
-(define_insn "*aarch64_sve_rev32<mode>"
-  [(set (match_operand:SVE_BH 0 "register_operand" "=w")
-	(unspec:SVE_BH
-	  [(match_operand:VNx4BI 1 "register_operand" "Upl")
-	   (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
-			  UNSPEC_REV32)]
-	  UNSPEC_PRED_X))]
-  "TARGET_SVE"
-  "rev<Vesize>\t%0.s, %1/m, %2.s"
-)
-
-;; Reverse the order elements within a 16-bit container.
-(define_insn "*aarch64_sve_rev16vnx16qi"
-  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
-	(unspec:VNx16QI
-	  [(match_operand:VNx8BI 1 "register_operand" "Upl")
-	   (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
-			   UNSPEC_REV16)]
-	  UNSPEC_PRED_X))]
-  "TARGET_SVE"
-  "revb\t%0.h, %1/m, %2.h"
-)
-
 ;; -------------------------------------------------------------------------
 ;; ---- [INT,FP] Special-purpose binary permutes
 ;; -------------------------------------------------------------------------
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	2019-08-15 09:22:03.047558159 +0100
+++ gcc/config/aarch64/aarch64.c	2019-08-15 09:41:46.174822592 +0100
@@ -1675,6 +1675,22 @@  aarch64_get_mask_mode (poly_uint64 nunit
   return default_get_mask_mode (nunits, nbytes);
 }
 
+/* Return the SVE vector mode that has NUNITS elements of mode INNER_MODE.  */
+
+static opt_machine_mode
+aarch64_sve_data_mode (scalar_mode inner_mode, poly_uint64 nunits)
+{
+  enum mode_class mclass = (is_a <scalar_float_mode> (inner_mode)
+			    ? MODE_VECTOR_FLOAT : MODE_VECTOR_INT);
+  machine_mode mode;
+  FOR_EACH_MODE_IN_CLASS (mode, mclass)
+    if (inner_mode == GET_MODE_INNER (mode)
+	&& known_eq (nunits, GET_MODE_NUNITS (mode))
+	&& aarch64_sve_data_mode_p (mode))
+      return mode;
+  return opt_machine_mode ();
+}
+
 /* Return the integer element mode associated with SVE mode MODE.  */
 
 static scalar_int_mode
@@ -1685,6 +1701,17 @@  aarch64_sve_element_int_mode (machine_mo
   return int_mode_for_size (elt_bits, 0).require ();
 }
 
+/* Return the integer vector mode associated with SVE mode MODE.
+   Unlike mode_for_int_vector, this can handle the case in which
+   MODE is a predicate (and thus has a different total size).  */
+
+static machine_mode
+aarch64_sve_int_mode (machine_mode mode)
+{
+  scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode);
+  return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require ();
+}
+
 /* Implement TARGET_PREFERRED_ELSE_VALUE.  For binary operations,
    prefer to use the first arithmetic operand as the else value if
    the else value doesn't matter, since that exactly matches the SVE
@@ -4280,14 +4307,29 @@  aarch64_replace_reg_mode (rtx x, machine
   return x;
 }
 
+/* Return the SVE REV[BHW] unspec for reversing quantities of mode MODE
+   stored in wider integer containers.  */
+
+static unsigned int
+aarch64_sve_rev_unspec (machine_mode mode)
+{
+  switch (GET_MODE_UNIT_SIZE (mode))
+    {
+    case 1: return UNSPEC_REVB;
+    case 2: return UNSPEC_REVH;
+    case 4: return UNSPEC_REVW;
+    }
+  gcc_unreachable ();
+}
+
 /* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
    operands.  */
 
 void
 aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
 {
-  /* Decide which REV operation we need.  The mode with narrower elements
-     determines the mode of the operands and the mode with the wider
+  /* Decide which REV operation we need.  The mode with wider elements
+     determines the mode of the operands and the mode with the narrower
      elements determines the reverse width.  */
   machine_mode mode_with_wider_elts = GET_MODE (dest);
   machine_mode mode_with_narrower_elts = GET_MODE (src);
@@ -4295,30 +4337,16 @@  aarch64_split_sve_subreg_move (rtx dest,
       < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
     std::swap (mode_with_wider_elts, mode_with_narrower_elts);
 
+  unsigned int unspec = aarch64_sve_rev_unspec (mode_with_narrower_elts);
   unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
-  unsigned int unspec;
-  if (wider_bytes == 8)
-    unspec = UNSPEC_REV64;
-  else if (wider_bytes == 4)
-    unspec = UNSPEC_REV32;
-  else if (wider_bytes == 2)
-    unspec = UNSPEC_REV16;
-  else
-    gcc_unreachable ();
   machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
 
-  /* Emit:
-
-       (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)] UNSPEC_PRED_X))
-
-     with the appropriate modes.  */
+  /* Get the operands in the appropriate modes and emit the instruction.  */
   ptrue = gen_lowpart (pred_mode, ptrue);
-  dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
-  src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
-  src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
-  src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
-			UNSPEC_PRED_X);
-  emit_insn (gen_rtx_SET (dest, src));
+  dest = aarch64_replace_reg_mode (dest, mode_with_wider_elts);
+  src = aarch64_replace_reg_mode (src, mode_with_wider_elts);
+  emit_insn (gen_aarch64_pred (unspec, mode_with_wider_elts,
+			       dest, ptrue, src));
 }
 
 static bool
@@ -17753,13 +17781,31 @@  aarch64_evpc_rev_local (struct expand_ve
   if (d->testing_p)
     return true;
 
-  rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
   if (d->vec_flags == VEC_SVE_DATA)
     {
-      rtx pred = aarch64_ptrue_reg (pred_mode);
-      src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src),
-			    UNSPEC_PRED_X);
+      machine_mode int_mode = aarch64_sve_int_mode (pred_mode);
+      rtx target = gen_reg_rtx (int_mode);
+      if (BYTES_BIG_ENDIAN)
+	/* The act of taking a subreg between INT_MODE and d->vmode
+	   is itself a reversing operation on big-endian targets;
+	   see the comment at the head of aarch64-sve.md for details.
+	   First reinterpret OP0 as INT_MODE without using a subreg
+	   and without changing the contents.  */
+	emit_insn (gen_aarch64_sve_reinterpret (int_mode, target, d->op0));
+      else
+	{
+	  /* For SVE we use REV[BHW] unspecs derived from the element size
+	     of d->vmode and vector modes whose elements have SIZE bytes.
+	     This ensures that the vector modes match the predicate modes.  */
+	  int unspec = aarch64_sve_rev_unspec (d->vmode);
+	  rtx pred = aarch64_ptrue_reg (pred_mode);
+	  emit_insn (gen_aarch64_pred (unspec, int_mode, target, pred,
+				       gen_lowpart (int_mode, d->op0)));
+	}
+      emit_move_insn (d->target, gen_lowpart (d->vmode, target));
+      return true;
     }
+  rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
   emit_set_insn (d->target, src);
   return true;
 }
Index: gcc/testsuite/gcc.target/aarch64/sve/revb_1.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/revb_1.c	2019-03-08 18:14:29.772994767 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/revb_1.c	2019-08-15 09:41:46.174822592 +0100
@@ -1,9 +1,7 @@ 
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */
 
-#include <stdint.h>
-
-typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef __INT8_TYPE__ vnx16qi __attribute__((vector_size (32)));
 
 #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)
 #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
Index: gcc/testsuite/gcc.target/aarch64/sve/revh_1.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/revh_1.c	2019-03-08 18:14:29.788994704 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/revh_1.c	2019-08-15 09:41:46.174822592 +0100
@@ -1,9 +1,7 @@ 
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */
 
-#include <stdint.h>
-
-typedef uint16_t vnx8hi __attribute__((vector_size (32)));
+typedef __UINT16_TYPE__ vnx8hi __attribute__((vector_size (32)));
 typedef _Float16 vnx8hf __attribute__((vector_size (32)));
 
 #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)
Index: gcc/testsuite/gcc.target/aarch64/sve/revw_1.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/sve/revw_1.c	2019-03-08 18:14:29.780994734 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/revw_1.c	2019-08-15 09:41:46.174822592 +0100
@@ -1,9 +1,7 @@ 
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */
 
-#include <stdint.h>
-
-typedef uint32_t vnx4si __attribute__((vector_size (32)));
+typedef __UINT32_TYPE__ vnx4si __attribute__((vector_size (32)));
 typedef float vnx4sf __attribute__((vector_size (32)));
 
 #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)
Index: gcc/testsuite/gcc.target/aarch64/sve/revb_2.c
===================================================================
--- /dev/null	2019-07-30 08:53:31.317691683 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/revb_2.c	2019-08-15 09:41:46.174822592 +0100
@@ -0,0 +1,10 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */
+
+#include "revb_1.c"
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 1 } } */
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h} 1 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/revh_2.c
===================================================================
--- /dev/null	2019-07-30 08:53:31.317691683 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/revh_2.c	2019-08-15 09:41:46.174822592 +0100
@@ -0,0 +1,9 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */
+
+#include "revh_1.c"
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 2 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/revw_2.c
===================================================================
--- /dev/null	2019-07-30 08:53:31.317691683 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/revw_2.c	2019-08-15 09:41:46.174822592 +0100
@@ -0,0 +1,8 @@ 
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */
+
+#include "revw_1.c"
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\trevw\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */