[committed,AArch64] Fix INDEX patterns for partial VNx2 modes
diff mbox series

Message ID mptimmo2l8k.fsf@arm.com
State New
Headers show
Series
  • [committed,AArch64] Fix INDEX patterns for partial VNx2 modes
Related show

Commit Message

Richard Sandiford Dec. 10, 2019, 4:34 p.m. UTC
The INDEX patterns handle partial modes by choosing the container
size rather than the element size, so that the number of lanes
(and thus number of additions) matches the mode.  This means that
all VNx4 modes use .s and all VNx2 modes use .d, etc.

When adding this, I'd forgotten that the choice between Wn and Xn
registers would need to be updated to use the container size too.
For partial VNx2s, we were using .d containers with Wn rather than
Xn source registers.

Tested on aarch64-linux-gnu, applied as r279173.

Richard


2019-12-10  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (vccore): New iterator.
	* config/aarch64/aarch64-sve.md (vec_series<mode>): Use it instead
	of vwcore.
	(*vec_series<mode>_plus): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sve/mixed_size_6.c: New test.

Patch
diff mbox series

Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md	2019-11-18 15:36:04.861884957 +0000
+++ gcc/config/aarch64/iterators.md	2019-12-10 16:31:31.328032388 +0000
@@ -1093,6 +1093,12 @@  (define_mode_attr vwcore [(V8QI "w") (V1
 			  (VNx2DI "x")
 			  (VNx2DF "x")])
 
+;; Like vwcore, but for the container mode rather than the element mode.
+(define_mode_attr vccore [(VNx16QI "w") (VNx8QI "w") (VNx4QI "w") (VNx2QI "x")
+			  (VNx8HI "w") (VNx4HI "w") (VNx2HI "x")
+			  (VNx4SI "w") (VNx2SI "x")
+			  (VNx2DI "x")])
+
 ;; Double vector types for ALLX.
 (define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")])
 
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md	2019-11-16 13:31:24.342304673 +0000
+++ gcc/config/aarch64/aarch64-sve.md	2019-12-10 16:31:31.328032388 +0000
@@ -2541,9 +2541,9 @@  (define_insn "vec_series<mode>"
 	  (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
   "TARGET_SVE"
   "@
-   index\t%0.<Vctype>, #%1, %<vwcore>2
-   index\t%0.<Vctype>, %<vwcore>1, #%2
-   index\t%0.<Vctype>, %<vwcore>1, %<vwcore>2"
+   index\t%0.<Vctype>, #%1, %<vccore>2
+   index\t%0.<Vctype>, %<vccore>1, #%2
+   index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
 )
 
 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
@@ -2557,7 +2557,7 @@  (define_insn "*vec_series<mode>_plus"
   "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
   {
     operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
-    return "index\t%0.<Vctype>, %<vwcore>1, #%2";
+    return "index\t%0.<Vctype>, %<vccore>1, #%2";
   }
 )
 
Index: gcc/testsuite/gcc.target/aarch64/sve/mixed_size_6.c
===================================================================
--- /dev/null	2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/mixed_size_6.c	2019-12-10 16:31:31.328032388 +0000
@@ -0,0 +1,47 @@ 
+/* { dg-options "-O3 -msve-vector-bits=256" } */
+
+#include <stdint.h>
+
+void
+f1 (uint64_t *restrict ptr1, uint8_t *restrict ptr2, uint8_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 1;
+    }
+}
+
+void
+f2 (uint64_t *restrict ptr1, uint16_t *restrict ptr2, uint16_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 2;
+    }
+}
+
+void
+f3 (uint64_t *restrict ptr1, uint32_t *restrict ptr2, uint32_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 4;
+    }
+}
+
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #1\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #2\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #4\n} } } */
+
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #1\n} } } */
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #2\n} } } */
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #4\n} } } */