diff mbox series

tree-optimization/95273 - more vectorizable_shift massaging

Message ID nycvar.YFH.7.76.2005281154220.4397@zhemvz.fhfr.qr
State New
Headers show
Series tree-optimization/95273 - more vectorizable_shift massaging | expand

Commit Message

Richard Biener May 28, 2020, 9:54 a.m. UTC
Covering all bases in vectorizable_shift is hard - this makes sure
to appropriately handle the case of PR95356 without breaking others.

Bootstrapped / tested on x86_64-unknown-linux-gnu, applied.

2020-05-28  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/95273
	PR tree-optimization/95356
	* tree-vect-stmts.c (vectorizable_shift): Adjust when and to
	what we set the vector type of the shift operand SLP node
	again.

	* gcc.target/i386/pr95356.c: New testcase.
---
 gcc/testsuite/gcc.target/i386/pr95356.c | 125 ++++++++++++++++++++++++++++++++
 gcc/tree-vect-stmts.c                   |   6 +-
 2 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95356.c
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.target/i386/pr95356.c b/gcc/testsuite/gcc.target/i386/pr95356.c
new file mode 100644
index 00000000000..fdd917ba5e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95356.c
@@ -0,0 +1,125 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512dq" } */
+
+extern void abort (void);
+long long a[16];  /* Shared test data; the functions below only touch a[0..3].  */
+
+__attribute__((noinline, noclone)) void
+f1 (void)  /* Shift a[0..3] left in place, each element by a different constant.  */
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];  /* All four elements are loaded into scalars before any shift.  */
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;  /* Constant counts 2, 3, 4, 5: no two elements share a count.  */
+  a1 = a1 << 3;
+  a2 = a2 << 4;
+  a3 = a3 << 5;
+  a[0] = a0;  /* Results are stored back to the same slots.  */
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)  /* Shift a[0..3] left in place, every element by the same constant 2.  */
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];  /* All four elements are loaded into scalars before any shift.  */
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;  /* Uniform constant count across all four elements.  */
+  a1 = a1 << 2;
+  a2 = a2 << 2;
+  a3 = a3 << 2;
+  a[0] = a0;  /* Results are stored back to the same slots.  */
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2a (int x)  /* Shift a[0] left by X and a[1..3] left by the constant 2, in place.  */
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];  /* All four elements are loaded into scalars before any shift.  */
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << x;  /* Only the first element uses the variable (int) count.  */
+  a1 = a1 << 2;  /* The remaining three elements use the constant 2.  */
+  a2 = a2 << 2;
+  a3 = a3 << 2;
+  a[0] = a0;  /* Results are stored back to the same slots.  */
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2b (int x)  /* Shift a[2] left by X and a[0], a[1], a[3] left by the constant 2, in place.  */
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];  /* All four elements are loaded into scalars before any shift.  */
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 2;
+  a2 = a2 << x;  /* Unlike f2a, the variable (int) count sits on the third element.  */
+  a3 = a3 << 2;
+  a[0] = a0;  /* Results are stored back to the same slots.  */
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)  /* Shift a[0..3] left in place, every element by the same variable count X.  */
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];  /* All four elements are loaded into scalars before any shift.  */
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << x;  /* Uniform variable (int) count applied to long long operands.  */
+  a1 = a1 << x;
+  a2 = a2 << x;
+  a3 = a3 << x;
+  a[0] = a0;  /* Results are stored back to the same slots.  */
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+int
+main ()  /* Runs every shift kernel in turn and aborts on the first wrong result.  */
+{  /* NOTE(review): the file is marked dg-do compile, so these checks are presumably never executed -- confirm.  */
+  a[0] = 4LL;  /* Seed values 4, 3, 2, 1; 'a' is never reset below, so shift counts accumulate.  */
+  a[1] = 3LL;
+  a[2] = 2LL;
+  a[3] = 1LL;
+  f1 ();  /* Total shift counts are now 2, 3, 4, 5.  */
+  if (a[0] != (4LL << 2) || a[1] != (3LL << 3)
+      || a[2] != (2LL << 4) || a[3] != (1LL << 5))
+    abort ();
+  f2 ();  /* Adds 2 to every count: 4, 5, 6, 7.  */
+  if (a[0] != (4LL << 4) || a[1] != (3LL << 5)
+      || a[2] != (2LL << 6) || a[3] != (1LL << 7))
+    abort ();
+  f3 (3);  /* Adds 3 to every count: 7, 8, 9, 10.  */
+  if (a[0] != (4LL << 7) || a[1] != (3LL << 8)
+      || a[2] != (2LL << 9) || a[3] != (1LL << 10))
+    abort ();
+  f2a (3);  /* Adds 3 to element 0 and 2 to the rest: 10, 10, 11, 12.  */
+  if (a[0] != (4LL << 10) || a[1] != (3LL << 10)
+      || a[2] != (2LL << 11) || a[3] != (1LL << 12))
+    abort ();
+  f2b (3);  /* Adds 3 to element 2 and 2 to the rest: 12, 12, 14, 14.  */
+  if (a[0] != (4LL << 12) || a[1] != (3LL << 12)
+      || a[2] != (2LL << 14) || a[3] != (1LL << 14))
+    abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2f92bb5555e..ff335aa531e 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5792,7 +5792,11 @@  vectorizable_shift (vec_info *vinfo,
       if (slp_node
 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
 	      || (!scalar_shift_arg
-		  && !vect_maybe_update_slp_op_vectype (slp_op1, vectype))))
+		  && (!incompatible_op1_vectype_p
+		      || dt[1] == vect_constant_def)
+		  && !vect_maybe_update_slp_op_vectype
+			(slp_op1,
+			 incompatible_op1_vectype_p ? vectype : op1_vectype))))
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,