diff mbox series

tree-optimization/92645 - avoid harmful early BIT_FIELD_REF canonicalization

Message ID nycvar.YFH.7.76.2101131413380.19759@elmra.sevgm.obk
State New
Headers show
Series tree-optimization/92645 - avoid harmful early BIT_FIELD_REF canonicalization | expand

Commit Message

Richard Biener Jan. 13, 2021, 1:13 p.m. UTC
This avoids canonicalizing BIT_FIELD_REF <T1> (a, <sz>, 0) to
(T1)a on integer typed a, since that conversion confuses the vectorizer's SLP matching.

With the canonicalization delayed until after vector lowering, the testcase
in PR92645 from Skia is now finally optimized to reasonable assembly.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2021-01-13  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92645
	* match.pd (BIT_FIELD_REF to conversion): Delay canonicalization
	until after vector lowering.

	* gcc.target/i386/pr92645-7.c: New testcase.
---
 gcc/match.pd                              |  2 ++
 gcc/testsuite/gcc.target/i386/pr92645-7.c | 24 +++++++++++++++++++++++
 2 files changed, 26 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92645-7.c
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index c286a540c4e..60c383da13b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6075,6 +6075,8 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	   /* Low-parts can be reduced to integral conversions.
 	      ???  The following doesn't work for PDP endian.  */
 	   || (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
+	       /* But only do this after vectorization.  */
+	       && canonicalize_math_after_vectorization_p ()
 	       /* Don't even think about BITS_BIG_ENDIAN.  */
 	       && TYPE_PRECISION (TREE_TYPE (@0)) % BITS_PER_UNIT == 0
 	       && TYPE_PRECISION (type) % BITS_PER_UNIT == 0
diff --git a/gcc/testsuite/gcc.target/i386/pr92645-7.c b/gcc/testsuite/gcc.target/i386/pr92645-7.c
new file mode 100644
index 00000000000..e4c04c2a82a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92645-7.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -msse2" } */
+
+typedef long v2di __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+
+void bar (v4si *p, __int128_t *q)
+{
+  union { __int128_t a; v4si b; } u;
+  u.a = *q;
+  (*p)[0] = u.b[0];
+  (*p)[1] = u.b[2];
+  (*p)[2] = u.b[1];
+  (*p)[3] = u.b[3];
+}
+
+/* The function should end up with something like
+     [v]pshufd $216, (%rsi), %xmm0
+     [v]movdqa %xmm0, (%rdi)
+     ret
+   recognized by SLP vectorization involving an existing "vector".  */
+/* { dg-final { scan-assembler-not "punpck" } } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */