@@ -6075,6 +6075,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
/* Low-parts can be reduced to integral conversions.
??? The following doesn't work for PDP endian. */
|| (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
+ /* But only do this after vectorization. */
+ && canonicalize_math_after_vectorization_p ()
/* Don't even think about BITS_BIG_ENDIAN. */
&& TYPE_PRECISION (TREE_TYPE (@0)) % BITS_PER_UNIT == 0
&& TYPE_PRECISION (type) % BITS_PER_UNIT == 0
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -msse2" } */
+
+typedef long v2di __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+
+void bar (v4si *p, __int128_t *q) /* Copy the four int lanes of *q into *p with lanes 1 and 2 swapped.  */
+{
+ union { __int128_t a; v4si b; } u; /* Views the same 128 bits as an integer or as a v4si.  */
+ u.a = *q; /* Written as __int128_t; the lane reads below go through the v4si member.  */
+ (*p)[0] = u.b[0];
+ (*p)[1] = u.b[2]; /* Source lane 2 lands in destination lane 1 ...  */
+ (*p)[2] = u.b[1]; /* ... and source lane 1 lands in destination lane 2.  */
+ (*p)[3] = u.b[3];
+}
+
+/* The function should end up with something like
+ [v]pshufd $216, (%rsi), %xmm0
+ [v]movdqa %xmm0, (%rdi)
+ ret
+ recognized by SLP vectorization involving an existing "vector". */
+/* { dg-final { scan-assembler-not "punpck" } } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */