i386: Fix up *avx_vperm_broadcast_v4df [PR93430]
diff mbox series

Message ID 20200125235548.GH17695@tucnak
State New
Headers show
Series
  • i386: Fix up *avx_vperm_broadcast_v4df [PR93430]
Related show

Commit Message

Jakub Jelinek Jan. 25, 2020, 11:55 p.m. UTC
Hi!

Apparently my recent patch, which moved the *avx_vperm_broadcast* and
*vpermil* patterns before vpermpd, broke the following testcase.  The
define_insn_and_split always matched, but its split condition only allowed
the split when the mode was not V4DFmode or -mavx2 was not enabled,
basically relying on the vpermpd pattern to come first.

The following patch fixes it by moving that part of SPLIT-CONDITION into
CONDITION, so that when it is not met, we just don't match the pattern
and thus match the later vpermpd pattern in that case.
The exception is the { 0, 0, 0, 0 } permutation, for which there is
actually no reason to do that: vbroadcastsd from memory seems to be
slightly cheaper than vpermpd $0, so the pattern still matches then.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-01-26  Jakub Jelinek  <jakub@redhat.com>

	PR target/93430
	* config/i386/sse.md (*avx_vperm_broadcast_<mode>): Disallow for
	TARGET_AVX2 and V4DFmode not in the split condition, but in the
	pattern condition, though allow { 0, 0, 0, 0 } broadcast always.

	* gcc.dg/pr93430.c: New test.
	* gcc.target/i386/avx2-pr93430.c: New test.


	Jakub

Comments

Uros Bizjak Jan. 26, 2020, 9:20 a.m. UTC | #1
On Sun, Jan 26, 2020 at 12:55 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> Apparently my recent patch which moved the *avx_vperm_broadcast* and
> *vpermil* patterns before vpermpd broke the following testcase, the
> define_insn_and_split matched always but the splitter condition only split
> it if not -mavx2 for V4DFmode, basically relying on the vpermpd pattern to
> come first.
>
> The following patch fixes it by moving that part of SPLIT-CONDITION into
> CONDITION, so that when it is not met, we just don't match the pattern
> and thus match the later vpermpd pattern in that case.
> Except, for { 0, 0, 0, 0 } permutation, there is actually no reason to do
> that, vbroadcastsd from memory seems to be slightly cheaper than vpermpd $0.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2020-01-26  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/93430
>         * config/i386/sse.md (*avx_vperm_broadcast_<mode>): Disallow for
>         TARGET_AVX2 and V4DFmode not in the split condition, but in the
>         pattern condition, though allow { 0, 0, 0, 0 } broadcast always.
>
>         * gcc.dg/pr93430.c: New test.
>         * gcc.target/i386/avx2-pr93430.c: New test.

LGTM.

Thanks,
Uros.

> --- gcc/config/i386/sse.md.jj   2020-01-24 22:49:19.000000000 +0100
> +++ gcc/config/i386/sse.md      2020-01-25 18:32:02.100439737 +0100
> @@ -19912,9 +19912,10 @@ (define_insn_and_split "*avx_vperm_broad
>           (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
>           (match_parallel 2 "avx_vbroadcast_operand"
>             [(match_operand 3 "const_int_operand" "C,n,n")])))]
> -  "TARGET_AVX"
> +  "TARGET_AVX
> +   && (<MODE>mode != V4DFmode || !TARGET_AVX2 || operands[3] == const0_rtx)"
>    "#"
> -  "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
> +  "&& reload_completed"
>    [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
>  {
>    rtx op0 = operands[0], op1 = operands[1];
> --- gcc/testsuite/gcc.dg/pr93430.c.jj   2020-01-25 18:39:33.455584367 +0100
> +++ gcc/testsuite/gcc.dg/pr93430.c      2020-01-25 18:38:03.725950223 +0100
> @@ -0,0 +1,33 @@
> +/* PR target/93430 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-additional-options "-mavx -mno-avx2" { target avx } } */
> +
> +typedef double V __attribute__((vector_size (4 * sizeof (double))));
> +typedef long long VI __attribute__((vector_size (4 * sizeof (long long))));
> +
> +#if __SIZEOF_DOUBLE__ == __SIZEOF_LONG_LONG__
> +void
> +foo (V *x, V *y)
> +{
> +  y[0] = __builtin_shuffle (x[0], x[0], (VI) { 0, 0, 0, 0 });
> +}
> +
> +void
> +bar (V *x, V *y)
> +{
> +  y[0] = __builtin_shuffle (x[0], x[0], (VI) { 1, 1, 1, 1 });
> +}
> +
> +void
> +baz (V *x, V *y)
> +{
> +  y[0] = __builtin_shuffle (x[0], x[0], (VI) { 2, 2, 2, 2 });
> +}
> +
> +void
> +qux (V *x, V *y)
> +{
> +  y[0] = __builtin_shuffle (x[0], x[0], (VI) { 3, 3, 3, 3 });
> +}
> +#endif
> --- gcc/testsuite/gcc.target/i386/avx2-pr93430.c.jj     2020-01-25 18:39:55.282252126 +0100
> +++ gcc/testsuite/gcc.target/i386/avx2-pr93430.c        2020-01-25 18:40:35.080646319 +0100
> @@ -0,0 +1,5 @@
> +/* PR target/93430 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx2" } */
> +
> +#include "../../gcc.dg/pr93430.c"
>
>         Jakub
>

Patch
diff mbox series

--- gcc/config/i386/sse.md.jj	2020-01-24 22:49:19.000000000 +0100
+++ gcc/config/i386/sse.md	2020-01-25 18:32:02.100439737 +0100
@@ -19912,9 +19912,10 @@  (define_insn_and_split "*avx_vperm_broad
 	  (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
 	  (match_parallel 2 "avx_vbroadcast_operand"
 	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
-  "TARGET_AVX"
+  "TARGET_AVX
+   && (<MODE>mode != V4DFmode || !TARGET_AVX2 || operands[3] == const0_rtx)"
   "#"
-  "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
+  "&& reload_completed"
   [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
 {
   rtx op0 = operands[0], op1 = operands[1];
--- gcc/testsuite/gcc.dg/pr93430.c.jj	2020-01-25 18:39:33.455584367 +0100
+++ gcc/testsuite/gcc.dg/pr93430.c	2020-01-25 18:38:03.725950223 +0100
@@ -0,0 +1,33 @@ 
+/* PR target/93430 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-mavx -mno-avx2" { target avx } } */
+
+typedef double V __attribute__((vector_size (4 * sizeof (double))));
+typedef long long VI __attribute__((vector_size (4 * sizeof (long long))));
+
+#if __SIZEOF_DOUBLE__ == __SIZEOF_LONG_LONG__
+void
+foo (V *x, V *y)
+{
+  y[0] = __builtin_shuffle (x[0], x[0], (VI) { 0, 0, 0, 0 });
+}
+
+void
+bar (V *x, V *y)
+{
+  y[0] = __builtin_shuffle (x[0], x[0], (VI) { 1, 1, 1, 1 });
+}
+
+void
+baz (V *x, V *y)
+{
+  y[0] = __builtin_shuffle (x[0], x[0], (VI) { 2, 2, 2, 2 });
+}
+
+void
+qux (V *x, V *y)
+{
+  y[0] = __builtin_shuffle (x[0], x[0], (VI) { 3, 3, 3, 3 });
+}
+#endif
--- gcc/testsuite/gcc.target/i386/avx2-pr93430.c.jj	2020-01-25 18:39:55.282252126 +0100
+++ gcc/testsuite/gcc.target/i386/avx2-pr93430.c	2020-01-25 18:40:35.080646319 +0100
@@ -0,0 +1,5 @@ 
+/* PR target/93430 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+
+#include "../../gcc.dg/pr93430.c"