diff mbox

Fix ICE with AVX512F and AMD tuning (PR target/70300)

Message ID 20160321201048.GF3017@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek March 21, 2016, 8:10 p.m. UTC
Hi!

vec_interleave_lowv4sf only supports =x, x, x alternative, not =v, v, v
(which should be supportable for AVX512VL only anyway, but probably
stage1 material), without AVX512VL and with ext sse reg input operand
we have to either due to interleaving or broadcast in the destination,
or disable the splitter.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-03-21  Jakub Jelinek  <jakub@redhat.com>

	PR target/70300
	* config/i386/i386.md (cvtsd2ss splitter): Unpack in destination
	instead of source if operands[1] is xmm16 and above and
	!TARGET_AVX512VL.  Use avx512f_vec_dupv16sf_1 instead of
	vec_interleave_lowv4sf if we need to unpack xmm16 and above.

	* gcc.target/i386/pr70300.c: New test.


	Jakub

Comments

Kirill Yukhin March 22, 2016, 6:46 a.m. UTC | #1
Hi Jakub,
On 21 Mar 21:10, Jakub Jelinek wrote:
> vec_interleave_lowv4sf only supports =x, x, x alternative, not =v, v, v
> (which should be supportable for AVX512VL only anyway, but probably
> stage1 material), without AVX512VL and with ext sse reg input operand
> we have to either due to interleaving or broadcast in the destination,
> or disable the splitter.
You're right, something is wrong w/ vec_interleave constraints. Need to
look closer @ stage1.

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
You patch is OK. Thanks for fixing this!

--
K

> 2016-03-21  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR target/70300
> 	* config/i386/i386.md (cvtsd2ss splitter): Unpack in destination
> 	instead of source if operands[1] is xmm16 and above and
> 	!TARGET_AVX512VL.  Use avx512f_vec_dupv16sf_1 instead of
> 	vec_interleave_lowv4sf if we need to unpack xmm16 and above.
> 
> 	* gcc.target/i386/pr70300.c: New test.
> 
> --- gcc/config/i386/i386.md.jj	2016-03-17 12:53:25.000000000 +0100
> +++ gcc/config/i386/i386.md	2016-03-21 11:50:54.539001151 +0100
> @@ -4229,17 +4229,28 @@ (define_split
>      {
>        /* If it is unsafe to overwrite upper half of source, we need
>  	 to move to destination and unpack there.  */
> -      if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
> -	   || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
> -	  && true_regnum (operands[0]) != true_regnum (operands[1]))
> +      if (((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
> +	    || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
> +	   && true_regnum (operands[0]) != true_regnum (operands[1]))
> +	  || (EXT_REX_SSE_REG_P (operands[1])
> +	      && !TARGET_AVX512VL))
>  	{
>  	  rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
>  	  emit_move_insn (tmp, operands[1]);
>  	}
>        else
>  	operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
> -      emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
> -      		 			     operands[3]));
> +      /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
> +	 =v, v, then vbroadcastss will be only needed for AVX512F without
> +	 AVX512VL.  */
> +      if (!EXT_REX_SSE_REGNO_P (true_regnum (operands[3])))
> +	emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
> +					       operands[3]));
> +      else
> +	{
> +	  rtx tmp = simplify_gen_subreg (V16SFmode, operands[3], V4SFmode, 0);
> +	  emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
> +	}
>      }
>    else
>      emit_insn (gen_vec_setv4sf_0 (operands[3],
> --- gcc/testsuite/gcc.target/i386/pr70300.c.jj	2016-03-21 11:56:16.455580855 +0100
> +++ gcc/testsuite/gcc.target/i386/pr70300.c	2016-03-21 11:55:55.000000000 +0100
> @@ -0,0 +1,25 @@
> +/* PR target/70300 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune=amdfam10 -mavx512f" } */
> +
> +typedef _Complex A __attribute__ ((mode (SC)));
> +typedef _Complex B __attribute__ ((mode (DC)));
> +typedef _Complex C __attribute__ ((mode (TC)));
> +
> +C
> +foo (A a, B b, C c, A d, B e, C f)
> +{
> +  b -= a;
> +  d += a;
> +  a += f;
> +  return a + b + d + e;
> +}
> +
> +__attribute__((target ("avx512vl"))) C
> +bar (A a, B b, C c, A d, B e, C f)
> +{
> +  b -= a;
> +  d += a;
> +  a += f;
> +  return a + b + d + e;
> +}
> 
> 	Jakub
diff mbox

Patch

--- gcc/config/i386/i386.md.jj	2016-03-17 12:53:25.000000000 +0100
+++ gcc/config/i386/i386.md	2016-03-21 11:50:54.539001151 +0100
@@ -4229,17 +4229,28 @@  (define_split
     {
       /* If it is unsafe to overwrite upper half of source, we need
 	 to move to destination and unpack there.  */
-      if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
-	   || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
-	  && true_regnum (operands[0]) != true_regnum (operands[1]))
+      if (((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+	    || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
+	   && true_regnum (operands[0]) != true_regnum (operands[1]))
+	  || (EXT_REX_SSE_REG_P (operands[1])
+	      && !TARGET_AVX512VL))
 	{
 	  rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
 	  emit_move_insn (tmp, operands[1]);
 	}
       else
 	operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-      emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
-      		 			     operands[3]));
+      /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
+	 =v, v, then vbroadcastss will be only needed for AVX512F without
+	 AVX512VL.  */
+      if (!EXT_REX_SSE_REGNO_P (true_regnum (operands[3])))
+	emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
+					       operands[3]));
+      else
+	{
+	  rtx tmp = simplify_gen_subreg (V16SFmode, operands[3], V4SFmode, 0);
+	  emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
+	}
     }
   else
     emit_insn (gen_vec_setv4sf_0 (operands[3],
--- gcc/testsuite/gcc.target/i386/pr70300.c.jj	2016-03-21 11:56:16.455580855 +0100
+++ gcc/testsuite/gcc.target/i386/pr70300.c	2016-03-21 11:55:55.000000000 +0100
@@ -0,0 +1,25 @@ 
+/* PR target/70300 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=amdfam10 -mavx512f" } */
+
+typedef _Complex A __attribute__ ((mode (SC)));
+typedef _Complex B __attribute__ ((mode (DC)));
+typedef _Complex C __attribute__ ((mode (TC)));
+
+C
+foo (A a, B b, C c, A d, B e, C f)
+{
+  b -= a;
+  d += a;
+  a += f;
+  return a + b + d + e;
+}
+
+__attribute__((target ("avx512vl"))) C
+bar (A a, B b, C c, A d, B e, C f)
+{
+  b -= a;
+  d += a;
+  a += f;
+  return a + b + d + e;
+}