diff mbox

[4.7] Fix ix86_vectorize_vec_perm_const_ok (PR target/57896)

Message ID CAFULd4YZSp6X-djpuwfGJFORkGdQKUL3tJuHvUV54G7bf_Fk5Q@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak Feb. 21, 2014, 7:30 a.m. UTC
On Thu, Feb 20, 2014 at 7:39 PM, Jakub Jelinek <jakub@redhat.com> wrote:

> As discussed in the PR, calling gen_reg_rtx when init_emit is not
> active is highly undesirable.  The following patch makes sure that
> for d->testing_p we never call gen_reg_rtx (i.e. from within
> ix86_vectorize_vec_perm_const_ok) and never try to emit insns.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux (together with Uros'
> patch to assert that gen_reg_rtx is not called when init_emit is not
> active) with RTL checking, further tested with
> GCC_TEST_RUN_EXPENSIVE=1 make -j16 -k check RUNTESTFLAGS='--target_board=unix\{-msse2,-msse3,-mssse3,-msse4,-mavx,-mavx2,-mavx512f\} dg-torture.exp=*vshuf*'
> (on AVX HW, so -mavx2 and -mavx512f tests expectedly failed execution,
> but at least didn't fail compilation, with the exception of
> gcc.dg/torture/vshuf-v8sf.c which ICEs with -mavx2 -DEXPENSIVE, but
> both without this patch and with this patch - will look at it eventually).

Attached is the complete backport of the patch to the 4.7 branch. Jakub,
there is a slight churn in expand_vec_perm_interleave2 (there is no
dfinal.one_operand_p), can you please check if everything is OK there?

2014-02-21  Uros Bizjak  <ubizjak@gmail.com>

    Backport from mainline
    2014-02-20  Jakub Jelinek  <jakub@redhat.com>

    * config/i386/i386.c (ix86_expand_vec_perm): Use V8SImode
    for mask of V8SFmode permutation.

    Backport from 4.8 branch
    2014-02-20  Jakub Jelinek  <jakub@redhat.com>

    PR target/57896
    * config/i386/i386.c (expand_vec_perm_interleave2): Don't call
    gen_reg_rtx if d->testing_p.
    (expand_vec_perm_pshufb2, expand_vec_perm_even_odd_1,
    expand_vec_perm_broadcast_1): Return early if d->testing_p and
    we will certainly return true.

Tested on x86_64-pc-linux-gnu {,-m32} with additional gen_reg_rtx
assert added, also with GCC_TEST_RUN_EXPENSIVE=1 -k check
RUNTESTFLAGS='--target_board=unix\{-msse2,-msse3,-mssse3,-msse4,-mavx,-mavx2\}
dg-torture.exp=*vshuf*' on AVX target, which uncovered the above
mentioned unrelated ICE, which is also fixed with this patch.

Uros.

Comments

Jakub Jelinek Feb. 21, 2014, 7:45 a.m. UTC | #1
On Fri, Feb 21, 2014 at 08:30:39AM +0100, Uros Bizjak wrote:
> Attached is the complete backport of the patch to the 4.7 branch. Jakub,
> there is a slight churn in expand_vec_perm_interleave2 (there is no
> dfinal.one_operand_p), can you please check if everything is OK there?

Yeah, one_operand_p has been introduced only in 4.8.  LGTM.

	Jakub
diff mbox

Patch

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 207970)
+++ config/i386/i386.c	(working copy)
@@ -20073,7 +20073,7 @@  ix86_expand_vec_perm (rtx operands[])
 	  return;
 
 	case V8SFmode:
-	  mask = gen_lowpart (V8SFmode, mask);
+	  mask = gen_lowpart (V8SImode, mask);
 	  if (one_operand_shuffle)
 	    emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
 	  else
@@ -36655,7 +36655,9 @@  expand_vec_perm_interleave2 (struct expand_vec_per
       else
 	dfinal.perm[i] = e;
     }
-  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
+
+  if (!d->testing_p)
+    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
   dfinal.op1 = dfinal.op0;
   dremap.target = dfinal.op0;
 
@@ -37052,6 +37054,8 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm
   switch (d->vmode)
     {
     case V4DFmode:
+      if (d->testing_p)
+	break;
       t1 = gen_reg_rtx (V4DFmode);
       t2 = gen_reg_rtx (V4DFmode);
 
@@ -37071,6 +37075,8 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm
       {
 	int mask = odd ? 0xdd : 0x88;
 
+	if (d->testing_p)
+	  break;
 	t1 = gen_reg_rtx (V8SFmode);
 	t2 = gen_reg_rtx (V8SFmode);
 	t3 = gen_reg_rtx (V8SFmode);
@@ -37112,6 +37118,8 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm
 	return expand_vec_perm_pshufb2 (d);
       else
 	{
+	  if (d->testing_p)
+	    break;
 	  /* We need 2*log2(N)-1 operations to achieve odd/even
 	     with interleave. */
 	  t1 = gen_reg_rtx (V8HImode);
@@ -37133,6 +37141,8 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm
 	return expand_vec_perm_pshufb2 (d);
       else
 	{
+	  if (d->testing_p)
+	    break;
 	  t1 = gen_reg_rtx (V16QImode);
 	  t2 = gen_reg_rtx (V16QImode);
 	  t3 = gen_reg_rtx (V16QImode);
@@ -37165,6 +37175,9 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm
 	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
 	}
 
+      if (d->testing_p)
+	break;
+
       t1 = gen_reg_rtx (V4DImode);
       t2 = gen_reg_rtx (V4DImode);
 
@@ -37191,6 +37204,9 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm
 	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
 	}
 
+      if (d->testing_p)
+	break;
+
       t1 = gen_reg_rtx (V8SImode);
       t2 = gen_reg_rtx (V8SImode);
 
@@ -37283,6 +37299,8 @@  expand_vec_perm_broadcast_1 (struct expand_vec_per
     case V16QImode:
       /* These can be implemented via interleave.  We save one insn by
 	 stopping once we have promoted to V4SImode and then use pshufd.  */
+      if (d->testing_p)
+	return true;
       do
 	{
 	  rtx dest;