diff mbox

Fix ix86_vectorize_vec_perm_const_ok (PR target/57896)

Message ID 53066862.7060302@redhat.com
State New
Headers show

Commit Message

Richard Henderson Feb. 20, 2014, 8:41 p.m. UTC
On 02/20/2014 12:39 PM, Jakub Jelinek wrote:
> +  if (!d->testing_p)
> +    {
> +      dremap.target = gen_reg_rtx (dremap.vmode);
> +      dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
> +    }
...
> +	  if (d->testing_p)
> +	    d_copy.target = gen_lowpart (V4DFmode, d->target);
> +	  else
> +	    d_copy.target = gen_reg_rtx (V4DFmode);

I'm not keen on these changes, because they could potentially affect how the
insn matching happens.  I'm not 100% sure it actually matters, but I think
there's a simple way around it: use the same gen_raw_REG kind of thing that we
do to begin.

What about this?  Note that I've also slightly adjusted a few of the breaks, to
take into account that both arms of the if will always succeed.


r~

Comments

Jakub Jelinek Feb. 20, 2014, 8:59 p.m. UTC | #1
On Thu, Feb 20, 2014 at 02:41:06PM -0600, Richard Henderson wrote:
> On 02/20/2014 12:39 PM, Jakub Jelinek wrote:
> > +  if (!d->testing_p)
> > +    {
> > +      dremap.target = gen_reg_rtx (dremap.vmode);
> > +      dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
> > +    }
> ...
> > +	  if (d->testing_p)
> > +	    d_copy.target = gen_lowpart (V4DFmode, d->target);
> > +	  else
> > +	    d_copy.target = gen_reg_rtx (V4DFmode);
> 
> I'm not keen on these changes, because they could potentially affect how the
> insn matching happens.  I'm not 100% sure it actually matters, but I think
> there's a simple way around it: use the same gen_raw_REG kind of thing that we
> do to begin.

Note, the patch has been committed already.

Anyway, I don't see why this would matter, when d->testing_p, d->target
is some pseudo register, d->op0 another one and d->op1 either the same as
d->op0 or yet another register.  For testing we are just matching a single
d_copy (or other permutation) at a time, so all we care about is that
target/op0/op1 have the right modes and that target is a different register
from op0 and op0 either the same as op1 or yet another register.

> What about this?  Note that I've also slightly adjusted a few of the breaks, to
> take into account that both arms of the if will always succeed.

The problem I see with this is that we'll need to maintain test_regno
everywhere.  E.g. there are places which even fir !d->testing_p create
temporary structures and set testing_p in it, that would now require
that we also set test_regno there to something (what?).

	Jakub
Richard Henderson Feb. 20, 2014, 9:07 p.m. UTC | #2
On 02/20/2014 02:59 PM, Jakub Jelinek wrote:
> ... so all we care about is that
> target/op0/op1 have the right modes and that target is a different register
> from op0 and op0 either the same as op1 or yet another register.

That's my point: yet another register.  Thus the creation of a brand new pseudo
(or brand new dummy) is not irrelevant to the match.

> 
> The problem I see with this is that we'll need to maintain test_regno
> everywhere.  E.g. there are places which even fir !d->testing_p create
> temporary structures and set testing_p in it, that would now require
> that we also set test_regno there to something (what?).

True, I didn't think of the other places we set testing_p.  But that's just one
or two more places to set test_regno.  Not out of the question...


r~
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cd14e52..d827bb3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -39499,6 +39499,7 @@  struct expand_vec_perm_d
   rtx target, op0, op1;
   unsigned char perm[MAX_VECT_LEN];
   enum machine_mode vmode;
+  unsigned test_regno;
   unsigned char nelt;
   bool one_operand_p;
   bool testing_p;
@@ -42419,6 +42420,17 @@  init_vselect_insn (void)
   end_sequence ();
 }
 
+/* Create a new psuedo, or for testing, a dummy register.  */
+
+static rtx
+gen_vec_perm_reg (struct expand_vec_perm_p *d, enum machine_mode mode)
+{
+  if (d->testing_p)
+    return gen_raw_REG (mode, ++d->test_regno);
+  else
+    return gen_reg_rtx (mode);
+}
+
 /* Construct (set target (vec_select op0 (parallel perm))) and
    return true if that's a valid instruction in the active ISA.  */
 
@@ -42811,9 +42823,7 @@  expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 		{
 		  for (i = 0; i < 4; i++)
 		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
-		  if (d->testing_p)
-		    return true;
-		  target = gen_reg_rtx (V4DImode);
+		  target = gen_vec_perm_reg (d, V4DImode);
 		  if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
 				      perm, 4, false))
 		    {
@@ -43411,7 +43421,7 @@  expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
       else
 	dfinal.perm[i] = e;
     }
-  dremap.target = gen_reg_rtx (dremap.vmode);
+  dremap.target = gen_vec_perm_reg (d, dremap.vmode);
   dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
   dfinal.op1 = dfinal.op0;
   dfinal.one_operand_p = true;
@@ -43845,6 +43855,9 @@  expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
     return false;
   gcc_assert (!d->one_operand_p);
 
+  if (d->testing_p)
+    return true;
+
   nelt = d->nelt;
   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
 
@@ -44053,6 +44066,8 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
   switch (d->vmode)
     {
     case V4DFmode:
+      if (d->testing_p)
+	break;
       t1 = gen_reg_rtx (V4DFmode);
       t2 = gen_reg_rtx (V4DFmode);
 
@@ -44072,6 +44087,8 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
       {
 	int mask = odd ? 0xdd : 0x88;
 
+	if (d->testing_p)
+	  break;
 	t1 = gen_reg_rtx (V8SFmode);
 	t2 = gen_reg_rtx (V8SFmode);
 	t3 = gen_reg_rtx (V8SFmode);
@@ -44109,6 +44126,8 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
       gcc_unreachable ();
 
     case V8HImode:
+      if (d->testing_p)
+	break;
       if (TARGET_SSSE3)
 	return expand_vec_perm_pshufb2 (d);
       else
@@ -44130,6 +44149,8 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
       break;
 
     case V16QImode:
+      if (d->testing_p)
+	break;
       if (TARGET_SSSE3)
 	return expand_vec_perm_pshufb2 (d);
       else
@@ -44160,7 +44181,7 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
 	{
 	  struct expand_vec_perm_d d_copy = *d;
 	  d_copy.vmode = V4DFmode;
-	  d_copy.target = gen_reg_rtx (V4DFmode);
+	  d_copy.target = gen_vec_perm_reg (d, V4DFmode);
 	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
 	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
 	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44173,6 +44194,9 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
 	  return false;
 	}
 
+      if (d->testing_p)
+	break;
+
       t1 = gen_reg_rtx (V4DImode);
       t2 = gen_reg_rtx (V4DImode);
 
@@ -44193,7 +44217,7 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
 	{
 	  struct expand_vec_perm_d d_copy = *d;
 	  d_copy.vmode = V8SFmode;
-	  d_copy.target = gen_reg_rtx (V8SFmode);
+	  d_copy.target = gen_vec_perm_reg (d, V8SFmode);
 	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
 	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
 	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44206,6 +44230,9 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
 	  return false;
 	}
 
+      if (d->testing_p)
+	break;
+
       t1 = gen_reg_rtx (V8SImode);
       t2 = gen_reg_rtx (V8SImode);
       t3 = gen_reg_rtx (V4DImode);
@@ -44298,6 +44325,8 @@  expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
     case V16QImode:
       /* These can be implemented via interleave.  We save one insn by
 	 stopping once we have promoted to V4SImode and then use pshufd.  */
+      if (d->testing_p)
+	return true;
       do
 	{
 	  rtx dest;
@@ -44655,6 +44684,7 @@  ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
   d.vmode = vmode;
   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
   d.testing_p = true;
+  d.test_regno = LAST_VIRTUAL_REGISTER;
 
   /* Given sufficient ISA support we can just return true here
      for selected vector modes.  */
@@ -44699,10 +44729,10 @@  ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 
   /* Otherwise we have to go through the motions and see if we can
      figure out how to generate the requested permutation.  */
-  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
-  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
+  d.target = gen_vec_perm_reg (d, d.vmode);
+  d.op1 = d.op0 = gen_vec_perm_reg (d, d.vmode);
   if (!d.one_operand_p)
-    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+    d.op1 = gen_vec_perm_reg (d, d.vmode);
 
   start_sequence ();
   ret = ix86_expand_vec_perm_const_1 (&d);