@@ -39499,6 +39499,7 @@ struct expand_vec_perm_d
rtx target, op0, op1;
unsigned char perm[MAX_VECT_LEN];
enum machine_mode vmode;
+ unsigned test_regno;
unsigned char nelt;
bool one_operand_p;
bool testing_p;
@@ -42419,6 +42420,17 @@ init_vselect_insn (void)
end_sequence ();
}
+/* Create a new pseudo of mode MODE or, when D->testing_p is set, a raw
+   dummy register numbered by pre-incrementing D->test_regno.  */
+
+ static rtx
+ gen_vec_perm_reg (struct expand_vec_perm_d *d, enum machine_mode mode)
+ {
+ if (d->testing_p)
+ return gen_raw_REG (mode, ++d->test_regno);
+ return gen_reg_rtx (mode);
+ }
+
/* Construct (set target (vec_select op0 (parallel perm))) and
return true if that's a valid instruction in the active ISA. */
@@ -42811,9 +42823,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
for (i = 0; i < 4; i++)
perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
- if (d->testing_p)
- return true;
- target = gen_reg_rtx (V4DImode);
+ target = gen_vec_perm_reg (d, V4DImode);
if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
perm, 4, false))
{
@@ -43411,7 +43421,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
else
dfinal.perm[i] = e;
}
- dremap.target = gen_reg_rtx (dremap.vmode);
+ dremap.target = gen_vec_perm_reg (d, dremap.vmode);
dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
dfinal.op1 = dfinal.op0;
dfinal.one_operand_p = true;
@@ -43845,6 +43855,9 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
return false;
gcc_assert (!d->one_operand_p);
+ if (d->testing_p)
+ return true;
+
nelt = d->nelt;
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
@@ -44053,6 +44066,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
switch (d->vmode)
{
case V4DFmode:
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V4DFmode);
t2 = gen_reg_rtx (V4DFmode);
@@ -44072,6 +44087,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
int mask = odd ? 0xdd : 0x88;
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V8SFmode);
t2 = gen_reg_rtx (V8SFmode);
t3 = gen_reg_rtx (V8SFmode);
@@ -44109,6 +44126,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
gcc_unreachable ();
case V8HImode:
+ if (d->testing_p)
+ break;
if (TARGET_SSSE3)
return expand_vec_perm_pshufb2 (d);
else
@@ -44130,6 +44149,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
break;
case V16QImode:
+ if (d->testing_p)
+ break;
if (TARGET_SSSE3)
return expand_vec_perm_pshufb2 (d);
else
@@ -44160,7 +44181,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
struct expand_vec_perm_d d_copy = *d;
d_copy.vmode = V4DFmode;
- d_copy.target = gen_reg_rtx (V4DFmode);
+ d_copy.target = gen_vec_perm_reg (d, V4DFmode);
d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44173,6 +44194,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
return false;
}
+ if (d->testing_p)
+ break;
+
t1 = gen_reg_rtx (V4DImode);
t2 = gen_reg_rtx (V4DImode);
@@ -44193,7 +44217,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
struct expand_vec_perm_d d_copy = *d;
d_copy.vmode = V8SFmode;
- d_copy.target = gen_reg_rtx (V8SFmode);
+ d_copy.target = gen_vec_perm_reg (d, V8SFmode);
d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44206,6 +44230,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
return false;
}
+ if (d->testing_p)
+ break;
+
t1 = gen_reg_rtx (V8SImode);
t2 = gen_reg_rtx (V8SImode);
t3 = gen_reg_rtx (V4DImode);
@@ -44298,6 +44325,8 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
case V16QImode:
/* These can be implemented via interleave. We save one insn by
stopping once we have promoted to V4SImode and then use pshufd. */
+ if (d->testing_p)
+ return true;
do
{
rtx dest;
@@ -44655,6 +44684,7 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
d.vmode = vmode;
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
d.testing_p = true;
+ d.test_regno = LAST_VIRTUAL_REGISTER;
/* Given sufficient ISA support we can just return true here
for selected vector modes. */
@@ -44699,10 +44729,10 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
/* Otherwise we have to go through the motions and see if we can
figure out how to generate the requested permutation. */
- d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
- d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
+ /* d.testing_p is set, so these are dummy registers numbered from d.test_regno. */
+ d.target = gen_vec_perm_reg (&d, d.vmode);
+ d.op1 = d.op0 = gen_vec_perm_reg (&d, d.vmode);
if (!d.one_operand_p)
- d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+ d.op1 = gen_vec_perm_reg (&d, d.vmode);
start_sequence ();
ret = ix86_expand_vec_perm_const_1 (&d);