diff mbox

Fix PR 61225

Message ID 000301d01395$675d5890$361809b0$@arm.com
State New
Headers show

Commit Message

Zhenqiang Chen Dec. 9, 2014, 9:49 a.m. UTC
> -----Original Message-----
> From: Jeff Law [mailto:law@redhat.com]
> Sent: Tuesday, December 09, 2014 5:29 AM
> To: Zhenqiang Chen
> Cc: Steven Bosscher; gcc-patches@gcc.gnu.org; Jakub Jelinek
> Subject: Re: [PATCH] Fix PR 61225
> 
> On 12/04/14 01:43, Zhenqiang Chen wrote:
> >>> > >
> >>> > >          Part of PR rtl-optimization/61225
> >>> > >          * combine.c (refer_same_reg_p): New function.
> >>> > >          (combine_instructions): Handle I1 -> I2 -> I3; I2 ->
insn.
> >>> > >          (try_combine): Add one more parameter TO_COMBINED_INSN,
> >>> > > which
> >> >is
> >>> > >          used to create a new insn parallel (TO_COMBINED_INSN,
I3).
> >>> > >
> >>> > >testsuite/ChangeLog:
> >>> > >2014-08-04  Zhenqiang Chen<zhenqiang.chen@linaro.org>
> >>> > >
> >>> > >          * gcc.target/i386/pr61225.c: New test.
> THanks for the updates and clarifications.  Just a few minor things and
while
> it's a bit of a hack, I'll approve:
> 
> 
> 
> > +
> > +/* A is a compare (reg1, 0) and B is SINGLE_SET which SET_SRC is reg2.
> > +   It returns TRUE, if reg1 == reg2, and no other refer of reg1
> > +   except A and B.  */
> > +
> > +static bool
> > +refer_same_reg_p (rtx_insn *a, rtx_insn *b)
> > +{
> > +  rtx seta = single_set (a);
> > +  rtx setb = single_set (b);
> > +
> > +  if (BLOCK_FOR_INSN (a) != BLOCK_FOR_INSN (b)
> > +      || !seta
> > +      || !setb)
> > +    return false;
> > 
> > +  if (GET_CODE (SET_SRC (seta)) != COMPARE
> > +      || GET_MODE_CLASS (GET_MODE (SET_DEST (seta))) != MODE_CC
> > +      || !REG_P (XEXP (SET_SRC (seta), 0))
> > +      || XEXP (SET_SRC (seta), 1) != const0_rtx
> > +      || !REG_P (SET_SRC (setb))
> > +      || REGNO (SET_SRC (setb)) != REGNO (XEXP (SET_SRC (seta), 0)))
> > +    return false;
> Do you need to verify SETA and SETB satisfy single_set?  Or has that
> already been done elsewhere?

A is NEXT_INSN (insn)
B is prev_nonnote_nondebug_insn (insn),

For I1 -> I2 -> B; I2 -> A;
LOG_LINK can make sure I1 and I2 are single_set, but not A and B. And I did
found codes in function try_combine, which can make sure B (or I3) is
single_set. 

So I think the check can skip failed cases at early stage.
 
> The name refer_same_reg_p seems wrong -- your function is verifying the
> underlying RTL store as well as the existence of a a dependency between
> the insns.  Can you try to come up with a better name?

Change it to can_reuse_cc_set_p.

> Please use CONST0_RTX (mode)  IIRC that'll allow this to work regardless
> of the size of the modes relative to the host word size.
 
Updated. 
 
> > +
> > +  if (DF_REG_USE_COUNT (REGNO (SET_SRC (setb))) > 2)
> > +    {
> > +      df_ref use;
> > +      rtx insn;
> > +      unsigned int i = REGNO (SET_SRC (setb));
> > +
> > +      for (use = DF_REG_USE_CHAIN (i); use; use = DF_REF_NEXT_REG
(use))
> > +        {
> > +	  insn = DF_REF_INSN (use);
> > +	  if (insn != a && insn != b && !(NOTE_P (insn) || DEBUG_INSN_P
> (insn)))
> > +	    return false;
> > +	}
> > +    }
> > +
> > +  return true;
> > +}
> Is this fragment really needed?  Does it ever trigger?  I'd think that
> for > 2 uses punting would be fine.  Do we really commonly have cases
> with > 2 uses, but where they're all in SETA and SETB?

The check is to make sure the correctness.  Here is a case,

int 
f1 (int *x)
{
  int t = --*x;
  if (!t)
    foo (x);
  return t;
}

  _4 = *x_3(D);
  _5 = _4 + -1;
  *x_3(D) = _5;
  # DEBUG t => _5
  if (_5 == 0)
   ...
  <bb 4>:
  return _5;

"_5" is used in "return _5". So we can not remove "_5 = _4 + -1".
 
> >   		  }
> >   	      }
> >
> > +	  /* Try to combine a compare insn that sets CC
> > +	     with a preceding insn that can set CC, and maybe with its
> > +	     logical predecessor as well.
> > +	     We need this special code because data flow connections
> > +	     do not get entered in LOG_LINKS.  */
> > +	  if ((prev = prev_nonnote_nondebug_insn (insn)) != NULL_RTX
> > +	      && refer_same_reg_p (insn, prev)
> > +	      && max_combine >= 4)
> > +	    {
> > +		struct insn_link *next1;
> > +		FOR_EACH_LOG_LINK (next1, prev)
> > +		  {
> > +		    rtx_insn *link1 = next1->insn;
> > +		    if (NOTE_P (link1))
> > +		      continue;
> > +		    /* I1 -> I2 -> I3; I2 -> insn;
> > +		       output parallel (insn, I3).  */
> > +		    FOR_EACH_LOG_LINK (nextlinks, link1)
> > +		      if ((next = try_combine (prev, link1,
> > +					       nextlinks->insn, NULL,
> > +					       &new_direct_jump_p,
> > +					       last_combined_insn, insn)) !=
0)
> > +
> > +			{
> > +			  delete_insn (insn);
> > +			  insn = next;
> > +			  statistics_counter_event (cfun, "four-insn
combine",
> 1);
> > +			  goto retry;
> > +			}
> > +		    /* I2 -> I3; I2 -> insn
> > +		       output next = parallel (insn, I3).  */
> > +		    if ((next = try_combine (prev, link1,
> > +					     NULL, NULL,
> > +					     &new_direct_jump_p,
> > +					     last_combined_insn, insn)) !=
0)
> > +
> > +		      {
> > +			delete_insn (insn);
> > +			insn = next;
> > +			statistics_counter_event (cfun, "three-insn
combine",
> 1);
> > +			goto retry;
> > +		      }
> > +		  }
> > +	    }
> So you've got two new combine cases here, but I think the testcase only
> tests one of them.  Can you include a testcase for both of hte major
> paths above (I1->I2->I3; I2->insn and I2->I3; I2->INSN)

pr61225.c is the case to cover I1->I2->I3; I2->insn.

For I2 -> I3; I2 -> insn, I tried my test cases and found peephole2 can also
handle them. So I removed the code from the patch.

Here is the final patch.
Bootstrap and no make check regression on X86-64.

ChangeLog:
2014-11-09  Zhenqiang Chen  <zhenqiang.chen@linaro.org>

	Part of PR rtl-optimization/61225
	* combine.c (can_reuse_cc_set_p): New function.
	(combine_instructions): Handle I1 -> I2 -> I3; I2 -> insn.
	(try_combine): Add one more parameter TO_COMBINED_INSN, which
	is used to create a new insn parallel (TO_COMBINED_INSN, I3).

testsuite/ChangeLog:
2014-11-09  Zhenqiang Chen<zhenqiang.chen@linaro.org>

	* gcc.target/i386/pr61225.c: New test.

+/* { dg-final { cleanup-rtl-dump "combine" } } */

Comments

Jeff Law Dec. 9, 2014, 6:51 p.m. UTC | #1
On 12/09/14 02:49, Zhenqiang Chen wrote:
>> Do you need to verify SETA and SETB satisfy single_set?  Or has that
>> already been done elsewhere?
>
> A is NEXT_INSN (insn)
> B is prev_nonnote_nondebug_insn (insn),
>
> For I1 -> I2 -> B; I2 -> A;
> LOG_LINK can make sure I1 and I2 are single_set, but not A and B. And I did
> found codes in function try_combine, which can make sure B (or I3) is
> single_set.
>
> So I think the check can skip failed cases at early stage.
Thanks for doing the research on this.

>
> The check is to make sure the correctness.  Here is a case,
>
> int
> f1 (int *x)
> {
>    int t = --*x;
>    if (!t)
>      foo (x);
>    return t;
> }
>
>    _4 = *x_3(D);
>    _5 = _4 + -1;
>    *x_3(D) = _5;
>    # DEBUG t => _5
>    if (_5 == 0)
>     ...
>    <bb 4>:
>    return _5;
>
> "_5" is used in "return _5". So we can not remove "_5 = _4 + -1".
Right, but ISTM that if the # uses > 2, then we could just return false 
rather than bothering to see if all the uses are consumed by A or B. 
It's not a big deal, I just have a hard time seeing that doing something 
more complex than "if (# uses > 2) return false;" makes sense.




>> So you've got two new combine cases here, but I think the testcase only
>> tests one of them.  Can you include a testcase for both of hte major
>> paths above (I1->I2->I3; I2->insn and I2->I3; I2->INSN)
>
> pr61225.c is the case to cover I1->I2->I3; I2->insn.
>
> For I2 -> I3; I2 -> insn, I tried my test cases and found peephole2 can also
> handle them. So I removed the code from the patch.
Seems like the reasonable thing to do.

>
> Here is the final patch.
> Bootstrap and no make check regression on X86-64.
>
> ChangeLog:
> 2014-11-09  Zhenqiang Chen  <zhenqiang.chen@linaro.org>
>
> 	Part of PR rtl-optimization/61225
> 	* combine.c (can_reuse_cc_set_p): New function.
> 	(combine_instructions): Handle I1 -> I2 -> I3; I2 -> insn.
> 	(try_combine): Add one more parameter TO_COMBINED_INSN, which
> 	is used to create a new insn parallel (TO_COMBINED_INSN, I3).
>
> testsuite/ChangeLog:
> 2014-11-09  Zhenqiang Chen<zhenqiang.chen@linaro.org>
>
> 	* gcc.target/i386/pr61225.c: New test.
>

OK for the trunk.

jeff
Segher Boessenkool Dec. 9, 2014, 7:07 p.m. UTC | #2
On Tue, Dec 09, 2014 at 05:49:18PM +0800, Zhenqiang Chen wrote:
> > Do you need to verify SETA and SETB satisfy single_set?  Or has that
> > already been done elsewhere?
> 
> A is NEXT_INSN (insn)
> B is prev_nonnote_nondebug_insn (insn),
> 
> For I1 -> I2 -> B; I2 -> A;
> LOG_LINK can make sure I1 and I2 are single_set,

It cannot, not anymore anyway.  LOG_LINKs can point to an insn with multiple
SETs; multiple LOG_LINKs can point to such an insn.

The only thing a LOG_LINK from Y to X tells you is that Y is the earliest
insn after X that uses some register set by X (and it knows which register,
too, nowadays).

> > > +  if (DF_REG_USE_COUNT (REGNO (SET_SRC (setb))) > 2)
> > > +    {
> > > +      df_ref use;
> > > +      rtx insn;
> > > +      unsigned int i = REGNO (SET_SRC (setb));
> > > +
> > > +      for (use = DF_REG_USE_CHAIN (i); use; use = DF_REF_NEXT_REG
> (use))
> > > +        {
> > > +	  insn = DF_REF_INSN (use);
> > > +	  if (insn != a && insn != b && !(NOTE_P (insn) || DEBUG_INSN_P
> > (insn)))
> > > +	    return false;
> > > +	}
> > > +    }
> > > +
> > > +  return true;
> > > +}
> > Is this fragment really needed?  Does it ever trigger?  I'd think that
> > for > 2 uses punting would be fine.  Do we really commonly have cases
> > with > 2 uses, but where they're all in SETA and SETB?

Can't you just check for a death note on the second insn?  Together with
reg_used_between_p?

> > > +	  /* Try to combine a compare insn that sets CC
> > > +	     with a preceding insn that can set CC, and maybe with its
> > > +	     logical predecessor as well.
> > > +	     We need this special code because data flow connections
> > > +	     do not get entered in LOG_LINKS.  */

I think you mean "not _all_ data flow connections"?

> > > +	  if ((prev = prev_nonnote_nondebug_insn (insn)) != NULL_RTX
> > > +	      && refer_same_reg_p (insn, prev)
> > > +	      && max_combine >= 4)
> > > +	    {
> > > +		struct insn_link *next1;
> > > +		FOR_EACH_LOG_LINK (next1, prev)
> > > +		  {
> > > +		    rtx_insn *link1 = next1->insn;
> > > +		    if (NOTE_P (link1))
> > > +		      continue;
> > > +		    /* I1 -> I2 -> I3; I2 -> insn;
> > > +		       output parallel (insn, I3).  */
> > > +		    FOR_EACH_LOG_LINK (nextlinks, link1)
> > > +		      if ((next = try_combine (prev, link1,
> > > +					       nextlinks->insn, NULL,
> > > +					       &new_direct_jump_p,
> > > +					       last_combined_insn, insn)) !=
> 0)
> > > +
> > > +			{
> > > +			  delete_insn (insn);
> > > +			  insn = next;
> > > +			  statistics_counter_event (cfun, "four-insn
> combine",
> > 1);
> > > +			  goto retry;
> > > +			}
> > > +		    /* I2 -> I3; I2 -> insn
> > > +		       output next = parallel (insn, I3).  */
> > > +		    if ((next = try_combine (prev, link1,
> > > +					     NULL, NULL,
> > > +					     &new_direct_jump_p,
> > > +					     last_combined_insn, insn)) !=
> 0)
> > > +
> > > +		      {
> > > +			delete_insn (insn);
> > > +			insn = next;
> > > +			statistics_counter_event (cfun, "three-insn
> combine",
> > 1);
> > > +			goto retry;
> > > +		      }
> > > +		  }
> > > +	    }
> > So you've got two new combine cases here, but I think the testcase only
> > tests one of them.  Can you include a testcase for both of hte major
> > paths above (I1->I2->I3; I2->insn and I2->I3; I2->INSN)
> 
> pr61225.c is the case to cover I1->I2->I3; I2->insn.
> 
> For I2 -> I3; I2 -> insn, I tried my test cases and found peephole2 can also
> handle them. So I removed the code from the patch.

Why?  The simpler case has much better chances of being used.

In fact, there are many more cases you could handle:

You handle

I1 -> I2 -> I3; I2 -> insn
      I2 -> I3; I2 -> insn

but there are also

   I1,I2 -> I3; I2 -> insn

and the many 4-insn combos, too.
But that's not all: instead of just dealing with I2->insn, you can just as
well have I1->insn or I0->insn, and if you could handle the SET not dying
in the resulting insn, I3->insn.

In fact, in that case you really only need to handle I3->insn (no other
instructions involved), as a simple 2-insn combination that combines into
the earlier insn instead of the later, to get the effect you want.

Just like your patch, that would pull "insn" earlier, but it would do it
much more explicitly.


Some comments on the patch...

> +/* A is a compare (reg1, 0) and B is SINGLE_SET which SET_SRC is reg2.
> +   It returns TRUE, if reg1 == reg2, and no other refer of reg1
> +   except A and B.  */

That sound like the only correct inputs are such a compare etc., but the
routine tests whether that is true.

> +static bool
> +can_reuse_cc_set_p (rtx_insn *a, rtx_insn *b)
> +{
> +  rtx seta = single_set (a);
> +  rtx setb = single_set (b);
> +
> +  if (BLOCK_FOR_INSN (a) != BLOCK_FOR_INSN (b)

Neither the comment nor the function name mention this.  This test is
better placed in the caller of this function, anyway.

> +	  /* Try to combine a compare insn that sets CC
> +	     with a preceding insn that can set CC, and maybe with its
> +	     logical predecessor as well.

If you do remove the I2->I3 case this comment needs updating.  If you
don't remove it, it should be tried before the I1->I2->I3 case.

> +   TO_COMBINED_INSN is an insn after I3 that sets CC flags.  It is used to
> +   combine with I3 to make a new insn.  */

"combine into I3", to make clear the resulting insn is placed at I3?

> @@ -3323,7 +3396,11 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn
> *i1, rtx_insn *i0,
>  	  rtx old = newpat;
>  	  total_sets = 1 + extra_sets;
>  	  newpat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_sets));
> -	  XVECEXP (newpat, 0, 0) = old;
> +
> +	  if (to_combined_insn)
> +	    XVECEXP (newpat, 0, --total_sets) = old;
> +	  else
> +	    XVECEXP (newpat, 0, 0) = old;
>  	}

Is this correct?  If so, it needs a big fat comment, because it is
not exactly obvious :-)

Also, it doesn't handle at all the case where the new pattern already is
a PARALLEL; can that never happen?

Cheers,


Segher
Jeff Law Dec. 9, 2014, 7:15 p.m. UTC | #3
On 12/09/14 12:07, Segher Boessenkool wrote:
> On Tue, Dec 09, 2014 at 05:49:18PM +0800, Zhenqiang Chen wrote:
>>> Do you need to verify SETA and SETB satisfy single_set?  Or has that
>>> already been done elsewhere?
>>
>> A is NEXT_INSN (insn)
>> B is prev_nonnote_nondebug_insn (insn),
>>
>> For I1 -> I2 -> B; I2 -> A;
>> LOG_LINK can make sure I1 and I2 are single_set,
>
> It cannot, not anymore anyway.  LOG_LINKs can point to an insn with multiple
> SETs; multiple LOG_LINKs can point to such an insn.
So let's go ahead and put a single_set test in this function.

>>>> Is this fragment really needed?  Does it ever trigger?  I'd think that
>>> for > 2 uses punting would be fine.  Do we really commonly have cases
>>> with > 2 uses, but where they're all in SETA and SETB?
>
> Can't you just check for a death note on the second insn?  Together with
> reg_used_between_p?
Yea, that'd accomplish the same thing I think Zhenqiang is trying to 
catch and is simpler than walking the lists.

>
>>>> +	  /* Try to combine a compare insn that sets CC
>>>> +	     with a preceding insn that can set CC, and maybe with its
>>>> +	     logical predecessor as well.
>>>> +	     We need this special code because data flow connections
>>>> +	     do not get entered in LOG_LINKS.  */
>
> I think you mean "not _all_ data flow connections"?
I almost said something about this comment, but figured I was nitpicking 
too much :-)

>>> So you've got two new combine cases here, but I think the testcase only
>>> tests one of them.  Can you include a testcase for both of hte major
>>> paths above (I1->I2->I3; I2->insn and I2->I3; I2->INSN)
>>
>> pr61225.c is the case to cover I1->I2->I3; I2->insn.
>>
>> For I2 -> I3; I2 -> insn, I tried my test cases and found peephole2 can also
>> handle them. So I removed the code from the patch.
>
> Why?  The simpler case has much better chances of being used.
The question does it actually catch anything not already handled?  I 
guess you could argue that doing it in combine is better than peep2 and 
I'd agree with that.

>
> In fact, there are many more cases you could handle:
>
> You handle
>
> I1 -> I2 -> I3; I2 -> insn
>        I2 -> I3; I2 -> insn
>
> but there are also
>
>     I1,I2 -> I3; I2 -> insn
>
> and the many 4-insn combos, too.
Yes, but I wonder how much of this is really necessary in practice.  We 
could do exhaustive testing here, but I suspect the payoff isn't all 
that great.  Thus I'm comfortable with faulting in the cases we actually 
find are useful in practice.

>
>> +/* A is a compare (reg1, 0) and B is SINGLE_SET which SET_SRC is reg2.
>> +   It returns TRUE, if reg1 == reg2, and no other refer of reg1
>> +   except A and B.  */
>
> That sound like the only correct inputs are such a compare etc., but the
> routine tests whether that is true.
Correct, the RTL has to have a specific form and that is tested for. 
Comment updates can't hurt.


>
>> +static bool
>> +can_reuse_cc_set_p (rtx_insn *a, rtx_insn *b)
>> +{
>> +  rtx seta = single_set (a);
>> +  rtx setb = single_set (b);
>> +
>> +  if (BLOCK_FOR_INSN (a) != BLOCK_FOR_INSN (b)
>
> Neither the comment nor the function name mention this.  This test is
> better placed in the caller of this function, anyway.
Didn't consider it terribly important.  Moving it to the caller doesn't 
change anything significantly, though I would agree it's martinally cleaner.

>
>> @@ -3323,7 +3396,11 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn
>> *i1, rtx_insn *i0,
>>   	  rtx old = newpat;
>>   	  total_sets = 1 + extra_sets;
>>   	  newpat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_sets));
>> -	  XVECEXP (newpat, 0, 0) = old;
>> +
>> +	  if (to_combined_insn)
>> +	    XVECEXP (newpat, 0, --total_sets) = old;
>> +	  else
>> +	    XVECEXP (newpat, 0, 0) = old;
>>   	}
>
> Is this correct?  If so, it needs a big fat comment, because it is
> not exactly obvious :-)
>
> Also, it doesn't handle at all the case where the new pattern already is
> a PARALLEL; can that never happen?
I'd convinced myself it was.  But yes, a comment here would be good.

Presumably you're thinking about a PARALLEL that satisfies single_set_p?

jeff
Segher Boessenkool Dec. 10, 2014, 1:47 p.m. UTC | #4
On Tue, Dec 09, 2014 at 12:15:30PM -0700, Jeff Law wrote:
> >>@@ -3323,7 +3396,11 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn
> >>*i1, rtx_insn *i0,
> >>  	  rtx old = newpat;
> >>  	  total_sets = 1 + extra_sets;
> >>  	  newpat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_sets));
> >>-	  XVECEXP (newpat, 0, 0) = old;
> >>+
> >>+	  if (to_combined_insn)
> >>+	    XVECEXP (newpat, 0, --total_sets) = old;
> >>+	  else
> >>+	    XVECEXP (newpat, 0, 0) = old;
> >>  	}
> >
> >Is this correct?  If so, it needs a big fat comment, because it is
> >not exactly obvious :-)
> >
> >Also, it doesn't handle at all the case where the new pattern already is
> >a PARALLEL; can that never happen?
> I'd convinced myself it was.  But yes, a comment here would be good.
> 
> Presumably you're thinking about a PARALLEL that satisfies single_set_p?

I wasn't thinking about anything in particular; this code does not handle
a PARALLEL newpat with to_combined_insn correctly, and it doesn't say it
cannot happen.

But yes, I don't see why it could not happen?  E.g. a parallel of multiple
sets with all but one of those dead?

Why should it be single_set here anyway?  (Maybe I need more coffee, sorry
if so).


Segher
Jeff Law Jan. 14, 2015, 9:50 p.m. UTC | #5
On 12/10/14 06:47, Segher Boessenkool wrote:
> On Tue, Dec 09, 2014 at 12:15:30PM -0700, Jeff Law wrote:
>>>> @@ -3323,7 +3396,11 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn
>>>> *i1, rtx_insn *i0,
>>>>   	  rtx old = newpat;
>>>>   	  total_sets = 1 + extra_sets;
>>>>   	  newpat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_sets));
>>>> -	  XVECEXP (newpat, 0, 0) = old;
>>>> +
>>>> +	  if (to_combined_insn)
>>>> +	    XVECEXP (newpat, 0, --total_sets) = old;
>>>> +	  else
>>>> +	    XVECEXP (newpat, 0, 0) = old;
>>>>   	}
>>>
>>> Is this correct?  If so, it needs a big fat comment, because it is
>>> not exactly obvious :-)
>>>
>>> Also, it doesn't handle at all the case where the new pattern already is
>>> a PARALLEL; can that never happen?
>> I'd convinced myself it was.  But yes, a comment here would be good.
>>
>> Presumably you're thinking about a PARALLEL that satisfies single_set_p?
>
> I wasn't thinking about anything in particular; this code does not handle
> a PARALLEL newpat with to_combined_insn correctly, and it doesn't say it
> cannot happen.
It situations like this where I really need to just put the damn patch 
into my tree and fire up the debugger and poke at it for a while.

Regardless, I got mail from Zhenqiang that he left ARM at the start of 
the year for other opportunities and won't be doing GCC work.

My initial thought is to attach his work to date to the BZ, we can use 
it as a starting point if we want to pursue this missed optimization 
further (it's a regression and thus suitable for stage4 if we're so 
inclined).

Thoughts?

jeff
diff mbox

Patch

diff --git a/gcc/combine.c b/gcc/combine.c
index e6deb41..7360ca3 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -431,7 +431,7 @@  static int can_combine_p (rtx_insn *, rtx_insn *,
rtx_insn *, rtx_insn *,
 static int combinable_i3pat (rtx_insn *, rtx *, rtx, rtx, rtx, int, int,
rtx *);
 static int contains_muldiv (rtx);
 static rtx_insn *try_combine (rtx_insn *, rtx_insn *, rtx_insn *, rtx_insn
*,
-			      int *, rtx_insn *);
+			      int *, rtx_insn *, rtx_insn *);
 static void undo_all (void);
 static void undo_commit (void);
 static rtx *find_split_point (rtx *, rtx_insn *, bool);
@@ -1120,6 +1120,46 @@  insn_a_feeds_b (rtx_insn *a, rtx_insn *b)
 #endif
   return false;
 }
+
+/* A is a compare (reg1, 0) and B is SINGLE_SET which SET_SRC is reg2.
+   It returns TRUE, if reg1 == reg2, and no other refer of reg1
+   except A and B.  */
+
+static bool
+can_reuse_cc_set_p (rtx_insn *a, rtx_insn *b)
+{
+  rtx seta = single_set (a);
+  rtx setb = single_set (b);
+
+  if (BLOCK_FOR_INSN (a) != BLOCK_FOR_INSN (b)
+      || !seta
+      || !setb)
+    return false;
+
+  if (GET_CODE (SET_SRC (seta)) != COMPARE
+      || GET_MODE_CLASS (GET_MODE (SET_DEST (seta))) != MODE_CC
+      || !REG_P (XEXP (SET_SRC (seta), 0))
+      || XEXP (SET_SRC (seta), 1) != CONST0_RTX (GET_MODE (SET_SRC (seta)))
+      || !REG_P (SET_SRC (setb))
+      || REGNO (SET_SRC (setb)) != REGNO (XEXP (SET_SRC (seta), 0)))
+    return false;
+
+  if (DF_REG_USE_COUNT (REGNO (SET_SRC (setb))) > 2)
+    {
+      df_ref use;
+      rtx insn;
+      unsigned int i = REGNO (SET_SRC (setb));
+
+      for (use = DF_REG_USE_CHAIN (i); use; use = DF_REF_NEXT_REG (use))
+        {
+	  insn = DF_REF_INSN (use);
+	  if (insn != a && insn != b && !(NOTE_P (insn) || DEBUG_INSN_P
(insn)))
+	    return false;
+	}
+    }
+  return true;
+}
+
 

 /* Main entry point for combiner.  F is the first insn of the function.
    NREGS is the first unused pseudo-reg number.
@@ -1129,10 +1169,7 @@  insn_a_feeds_b (rtx_insn *a, rtx_insn *b)
 static int
 combine_instructions (rtx_insn *f, unsigned int nregs)
 {
-  rtx_insn *insn, *next;
-#ifdef HAVE_cc0
-  rtx_insn *prev;
-#endif
+  rtx_insn *insn, *next, *prev;
   struct insn_link *links, *nextlinks;
   rtx_insn *first;
   basic_block last_bb;
@@ -1279,7 +1316,7 @@  combine_instructions (rtx_insn *f, unsigned int nregs)
 	  FOR_EACH_LOG_LINK (links, insn)
 	    if ((next = try_combine (insn, links->insn, NULL,
 				     NULL, &new_direct_jump_p,
-				     last_combined_insn)) != 0)
+				     last_combined_insn, NULL)) != 0)
 	      {
 		statistics_counter_event (cfun, "two-insn combine", 1);
 		goto retry;
@@ -1300,7 +1337,7 @@  combine_instructions (rtx_insn *f, unsigned int nregs)
 		FOR_EACH_LOG_LINK (nextlinks, link)
 		  if ((next = try_combine (insn, link, nextlinks->insn,
 					   NULL, &new_direct_jump_p,
-					   last_combined_insn)) != 0)
+					   last_combined_insn, NULL)) != 0)
 		    {
 		      statistics_counter_event (cfun, "three-insn combine",
1);
 		      goto retry;
@@ -1322,13 +1359,13 @@  combine_instructions (rtx_insn *f, unsigned int
nregs)
 	    {
 	      if ((next = try_combine (insn, prev, NULL, NULL,
 				       &new_direct_jump_p,
-				       last_combined_insn)) != 0)
+				       last_combined_insn, NULL)) != 0)
 		goto retry;
 
 	      FOR_EACH_LOG_LINK (nextlinks, prev)
 		  if ((next = try_combine (insn, prev, nextlinks->insn,
 					   NULL, &new_direct_jump_p,
-					   last_combined_insn)) != 0)
+					   last_combined_insn, NULL)) != 0)
 		    goto retry;
 	    }
 
@@ -1342,13 +1379,13 @@  combine_instructions (rtx_insn *f, unsigned int
nregs)
 	    {
 	      if ((next = try_combine (insn, prev, NULL, NULL,
 				       &new_direct_jump_p,
-				       last_combined_insn)) != 0)
+				       last_combined_insn, NULL)) != 0)
 		goto retry;
 
 	      FOR_EACH_LOG_LINK (nextlinks, prev)
 		  if ((next = try_combine (insn, prev, nextlinks->insn,
 					   NULL, &new_direct_jump_p,
-					   last_combined_insn)) != 0)
+					   last_combined_insn, NULL)) != 0)
 		    goto retry;
 	    }
 
@@ -1364,7 +1401,7 @@  combine_instructions (rtx_insn *f, unsigned int nregs)
 		  && sets_cc0_p (PATTERN (prev))
 		  && (next = try_combine (insn, links->insn,
 					  prev, NULL, &new_direct_jump_p,
-					  last_combined_insn)) != 0)
+					  last_combined_insn, NULL)) != 0)
 		goto retry;
 #endif
 
@@ -1377,7 +1414,7 @@  combine_instructions (rtx_insn *f, unsigned int nregs)
 		if ((next = try_combine (insn, links->insn,
 					 nextlinks->insn, NULL,
 					 &new_direct_jump_p,
-					 last_combined_insn)) != 0)
+					 last_combined_insn, NULL)) != 0)
 
 		  {
 		    statistics_counter_event (cfun, "three-insn combine",
1);
@@ -1406,7 +1443,7 @@  combine_instructions (rtx_insn *f, unsigned int nregs)
 		      if ((next = try_combine (insn, link, link1,
 					       nextlinks->insn,
 					       &new_direct_jump_p,
-					       last_combined_insn)) != 0)
+					       last_combined_insn, NULL)) !=
0)
 			{
 			  statistics_counter_event (cfun, "four-insn
combine", 1);
 			  goto retry;
@@ -1417,7 +1454,7 @@  combine_instructions (rtx_insn *f, unsigned int nregs)
 		      if ((next = try_combine (insn, link, link1,
 					       nextlinks->insn,
 					       &new_direct_jump_p,
-					       last_combined_insn)) != 0)
+					       last_combined_insn, NULL)) !=
0)
 			{
 			  statistics_counter_event (cfun, "four-insn
combine", 1);
 			  goto retry;
@@ -1434,7 +1471,7 @@  combine_instructions (rtx_insn *f, unsigned int nregs)
 		      if ((next = try_combine (insn, link, link1,
 					       nextlinks->insn,
 					       &new_direct_jump_p,
-					       last_combined_insn)) != 0)
+					       last_combined_insn, NULL)) !=
0)
 			{
 			  statistics_counter_event (cfun, "four-insn
combine", 1);
 			  goto retry;
@@ -1444,7 +1481,7 @@  combine_instructions (rtx_insn *f, unsigned int nregs)
 		      if ((next = try_combine (insn, link, link1,
 					       nextlinks->insn,
 					       &new_direct_jump_p,
-					       last_combined_insn)) != 0)
+					       last_combined_insn, NULL)) !=
0)
 			{
 			  statistics_counter_event (cfun, "four-insn
combine", 1);
 			  goto retry;
@@ -1452,6 +1489,37 @@  combine_instructions (rtx_insn *f, unsigned int
nregs)
 		  }
 	      }
 
+	  /* Try to combine a compare insn that sets CC
+	     with a preceding insn that can set CC, and maybe with its
+	     logical predecessor as well.
+	     We need this special code because data flow connections
+	     do not get entered in LOG_LINKS.  */
+	  if ((prev = prev_nonnote_nondebug_insn (insn)) != NULL_RTX
+	      && can_reuse_cc_set_p (insn, prev)
+	      && max_combine >= 4)
+	    {
+		struct insn_link *next1;
+		FOR_EACH_LOG_LINK (next1, prev)
+		  {
+		    rtx_insn *link1 = next1->insn;
+		    if (NOTE_P (link1))
+		      continue;
+		    /* I1 -> I2 -> I3; I2 -> insn;
+		       output parallel (insn, I3).  */
+		    FOR_EACH_LOG_LINK (nextlinks, link1)
+		      if ((next = try_combine (prev, link1,
+					       nextlinks->insn, NULL,
+					       &new_direct_jump_p,
+					       last_combined_insn, insn)) !=
0)
+
+			{
+			  delete_insn (insn);
+			  insn = next;
+			  statistics_counter_event (cfun, "four-insn
combine", 1);
+			  goto retry;
+			}
+		  }
+	    }
 	  /* Try this insn with each REG_EQUAL note it links back to.  */
 	  FOR_EACH_LOG_LINK (links, insn)
 	    {
@@ -1477,7 +1545,7 @@  combine_instructions (rtx_insn *f, unsigned int nregs)
 		  i2mod_new_rhs = copy_rtx (note);
 		  next = try_combine (insn, i2mod, NULL, NULL,
 				      &new_direct_jump_p,
-				      last_combined_insn);
+				      last_combined_insn, NULL);
 		  i2mod = NULL;
 		  if (next)
 		    {
@@ -2533,11 +2601,15 @@  can_split_parallel_of_n_reg_sets (rtx_insn *insn,
int n)
 
    LAST_COMBINED_INSN is either I3, or some insn after I3 that has
    been I3 passed to an earlier try_combine within the same basic
-   block.  */
+   block.
+
+   TO_COMBINED_INSN is an insn after I3 that sets CC flags.  It is used to
+   combine with I3 to make a new insn.  */
 
 static rtx_insn *
 try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
-	     int *new_direct_jump_p, rtx_insn *last_combined_insn)
+	     int *new_direct_jump_p, rtx_insn *last_combined_insn,
+	     rtx_insn *to_combined_insn)
 {
   /* New patterns for I3 and I2, respectively.  */
   rtx newpat, newi2pat = 0;
@@ -2630,6 +2702,7 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1,
rtx_insn *i0,
       || cant_combine_insn_p (i2)
       || (i1 && cant_combine_insn_p (i1))
       || (i0 && cant_combine_insn_p (i0))
+      || (to_combined_insn && cant_combine_insn_p (to_combined_insn))
       || likely_spilled_retval_p (i3))
     return 0;
 
@@ -3323,7 +3396,11 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn
*i1, rtx_insn *i0,
 	  rtx old = newpat;
 	  total_sets = 1 + extra_sets;
 	  newpat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_sets));
-	  XVECEXP (newpat, 0, 0) = old;
+
+	  if (to_combined_insn)
+	    XVECEXP (newpat, 0, --total_sets) = old;
+	  else
+	    XVECEXP (newpat, 0, 0) = old;
 	}
 
       if (added_sets_0)
@@ -3346,6 +3423,21 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn
*i1, rtx_insn *i0,
 	  if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
 	    t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0,
0);
 
+	  if (to_combined_insn)
+	    {
+	      rtx todest = NULL_RTX, tosrc = NULL_RTX;
+	      if (can_combine_p (i2, to_combined_insn, NULL, NULL,
+				 i3, NULL, &todest, &tosrc))
+		{
+		  rtx pat = copy_rtx (PATTERN (to_combined_insn));
+		  t = subst (pat, todest, tosrc, 0, 0, 0);
+		}
+	      else
+		{
+		  undo_all ();
+		  return 0;
+		}
+	    }
 	  XVECEXP (newpat, 0, --total_sets) = t;
 	}
     }
diff --git a/gcc/testsuite/gcc.target/i386/pr61225.c
b/gcc/testsuite/gcc.target/i386/pr61225.c
new file mode 100644
index 0000000..9c08102
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr61225.c
@@ -0,0 +1,16 @@ 
+/* PR rtl-optimization/61225 */
+/* { dg-do compile } */
+/* { dg-options "-Os -fdump-rtl-combine-details" } */
+
+void foo (void *);
+
+int *
+f1 (int *x)
+{
+  if (!--*x)
+    foo (x);
+  return x;
+}
+
+/* { dg-final { scan-rtl-dump "Successfully matched this instruction"
"combine" } } */