Patchwork Reduce conservativeness in REE using machine model (issue6631066)

login
register
mail settings
Submitter Teresa Johnson
Date Oct. 18, 2012, 3:21 p.m.
Message ID <CAAe5K+WKxKLq4JGreW6tKaSKQqhBNVvO7bepYcotQiCHUnzDfQ@mail.gmail.com>
Download mbox | patch
Permalink /patch/192352/
State New
Headers show

Comments

Teresa Johnson - Oct. 18, 2012, 3:21 p.m.
The attached patch implements avoids conservative behavior in REE by allowing
removal of redundant extends when the def feeds another extend with a different
mode. This works because in merge_def_and_ext only calls combine_set_extension
if the candidate for removal has a wider mode than the def extend's
mode, otherwise
the def extend mode is preserved. In combine_set_extension the def is
modified to use
the wider candidate's mode.

See my previous message in this thread for more description of why this
solution is preferred and works. Added two test cases to check for removal
of redundant zero and sign extends.

Bootstrapped and tested on x86_64-unknown-linux-gnu. Is this ok for trunk?

Thanks,
Teresa

2012-10-18  Teresa Johnson  <tejohnson@google.com>

        * ree.c (add_removable_extension): Remove unnecessary
        mode check with other extension.

2012-10-18  Teresa Johnson  <tejohnson@google.com>

        * gcc.c-torture/execute/20111227-2.c: New test.
        * gcc.c-torture/execute/20111227-3.c: Ditto.


Index: gcc.c-torture/execute/20111227-2.c
===================================================================

/* Testcase derived from 20111227-1.c to ensure that REE is combining
   redundant zero extends with zero extend to wider mode.  */
/* { dg-options "-fdump-rtl-ree -O" } */
extern void abort (void);

unsigned short s;
unsigned int i;
unsigned long l;
unsigned char v = -1;

void __attribute__((noinline,noclone))
bar (int t)
{
  if (t == 2 && s != 0xff)
    abort ();
  if (t == 1 && i != 0xff)
    abort ();
  if (t == 0 && l != 0xff)
    abort ();
}

void __attribute__((noinline,noclone))
foo (unsigned char *a, int t)
{
  unsigned char r = v;

  if (t == 2)
    s = (unsigned short) r;
  else if (t == 1)
    i = (unsigned int) r;
  else if (t == 0)
    l = (unsigned long) r;
  bar (t);
}

int main(void)
{
  foo (&v, 0);
  foo (&v, 1);
  foo (&v, 2);
  return 0;
}
/* { dg-final { scan-rtl-dump "Elimination opportunities = 3 realized
= 3" "ree" } }  */
/* { dg-final { cleanup-rtl-dump "ree" } }  */


Index: gcc.c-torture/execute/20111227-3.c
===================================================================
/* Testcase derived from 20111227-1.c to ensure that REE is combining
   redundant sign extends with sign extend to wider mode.  */
/* { dg-options "-fdump-rtl-ree -O" } */

extern void abort (void);

signed short s;
signed int i;
signed long l;
signed char v = -1;

void __attribute__((noinline,noclone))
bar (int t)
{
  if (t == 2 && s != -1)
    abort ();
  if (t == 1 && i != -1)
    abort ();
  if (t == 0 && l != -1)
    abort ();
}

void __attribute__((noinline,noclone))
foo (signed char *a, int t)
{
  signed char r = v;

  if (t == 2)
    s = (signed short) r;
  else if (t == 1)
    i = (signed int) r;
  else if (t == 0)
    l = (signed long) r;
  bar (t);
}

int main(void)
{
  foo (&v, 0);
  foo (&v, 1);
  foo (&v, 2);
  return 0;
}
/* { dg-final { scan-rtl-dump "Elimination opportunities = 3 realized
= 3" "ree" } }  */
/* { dg-final { cleanup-rtl-dump "ree" } }  */


On Mon, Oct 15, 2012 at 2:04 PM, Teresa Johnson <tejohnson@google.com> wrote:
> On Fri, Oct 12, 2012 at 1:23 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> On Thu, Oct 11, 2012 at 02:44:12PM -0700, Teresa Johnson wrote:
>>> Revised patch to address conservative behavior in redundant extend
>>> elimination that was resulting in redundant extends not being
>>> removed. Now uses a new target hook machine_mode_from_attr_mode
>>> which is currently enabled only for i386.
>>
>> I still don't like it, the hook still is about how it is implemented
>> instead of what target property it wants to ask (the important thing
>> there is that a {QI,HI} -> SImode zero extension instruction on x86_64
>> performs {QI,HI} -> DImode extension actually).  That isn't the case for any
>> other modes, isn't the case for sign extension etc.
>
> That's true, although for sign extends the attr modes being set in
> i386.md ensure that this won't do the wrong thing as the the attr
> modes in the machine desc file match the machine_mode. However, this
> ends up leading to the conservative behavior remaining for sign
> extends (see testcase below).
>
>>
>> Can you please post a testcase first?
>
> This was exposed by the snappy decompression code. However, I was able
> to reproduce it by modifying the testcase submitted with the fix that
> introduced this checking (gcc.c-torture/execute/20111227-1.c for
> http://gcc.gnu.org/ml/gcc-patches/2011-12/msg01744.html). This test
> case was failing because a sign_extend was being combined with a
> zero_extend, so the instruction code check below fixed it, and the
> mode check was unnecessary:
>
>       /* Second, make sure the reaching definitions don't feed another and
>          different extension.  FIXME: this obviously can be improved.  */
>       for (def = defs; def; def = def->next)
>         if ((idx = def_map[INSN_UID(DF_REF_INSN (def->ref))])
>             && (cand = &VEC_index (ext_cand, *insn_list, idx - 1))
>             && (cand->code != code || cand->mode != mode))
>
> Here is my modified test case that exposes the conservative behavior I
> saw in snappy:
>
> ------------------------
> extern void abort (void);
>
> unsigned short s;
> unsigned int i;
> unsigned long l;
> unsigned char v = -1;
>
> void __attribute__((noinline,noclone))
> bar (int t)
> {
>   if (t == 2 && s != 0xff)
>     abort ();
>   if (t == 1 && i != 0xff)
>     abort ();
>   if (t == 0 && l != 0xff)
>     abort ();
> }
>
> void __attribute__((noinline,noclone))
> foo (unsigned char *a, int t)
> {
>   unsigned char r = v;
>
>   if (t == 2)
>     s = (unsigned short) r;
>   else if (t == 1)
>     i = (unsigned int) r;
>   else if (t == 0)
>     l = (unsigned long) r;
>   bar (t);
> }
>
> int main(void)
> {
>   foo (&v, 0);
>   foo (&v, 1);
>   foo (&v, 2);
>   return 0;
> }
> -----------------------
>
> With trunk, there are currently 3 movzbl generated for foo():
>
>         movzbl  v(%rip), %eax
>         movzbl  %al, %eax
>         movzbl  %al, %eax
>
> With my fix this goes down to 1 movzbl. However, if the test case is
> modified to use signed instead of unsigned, we still end up with 3
> movsbl, of which 2 are redundant:
>
>         movsbw  v(%rip), %ax
>         movsbq  %al, %rax
>         movsbl  %al, %eax
>
> A single movsbq will suffice. But because of the attr mode settings
> for sign extends I mentioned above, my patch does not help here.
>
>>
>> Given the recent ree.c changes to remember the performed operations and
>> their original modes (struct ext_modified), perhaps the
>> "Second, make sure the reaching definitions don't feed another and"...
>> check could be made less strict or even removed, but for that a testcase is
>> really needed.
>
> I believe that we can remove the mode check from the above code
> altogether. The reason is that the ree.c code will always select the
> widest mode when combining extends (in merge_def_and_ext). So with a
> simple change to the ree.c code to simply avoid the mode checking both
> the unsigned and signed cases get addressed. In the signed case we are
> left with a single movs:
>
>         movsbq  v(%rip), %rax
>
> I didn't see any test failures in a x86_64-unknown-linux-gnu bootstrap
> and regression test run. If this seems reasonable, I can follow up
> with the patch (which is trivial), and I can also submit the 2 new
> test cases (the signed test case is included below).
>
> Thanks,
> Teresa
>
>
> extern void abort (void);
>
> signed short s;
> signed int i;
> signed long l;
> signed char v = -1;
>
> void __attribute__((noinline,noclone))
> bar (int t)
> {
>   if (t == 2 && s != -1)
>     abort ();
>   if (t == 1 && i != -1)
>     abort ();
>   if (t == 0 && l != -1)
>     abort ();
> }
>
> void __attribute__((noinline,noclone))
> foo (signed char *a, int t)
> {
>   signed char r = v;
>
>   if (t == 2)
>     s = (signed short) r;
>   else if (t == 1)
>     i = (signed int) r;
>   else if (t == 0)
>     l = (signed long) r;
>   bar (t);
> }
>
> int main(void)
> {
>   foo (&v, 0);
>   foo (&v, 1);
>   foo (&v, 2);
>   return 0;
> }
>
>
>
>>
>>         Jakub
>
>
>
> --
> Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413
Teresa Johnson - Oct. 25, 2012, 8:28 p.m.
ping.
Teresa

On Thu, Oct 18, 2012 at 8:21 AM, Teresa Johnson <tejohnson@google.com> wrote:
>
> The attached patch implements avoids conservative behavior in REE by allowing
> removal of redundant extends when the def feeds another extend with a different
> mode. This works because in merge_def_and_ext only calls combine_set_extension
> if the candidate for removal has a wider mode than the def extend's
> mode, otherwise
> the def extend mode is preserved. In combine_set_extension the def is
> modified to use
> the wider candidate's mode.
>
> See my previous message in this thread for more description of why this
> solution is preferred and works. Added two test cases to check for removal
> of redundant zero and sign extends.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu. Is this ok for trunk?
>
> Thanks,
> Teresa
>
> 2012-10-18  Teresa Johnson  <tejohnson@google.com>
>
>         * ree.c (add_removable_extension): Remove unnecessary
>         mode check with other extension.
>
> 2012-10-18  Teresa Johnson  <tejohnson@google.com>
>
>         * gcc.c-torture/execute/20111227-2.c: New test.
>         * gcc.c-torture/execute/20111227-3.c: Ditto.
>
> Index: ree.c
> ===================================================================
> --- ree.c       (revision 192265)
> +++ ree.c       (working copy)
> @@ -803,7 +803,7 @@ add_removable_extension (const_rtx expr, rtx insn,
>        for (def = defs; def; def = def->next)
>         if ((idx = def_map[INSN_UID(DF_REF_INSN (def->ref))])
>             && (cand = &VEC_index (ext_cand, *insn_list, idx - 1))
> -           && (cand->code != code || cand->mode != mode))
> +           && cand->code != code)
>           {
>             if (dump_file)
>               {
>
> Index: gcc.c-torture/execute/20111227-2.c
> ===================================================================
>
> /* Testcase derived from 20111227-1.c to ensure that REE is combining
>    redundant zero extends with zero extend to wider mode.  */
> /* { dg-options "-fdump-rtl-ree -O" } */
> extern void abort (void);
>
> unsigned short s;
> unsigned int i;
> unsigned long l;
> unsigned char v = -1;
>
> void __attribute__((noinline,noclone))
> bar (int t)
> {
>   if (t == 2 && s != 0xff)
>     abort ();
>   if (t == 1 && i != 0xff)
>     abort ();
>   if (t == 0 && l != 0xff)
>     abort ();
> }
>
> void __attribute__((noinline,noclone))
> foo (unsigned char *a, int t)
> {
>   unsigned char r = v;
>
>   if (t == 2)
>     s = (unsigned short) r;
>   else if (t == 1)
>     i = (unsigned int) r;
>   else if (t == 0)
>     l = (unsigned long) r;
>   bar (t);
> }
>
> int main(void)
> {
>   foo (&v, 0);
>   foo (&v, 1);
>   foo (&v, 2);
>   return 0;
> }
> /* { dg-final { scan-rtl-dump "Elimination opportunities = 3 realized
> = 3" "ree" } }  */
> /* { dg-final { cleanup-rtl-dump "ree" } }  */
>
>
> Index: gcc.c-torture/execute/20111227-3.c
> ===================================================================
> /* Testcase derived from 20111227-1.c to ensure that REE is combining
>    redundant sign extends with sign extend to wider mode.  */
> /* { dg-options "-fdump-rtl-ree -O" } */
>
> extern void abort (void);
>
> signed short s;
> signed int i;
> signed long l;
> signed char v = -1;
>
> void __attribute__((noinline,noclone))
> bar (int t)
> {
>   if (t == 2 && s != -1)
>     abort ();
>   if (t == 1 && i != -1)
>     abort ();
>   if (t == 0 && l != -1)
>     abort ();
> }
>
> void __attribute__((noinline,noclone))
> foo (signed char *a, int t)
> {
>   signed char r = v;
>
>   if (t == 2)
>     s = (signed short) r;
>   else if (t == 1)
>     i = (signed int) r;
>   else if (t == 0)
>     l = (signed long) r;
>   bar (t);
> }
>
> int main(void)
> {
>   foo (&v, 0);
>   foo (&v, 1);
>   foo (&v, 2);
>   return 0;
> }
> /* { dg-final { scan-rtl-dump "Elimination opportunities = 3 realized
> = 3" "ree" } }  */
> /* { dg-final { cleanup-rtl-dump "ree" } }  */
>
>
> On Mon, Oct 15, 2012 at 2:04 PM, Teresa Johnson <tejohnson@google.com> wrote:
> > On Fri, Oct 12, 2012 at 1:23 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> >> On Thu, Oct 11, 2012 at 02:44:12PM -0700, Teresa Johnson wrote:
> >>> Revised patch to address conservative behavior in redundant extend
> >>> elimination that was resulting in redundant extends not being
> >>> removed. Now uses a new target hook machine_mode_from_attr_mode
> >>> which is currently enabled only for i386.
> >>
> >> I still don't like it, the hook still is about how it is implemented
> >> instead of what target property it wants to ask (the important thing
> >> there is that a {QI,HI} -> SImode zero extension instruction on x86_64
> >> performs {QI,HI} -> DImode extension actually).  That isn't the case for any
> >> other modes, isn't the case for sign extension etc.
> >
> > That's true, although for sign extends the attr modes being set in
> > i386.md ensure that this won't do the wrong thing as the the attr
> > modes in the machine desc file match the machine_mode. However, this
> > ends up leading to the conservative behavior remaining for sign
> > extends (see testcase below).
> >
> >>
> >> Can you please post a testcase first?
> >
> > This was exposed by the snappy decompression code. However, I was able
> > to reproduce it by modifying the testcase submitted with the fix that
> > introduced this checking (gcc.c-torture/execute/20111227-1.c for
> > http://gcc.gnu.org/ml/gcc-patches/2011-12/msg01744.html). This test
> > case was failing because a sign_extend was being combined with a
> > zero_extend, so the instruction code check below fixed it, and the
> > mode check was unnecessary:
> >
> >       /* Second, make sure the reaching definitions don't feed another and
> >          different extension.  FIXME: this obviously can be improved.  */
> >       for (def = defs; def; def = def->next)
> >         if ((idx = def_map[INSN_UID(DF_REF_INSN (def->ref))])
> >             && (cand = &VEC_index (ext_cand, *insn_list, idx - 1))
> >             && (cand->code != code || cand->mode != mode))
> >
> > Here is my modified test case that exposes the conservative behavior I
> > saw in snappy:
> >
> > ------------------------
> > extern void abort (void);
> >
> > unsigned short s;
> > unsigned int i;
> > unsigned long l;
> > unsigned char v = -1;
> >
> > void __attribute__((noinline,noclone))
> > bar (int t)
> > {
> >   if (t == 2 && s != 0xff)
> >     abort ();
> >   if (t == 1 && i != 0xff)
> >     abort ();
> >   if (t == 0 && l != 0xff)
> >     abort ();
> > }
> >
> > void __attribute__((noinline,noclone))
> > foo (unsigned char *a, int t)
> > {
> >   unsigned char r = v;
> >
> >   if (t == 2)
> >     s = (unsigned short) r;
> >   else if (t == 1)
> >     i = (unsigned int) r;
> >   else if (t == 0)
> >     l = (unsigned long) r;
> >   bar (t);
> > }
> >
> > int main(void)
> > {
> >   foo (&v, 0);
> >   foo (&v, 1);
> >   foo (&v, 2);
> >   return 0;
> > }
> > -----------------------
> >
> > With trunk, there are currently 3 movzbl generated for foo():
> >
> >         movzbl  v(%rip), %eax
> >         movzbl  %al, %eax
> >         movzbl  %al, %eax
> >
> > With my fix this goes down to 1 movzbl. However, if the test case is
> > modified to use signed instead of unsigned, we still end up with 3
> > movsbl, of which 2 are redundant:
> >
> >         movsbw  v(%rip), %ax
> >         movsbq  %al, %rax
> >         movsbl  %al, %eax
> >
> > A single movsbq will suffice. But because of the attr mode settings
> > for sign extends I mentioned above, my patch does not help here.
> >
> >>
> >> Given the recent ree.c changes to remember the performed operations and
> >> their original modes (struct ext_modified), perhaps the
> >> "Second, make sure the reaching definitions don't feed another and"...
> >> check could be made less strict or even removed, but for that a testcase is
> >> really needed.
> >
> > I believe that we can remove the mode check from the above code
> > altogether. The reason is that the ree.c code will always select the
> > widest mode when combining extends (in merge_def_and_ext). So with a
> > simple change to the ree.c code to simply avoid the mode checking both
> > the unsigned and signed cases get addressed. In the signed case we are
> > left with a single movs:
> >
> >         movsbq  v(%rip), %rax
> >
> > I didn't see any test failures in a x86_64-unknown-linux-gnu bootstrap
> > and regression test run. If this seems reasonable, I can follow up
> > with the patch (which is trivial), and I can also submit the 2 new
> > test cases (the signed test case is included below).
> >
> > Thanks,
> > Teresa
> >
> >
> > extern void abort (void);
> >
> > signed short s;
> > signed int i;
> > signed long l;
> > signed char v = -1;
> >
> > void __attribute__((noinline,noclone))
> > bar (int t)
> > {
> >   if (t == 2 && s != -1)
> >     abort ();
> >   if (t == 1 && i != -1)
> >     abort ();
> >   if (t == 0 && l != -1)
> >     abort ();
> > }
> >
> > void __attribute__((noinline,noclone))
> > foo (signed char *a, int t)
> > {
> >   signed char r = v;
> >
> >   if (t == 2)
> >     s = (signed short) r;
> >   else if (t == 1)
> >     i = (signed int) r;
> >   else if (t == 0)
> >     l = (signed long) r;
> >   bar (t);
> > }
> >
> > int main(void)
> > {
> >   foo (&v, 0);
> >   foo (&v, 1);
> >   foo (&v, 2);
> >   return 0;
> > }
> >
> >
> >
> >>
> >>         Jakub
> >
> >
> >
> > --
> > Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413
>
>
>
> --
> Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413




--
Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413
Jakub Jelinek - Oct. 25, 2012, 8:59 p.m.
On Thu, Oct 25, 2012 at 01:28:32PM -0700, Teresa Johnson wrote:
> > 2012-10-18  Teresa Johnson  <tejohnson@google.com>
> >
> >         * ree.c (add_removable_extension): Remove unnecessary
> >         mode check with other extension.
> >
> > 2012-10-18  Teresa Johnson  <tejohnson@google.com>
> >
> >         * gcc.c-torture/execute/20111227-2.c: New test.
> >         * gcc.c-torture/execute/20111227-3.c: Ditto.

Ok for trunk.

	Jakub

Patch

Index: ree.c
===================================================================
--- ree.c       (revision 192265)
+++ ree.c       (working copy)
@@ -803,7 +803,7 @@  add_removable_extension (const_rtx expr, rtx insn,
       for (def = defs; def; def = def->next)
        if ((idx = def_map[INSN_UID(DF_REF_INSN (def->ref))])
            && (cand = &VEC_index (ext_cand, *insn_list, idx - 1))
-           && (cand->code != code || cand->mode != mode))
+           && cand->code != code)
          {
            if (dump_file)
              {