Message ID | caf857de-0fdd-8a89-0f97-6764a8a9a6c1@redhat.com |
---|---|
State | New |
On Thu, Dec 22, 2016 at 7:26 AM, Jeff Law <law@redhat.com> wrote: > This is the final patch in the kit to improve our DSE implementation. > > It's based on a observation by Richi. Namely that a read from bytes of > memory that are dead can be ignored. By ignoring such reads we can > sometimes find additional stores that allow us to either eliminate or trim > an earlier store more aggressively. > > This only hit (by hit I mean the ability to ignore resulted in finding a > full or partially dead store that we didn't otherwise find) once during a > bootstrap, but does hit often in the libstdc++ testsuite. I've added a test > derived from the conversation between myself and Richi last year. > > There's nothing in the BZ database on this issue and I can't reasonably call > it a bugfix. I wouldn't lose sleep if this deferred to gcc-8. > > Bootstrapped and regression tested on x86-64-linux-gnu. OK for the trunk or > defer to gcc-8? > > > > * tree-ssa-dse.c (live_bytes_read): New function. > (dse_classify_store): Ignore reads of dead bytes. > > * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test. > * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: Likewise. > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c > new file mode 100644 > index 0000000..6605dfe > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c > @@ -0,0 +1,33 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-dse1-details" } */ > + > +enum constraint_expr_type > +{ > + SCALAR, DEREF, ADDRESSOF > +}; > +typedef struct constraint_expr > +{ > + enum constraint_expr_type type; > + unsigned int var; > + long offset; > +} constraint_expr ; > +typedef struct constraint > +{ > + struct constraint_expr lhs; > + struct constraint_expr rhs; > +} constraint; > +static _Bool > +constraint_expr_equal (struct constraint_expr x, struct constraint_expr y) > +{ > + return x.type == y.type && x.var == y.var && x.offset == y.offset; > +} > + > +_Bool > +constraint_equal (struct constraint a, struct constraint b) > +{ > + return constraint_expr_equal (a.lhs, b.lhs) > + && constraint_expr_equal (a.rhs, b.rhs); > +} > + > +/* { dg-final { scan-tree-dump-times "Deleted dead store" 2 "dse1" } } */ > + > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c > new file mode 100644 > index 0000000..48dc92e > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c > @@ -0,0 +1,21 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-dse1-details -fno-tree-fre -fno-tree-sra" > } */ > + > +struct S { struct R { int x; int y; } r; int z; } s; > + > +extern void blah (struct S); > + > +void > +foo () > +{ > + struct S s = { {1, 2}, 3 }; > + s.r.x = 1; > + s.r.y = 2; > + struct R r = s.r; > + s.z = 3; > + blah (s); > +} > + > + > +/* { dg-final { scan-tree-dump-times "Deleted dead store" 4 "dse1" } } */ > + > diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c > index a807d6d..f5b53fc 100644 > --- a/gcc/tree-ssa-dse.c > +++ b/gcc/tree-ssa-dse.c > @@ -475,6 +475,41 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap > live, gimple *stmt) > } > } > > +/* Return TRUE if USE_REF reads bytes from LIVE where live is > + derived from REF, a write reference. > + > + While this routine may modify USE_REF, it's passed by value, not > + location. So callers do not see those modifications. 
*/ > + > +static bool > +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live) > +{ > + /* We have already verified that USE_REF and REF hit the same object. > + Now verify that there's actually an overlap between USE_REF and REF. > */ > + if ((use_ref.offset < ref->offset > + && use_ref.offset + use_ref.size > ref->offset) > + || (use_ref.offset >= ref->offset > + && use_ref.offset < ref->offset + ref->size)) can you use ranges_overlap_p? (tree-ssa-alias.h) > + { > + normalize_ref (&use_ref, ref); > + > + /* If USE_REF covers all of REF, then it will hit one or more > + live bytes. This avoids useless iteration over the bitmap > + below. */ > + if (use_ref.offset == ref->offset && use_ref.size == ref->size) > + return true; > + > + /* Now iterate over what's left in USE_REF and see if any of > + those bits are i LIVE. */ > + for (int i = (use_ref.offset - ref->offset) / BITS_PER_UNIT; > + i < (use_ref.offset + use_ref.size) / BITS_PER_UNIT; i++) > + if (bitmap_bit_p (live, i)) a bitmap_bit_in_range_p () would be nice to have. And it can be more efficient than this loop... > + return true; > + return false; > + } > + return true; > +} > + > /* A helper of dse_optimize_stmt. > Given a GIMPLE_ASSIGN in STMT that writes to REF, find a candidate > statement *USE_STMT that may prove STMT to be dead. > @@ -554,6 +589,41 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple > **use_stmt, > /* If the statement is a use the store is not dead. */ > else if (ref_maybe_used_by_stmt_p (use_stmt, ref)) > { > + /* Handle common cases where we can easily build a ao_ref > + structure for USE_STMT and in doing so we find that the > + references hit non-live bytes and thus can be ignored. */ > + if (live_bytes) > + { > + if (is_gimple_assign (use_stmt)) > + { > + /* Other cases were noted as non-aliasing by > + the call to ref_maybe_used_by_stmt_p. */ > + ao_ref use_ref; > + ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt)); > + if (valid_ao_ref_for_dse (&use_ref) > + && use_ref.base == ref->base > + && use_ref.size == use_ref.max_size > + && !live_bytes_read (use_ref, ref, live_bytes)) > + { > + if (gimple_vdef (use_stmt)) > + { > + /* If we have already seen a store and > + this is also a store, then we have to > + fail. */ > + if (temp) > + { > + fail = true; > + BREAK_FROM_IMM_USE_STMT (ui); > + } as this case is rather cheap to test please test it together with live_bytes. Like if (live_bytes && (! gimple_vdef (use_stmt) || ! temp)) otherwise the patch looks reasonable for GCC 8. Richard. > + > + /* Otherwise walk through this store. */ > + temp = use_stmt; > + } > + continue; > + } > + } > + } > + > fail = true; > BREAK_FROM_IMM_USE_STMT (ui); > } >
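A minimal standalone sketch of the check live_bytes_read performs may help readers who do not have the GCC sources at hand. It is not the patch's code: it works in byte offsets where the real function works in bits, uses a plain byte array in place of the sbitmap, and uses a local overlap helper that answers the same question as ranges_overlap_p from tree-ssa-alias.h (the routine Richard points to above). Names ending in _sketch are invented for illustration.

#include <stdbool.h>

/* Half-open interval overlap test, the question ranges_overlap_p answers.  */
bool
ranges_overlap_sketch (long off1, long size1, long off2, long size2)
{
  return off1 < off2 + size2 && off2 < off1 + size1;
}

/* Does the read [USE_OFF, USE_OFF + USE_SIZE) touch any byte still marked
   live in LIVE, where LIVE has one flag per byte of the earlier store
   [REF_OFF, REF_OFF + REF_SIZE)?  Conservatively returns true when no
   overlap is proven.  */
bool
live_bytes_read_sketch (long use_off, long use_size,
                        long ref_off, long ref_size,
                        const unsigned char *live)
{
  if (!ranges_overlap_sketch (use_off, use_size, ref_off, ref_size))
    return true;

  /* Clamp the read to the extent of the store, as normalize_ref does.  */
  long start = use_off > ref_off ? use_off - ref_off : 0;
  long end = use_off + use_size - ref_off;
  if (end > ref_size)
    end = ref_size;

  for (long i = start; i < end; i++)
    if (live[i])
      return true;   /* The read still sees a live byte.  */
  return false;      /* The read only touches dead bytes.  */
}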
Another old patch getting resurrected... On 01/04/2017 06:50 AM, Richard Biener wrote: > On Thu, Dec 22, 2016 at 7:26 AM, Jeff Law <law@redhat.com> wrote: >> This is the final patch in the kit to improve our DSE implementation. >> >> It's based on a observation by Richi. Namely that a read from bytes of >> memory that are dead can be ignored. By ignoring such reads we can >> sometimes find additional stores that allow us to either eliminate or trim >> an earlier store more aggressively. >> >> This only hit (by hit I mean the ability to ignore resulted in finding a >> full or partially dead store that we didn't otherwise find) once during a >> bootstrap, but does hit often in the libstdc++ testsuite. I've added a test >> derived from the conversation between myself and Richi last year. >> >> There's nothing in the BZ database on this issue and I can't reasonably call >> it a bugfix. I wouldn't lose sleep if this deferred to gcc-8. >> >> Bootstrapped and regression tested on x86-64-linux-gnu. OK for the trunk or >> defer to gcc-8? >> >> >> >> * tree-ssa-dse.c (live_bytes_read): New function. >> (dse_classify_store): Ignore reads of dead bytes. >> >> * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test. >> * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: Likewise. >> >> >> [ snip ] >> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c >> index a807d6d..f5b53fc 100644 >> --- a/gcc/tree-ssa-dse.c >> +++ b/gcc/tree-ssa-dse.c >> @@ -475,6 +475,41 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap >> live, gimple *stmt) >> } >> } >> >> +/* Return TRUE if USE_REF reads bytes from LIVE where live is >> + derived from REF, a write reference. >> + >> + While this routine may modify USE_REF, it's passed by value, not >> + location. So callers do not see those modifications. */ >> + >> +static bool >> +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live) >> +{ >> + /* We have already verified that USE_REF and REF hit the same object. >> + Now verify that there's actually an overlap between USE_REF and REF. >> */ >> + if ((use_ref.offset < ref->offset >> + && use_ref.offset + use_ref.size > ref->offset) >> + || (use_ref.offset >= ref->offset >> + && use_ref.offset < ref->offset + ref->size)) > > can you use ranges_overlap_p? (tree-ssa-alias.h) Yes. Didn't know about it. Done. > >> + { >> + normalize_ref (&use_ref, ref); >> + >> + /* If USE_REF covers all of REF, then it will hit one or more >> + live bytes. This avoids useless iteration over the bitmap >> + below. */ >> + if (use_ref.offset == ref->offset && use_ref.size == ref->size) >> + return true; >> + >> + /* Now iterate over what's left in USE_REF and see if any of >> + those bits are i LIVE. */ >> + for (int i = (use_ref.offset - ref->offset) / BITS_PER_UNIT; >> + i < (use_ref.offset + use_ref.size) / BITS_PER_UNIT; i++) >> + if (bitmap_bit_p (live, i)) > > a bitmap_bit_in_range_p () would be nice to have. And it can be more > efficient than this loop... Yea. That likely would help here. I'm testing with a bitmap_bit_in_range_p implementation (only for sbitmaps since that's what we're using here). That implementation does the reasonably efficient things and is modeled after the sbitmap implementation of bitmap_set_range. >> @@ -554,6 +589,41 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple >> **use_stmt, >> /* If the statement is a use the store is not dead. 
*/ >> else if (ref_maybe_used_by_stmt_p (use_stmt, ref)) >> { >> + /* Handle common cases where we can easily build a ao_ref >> + structure for USE_STMT and in doing so we find that the >> + references hit non-live bytes and thus can be ignored. */ >> + if (live_bytes) >> + { >> + if (is_gimple_assign (use_stmt)) >> + { >> + /* Other cases were noted as non-aliasing by >> + the call to ref_maybe_used_by_stmt_p. */ >> + ao_ref use_ref; >> + ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt)); >> + if (valid_ao_ref_for_dse (&use_ref) >> + && use_ref.base == ref->base >> + && use_ref.size == use_ref.max_size >> + && !live_bytes_read (use_ref, ref, live_bytes)) >> + { >> + if (gimple_vdef (use_stmt)) >> + { >> + /* If we have already seen a store and >> + this is also a store, then we have to >> + fail. */ >> + if (temp) >> + { >> + fail = true; >> + BREAK_FROM_IMM_USE_STMT (ui); >> + } > > as this case is rather cheap to test please test it together with > live_bytes. Like > > if (live_bytes && (! gimple_vdef (use_stmt) || ! temp)) Seems reasonable. > > otherwise the patch looks reasonable for GCC 8. Given the sbitmap.[ch] change, posting for re-approval Bootstrapped and regression tested on x86_64. Earlier version without the bitmap_bit_in_range_p improvement was also bootstrapped and regression tested on aarch64. Jeff * sbitmap.c (bitmap_bit_in_range_p): New function. * sbitmap.h (bitmap_bit_in_range_p): Prototype. * tree-ssa-dse.c (live_bytes_read): New function. (dse_classify_store): Ignore reads of dead bytes. * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test. diff --git a/gcc/sbitmap.c b/gcc/sbitmap.c index c065d13..4bf13a1 100644 --- a/gcc/sbitmap.c +++ b/gcc/sbitmap.c @@ -316,6 +316,59 @@ bitmap_set_range (sbitmap bmap, unsigned int start, unsigned int count) bmap->elms[start_word] |= mask; } +/* Return TRUE if any bit between START and END inclusive is set within + the simple bitmap BMAP. Return FALSE otherwise. */ + +bool +bitmap_bit_in_range_p (const_sbitmap bmap, unsigned int start, unsigned int end) +{ + unsigned int start_word = start / SBITMAP_ELT_BITS; + unsigned int start_bitno = start % SBITMAP_ELT_BITS; + + /* Testing within a word, starting at the beginning of a word. */ + if (start_bitno == 0 && (end - start) < SBITMAP_ELT_BITS) + { + SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << (end - start)) - 1; + return (bmap->elms[start_word] & mask) != 0; + } + + unsigned int end_word = end / SBITMAP_ELT_BITS; + unsigned int end_bitno = end % SBITMAP_ELT_BITS; + + /* Testing starts somewhere in the middle of a word. Test up to the + end of the word or the end of the requested region, whichever comes + first. */ + if (start_bitno != 0) + { + unsigned int nbits = ((start_word == end_word) + ? end_bitno - start_bitno + : SBITMAP_ELT_BITS - start_bitno); + SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << nbits) - 1; + mask <<= start_bitno; + if (bmap->elms[start_word] & mask) + return true; + start_word++; + } + + if (start_word > end_word) + return false; + + /* Now test words at a time until we hit a partial word. */ + unsigned int nwords = (end_word - start_word); + while (nwords) + { + if (bmap->elms[start_word]) + return true; + start_word++; + nwords--; + } + + /* Now handle residuals in the last word. */ + SBITMAP_ELT_TYPE mask + = ((SBITMAP_ELT_TYPE)1 << (SBITMAP_ELT_BITS - end_bitno)) - 1; + return (bmap->elms[start_word] & mask) != 0; +} + #if GCC_VERSION < 3400 /* Table of number of set bits in a character, indexed by value of char. 
*/ static const unsigned char popcount_table[] = diff --git a/gcc/sbitmap.h b/gcc/sbitmap.h index ce4d27d..ff52e93 100644 --- a/gcc/sbitmap.h +++ b/gcc/sbitmap.h @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see * set_difference : bitmap_and_compl * set_disjuction : (not implemented) * set_compare : bitmap_equal_p + * bit_in_range_p : bitmap_bit_in_range_p Some operations on 3 sets that occur frequently in data flow problems are also implemented: @@ -253,6 +254,7 @@ extern bool bitmap_and (sbitmap, const_sbitmap, const_sbitmap); extern bool bitmap_ior (sbitmap, const_sbitmap, const_sbitmap); extern bool bitmap_xor (sbitmap, const_sbitmap, const_sbitmap); extern bool bitmap_subset_p (const_sbitmap, const_sbitmap); +extern bool bitmap_bit_in_range_p (const_sbitmap, unsigned int, unsigned int); extern int bitmap_first_set_bit (const_sbitmap); extern int bitmap_last_set_bit (const_sbitmap); diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c new file mode 100644 index 0000000..6605dfe --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-dse1-details" } */ + +enum constraint_expr_type +{ + SCALAR, DEREF, ADDRESSOF +}; +typedef struct constraint_expr +{ + enum constraint_expr_type type; + unsigned int var; + long offset; +} constraint_expr ; +typedef struct constraint +{ + struct constraint_expr lhs; + struct constraint_expr rhs; +} constraint; +static _Bool +constraint_expr_equal (struct constraint_expr x, struct constraint_expr y) +{ + return x.type == y.type && x.var == y.var && x.offset == y.offset; +} + +_Bool +constraint_equal (struct constraint a, struct constraint b) +{ + return constraint_expr_equal (a.lhs, b.lhs) + && constraint_expr_equal (a.rhs, b.rhs); +} + +/* { dg-final { scan-tree-dump-times "Deleted dead store" 2 "dse1" } } */ + diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c index 70c8b07..1eca740 100644 --- a/gcc/tree-ssa-dse.c +++ b/gcc/tree-ssa-dse.c @@ -468,6 +468,36 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt) } } +/* Return TRUE if USE_REF reads bytes from LIVE where live is + derived from REF, a write reference. + + While this routine may modify USE_REF, it's passed by value, not + location. So callers do not see those modifications. */ + +static bool +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live) +{ + /* We have already verified that USE_REF and REF hit the same object. + Now verify that there's actually an overlap between USE_REF and REF. */ + if (ranges_overlap_p (use_ref.offset, use_ref.size, ref->offset, ref->size)) + { + normalize_ref (&use_ref, ref); + + /* If USE_REF covers all of REF, then it will hit one or more + live bytes. This avoids useless iteration over the bitmap + below. */ + if (use_ref.offset <= ref->offset + && use_ref.offset + use_ref.size >= ref->offset + ref->size) + return true; + + /* Now check if any of the remaining bits in use_ref are set in LIVE. */ + unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT; + unsigned int end = (use_ref.offset + use_ref.size) / BITS_PER_UNIT; + return bitmap_bit_in_range_p (live, start, end); + } + return true; +} + /* A helper of dse_optimize_stmt. Given a GIMPLE_ASSIGN in STMT that writes to REF, find a candidate statement *USE_STMT that may prove STMT to be dead. 
@@ -547,6 +577,31 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt, /* If the statement is a use the store is not dead. */ else if (ref_maybe_used_by_stmt_p (use_stmt, ref)) { + /* Handle common cases where we can easily build a ao_ref + structure for USE_STMT and in doing so we find that the + references hit non-live bytes and thus can be ignored. */ + if (live_bytes && (!gimple_vdef (use_stmt) || !temp)) + { + if (is_gimple_assign (use_stmt)) + { + /* Other cases were noted as non-aliasing by + the call to ref_maybe_used_by_stmt_p. */ + ao_ref use_ref; + ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt)); + if (valid_ao_ref_for_dse (&use_ref) + && use_ref.base == ref->base + && use_ref.size == use_ref.max_size + && !live_bytes_read (use_ref, ref, live_bytes)) + { + /* If this statement has a VDEF, then it is the + first store we have seen, so walk through it. */ + if (gimple_vdef (use_stmt)) + temp = use_stmt; + continue; + } + } + } + fail = true; BREAK_FROM_IMM_USE_STMT (ui); }
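To make the intent of the new sbitmap helper easier to see outside the GCC tree, here is a rough standalone equivalent of the word-at-a-time scan bitmap_bit_in_range_p is meant to perform. It is only a sketch under explicit assumptions: uint64_t words instead of SBITMAP_ELT_TYPE, START and END taken as inclusive bit indices as the patch's comment states, and invented names; it does not try to match the exact masking style of the implementation above.

#include <stdbool.h>
#include <stdint.h>

bool
bit_in_range_sketch (const uint64_t *words, unsigned start, unsigned end)
{
  unsigned sw = start / 64, sb = start % 64;
  unsigned ew = end / 64, eb = end % 64;

  if (sw == ew)
    {
      /* Single word: keep only bits SB..EB inclusive.  */
      uint64_t mask = (eb == 63 ? ~UINT64_C (0)
                                : (UINT64_C (1) << (eb + 1)) - 1);
      mask &= ~((UINT64_C (1) << sb) - 1);
      return (words[sw] & mask) != 0;
    }

  /* Partial first word: bits SB..63.  */
  if ((words[sw] & ~((UINT64_C (1) << sb) - 1)) != 0)
    return true;

  /* Full words in between.  */
  for (unsigned w = sw + 1; w < ew; w++)
    if (words[w] != 0)
      return true;

  /* Partial last word: bits 0..EB.  */
  uint64_t mask = (eb == 63 ? ~UINT64_C (0)
                            : (UINT64_C (1) << (eb + 1)) - 1);
  return (words[ew] & mask) != 0;
}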
Jeff Law <law@redhat.com> writes: > @@ -468,6 +468,36 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt) > } > } > > +/* Return TRUE if USE_REF reads bytes from LIVE where live is > + derived from REF, a write reference. > + > + While this routine may modify USE_REF, it's passed by value, not > + location. So callers do not see those modifications. */ > + > +static bool > +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live) > +{ > + /* We have already verified that USE_REF and REF hit the same object. > + Now verify that there's actually an overlap between USE_REF and REF. */ > + if (ranges_overlap_p (use_ref.offset, use_ref.size, ref->offset, ref->size)) > + { > + normalize_ref (&use_ref, ref); > + > + /* If USE_REF covers all of REF, then it will hit one or more > + live bytes. This avoids useless iteration over the bitmap > + below. */ > + if (use_ref.offset <= ref->offset > + && use_ref.offset + use_ref.size >= ref->offset + ref->size) > + return true; > + > + /* Now check if any of the remaining bits in use_ref are set in LIVE. */ > + unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT; > + unsigned int end = (use_ref.offset + use_ref.size) / BITS_PER_UNIT; > + return bitmap_bit_in_range_p (live, start, end); > + } > + return true; > +} When rebasing the SVE changes on top of this, I wasn't sure why the function returned true rather than false when there's no overlap. Is that deliberate? It might be worth a comment if so. Thanks, Richard
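Assuming the conservative reading is the intent (no proven overlap means the use has to be treated as if it could read live bytes, so the caller must keep the store), one way to make that default explicit is to invert the test and return early. This is only a sketch of the same behaviour as the function quoted above, not a proposed change:

static bool
live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
{
  /* No overlap proven between USE_REF and REF: conservatively assume
     the use may read live bytes, so the store cannot be removed on
     account of this use.  */
  if (!ranges_overlap_p (use_ref.offset, use_ref.size,
                         ref->offset, ref->size))
    return true;

  normalize_ref (&use_ref, ref);

  /* If USE_REF covers all of REF, it certainly reads live bytes.  */
  if (use_ref.offset <= ref->offset
      && use_ref.offset + use_ref.size >= ref->offset + ref->size)
    return true;

  /* Otherwise test only the overlapping byte range against LIVE.  */
  unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
  unsigned int end = (use_ref.offset + use_ref.size) / BITS_PER_UNIT;
  return bitmap_bit_in_range_p (live, start, end);
}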
Hi Jeff, On 7 September 2017 at 00:18, Jeff Law <law@redhat.com> wrote: > Another old patch getting resurrected... > > This patch (r253305) introduces a new FAIL on arm-none-eabi (as opposed arm-linux-gnueabi*): FAIL: gcc.dg/tree-ssa/ssa-dse-26.c scan-tree-dump-times dse1 "Deleted dead store" 2 I'm not familiar with all this, but I quickly looked at the testcase, and noticed it uses enums. One ABI difference between arm-non-eabi and arm-linux-gnueabi is that the bare-metal target defaults to short-enums, while the linux one uses no-short-enums. Could that be the problem? Thanks, Christophe > On 01/04/2017 06:50 AM, Richard Biener wrote: >> On Thu, Dec 22, 2016 at 7:26 AM, Jeff Law <law@redhat.com> wrote: >>> This is the final patch in the kit to improve our DSE implementation. >>> >>> It's based on a observation by Richi. Namely that a read from bytes of >>> memory that are dead can be ignored. By ignoring such reads we can >>> sometimes find additional stores that allow us to either eliminate or trim >>> an earlier store more aggressively. >>> >>> This only hit (by hit I mean the ability to ignore resulted in finding a >>> full or partially dead store that we didn't otherwise find) once during a >>> bootstrap, but does hit often in the libstdc++ testsuite. I've added a test >>> derived from the conversation between myself and Richi last year. >>> >>> There's nothing in the BZ database on this issue and I can't reasonably call >>> it a bugfix. I wouldn't lose sleep if this deferred to gcc-8. >>> >>> Bootstrapped and regression tested on x86-64-linux-gnu. OK for the trunk or >>> defer to gcc-8? >>> >>> >>> >>> * tree-ssa-dse.c (live_bytes_read): New function. >>> (dse_classify_store): Ignore reads of dead bytes. >>> >>> * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test. >>> * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: Likewise. >>> >>> >>> > [ snip ] > >>> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c >>> index a807d6d..f5b53fc 100644 >>> --- a/gcc/tree-ssa-dse.c >>> +++ b/gcc/tree-ssa-dse.c >>> @@ -475,6 +475,41 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap >>> live, gimple *stmt) >>> } >>> } >>> >>> +/* Return TRUE if USE_REF reads bytes from LIVE where live is >>> + derived from REF, a write reference. >>> + >>> + While this routine may modify USE_REF, it's passed by value, not >>> + location. So callers do not see those modifications. */ >>> + >>> +static bool >>> +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live) >>> +{ >>> + /* We have already verified that USE_REF and REF hit the same object. >>> + Now verify that there's actually an overlap between USE_REF and REF. >>> */ >>> + if ((use_ref.offset < ref->offset >>> + && use_ref.offset + use_ref.size > ref->offset) >>> + || (use_ref.offset >= ref->offset >>> + && use_ref.offset < ref->offset + ref->size)) >> >> can you use ranges_overlap_p? (tree-ssa-alias.h) > Yes. Didn't know about it. Done. > >> >>> + { >>> + normalize_ref (&use_ref, ref); >>> + >>> + /* If USE_REF covers all of REF, then it will hit one or more >>> + live bytes. This avoids useless iteration over the bitmap >>> + below. */ >>> + if (use_ref.offset == ref->offset && use_ref.size == ref->size) >>> + return true; >>> + >>> + /* Now iterate over what's left in USE_REF and see if any of >>> + those bits are i LIVE. */ >>> + for (int i = (use_ref.offset - ref->offset) / BITS_PER_UNIT; >>> + i < (use_ref.offset + use_ref.size) / BITS_PER_UNIT; i++) >>> + if (bitmap_bit_p (live, i)) >> >> a bitmap_bit_in_range_p () would be nice to have. 
And it can be more >> efficient than this loop... > Yea. That likely would help here. I'm testing with a > bitmap_bit_in_range_p implementation (only for sbitmaps since that's > what we're using here). > > That implementation does the reasonably efficient things and is modeled > after the sbitmap implementation of bitmap_set_range. > > >>> @@ -554,6 +589,41 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple >>> **use_stmt, >>> /* If the statement is a use the store is not dead. */ >>> else if (ref_maybe_used_by_stmt_p (use_stmt, ref)) >>> { >>> + /* Handle common cases where we can easily build a ao_ref >>> + structure for USE_STMT and in doing so we find that the >>> + references hit non-live bytes and thus can be ignored. */ >>> + if (live_bytes) >>> + { >>> + if (is_gimple_assign (use_stmt)) >>> + { >>> + /* Other cases were noted as non-aliasing by >>> + the call to ref_maybe_used_by_stmt_p. */ >>> + ao_ref use_ref; >>> + ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt)); >>> + if (valid_ao_ref_for_dse (&use_ref) >>> + && use_ref.base == ref->base >>> + && use_ref.size == use_ref.max_size >>> + && !live_bytes_read (use_ref, ref, live_bytes)) >>> + { >>> + if (gimple_vdef (use_stmt)) >>> + { >>> + /* If we have already seen a store and >>> + this is also a store, then we have to >>> + fail. */ >>> + if (temp) >>> + { >>> + fail = true; >>> + BREAK_FROM_IMM_USE_STMT (ui); >>> + } >> >> as this case is rather cheap to test please test it together with >> live_bytes. Like >> >> if (live_bytes && (! gimple_vdef (use_stmt) || ! temp)) > Seems reasonable. > > >> >> otherwise the patch looks reasonable for GCC 8. > Given the sbitmap.[ch] change, posting for re-approval > > Bootstrapped and regression tested on x86_64. Earlier version without > the bitmap_bit_in_range_p improvement was also bootstrapped and > regression tested on aarch64. > > Jeff > > > > * sbitmap.c (bitmap_bit_in_range_p): New function. > * sbitmap.h (bitmap_bit_in_range_p): Prototype. > * tree-ssa-dse.c (live_bytes_read): New function. > (dse_classify_store): Ignore reads of dead bytes. > > * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test. > > diff --git a/gcc/sbitmap.c b/gcc/sbitmap.c > index c065d13..4bf13a1 100644 > --- a/gcc/sbitmap.c > +++ b/gcc/sbitmap.c > @@ -316,6 +316,59 @@ bitmap_set_range (sbitmap bmap, unsigned int start, unsigned int count) > bmap->elms[start_word] |= mask; > } > > +/* Return TRUE if any bit between START and END inclusive is set within > + the simple bitmap BMAP. Return FALSE otherwise. */ > + > +bool > +bitmap_bit_in_range_p (const_sbitmap bmap, unsigned int start, unsigned int end) > +{ > + unsigned int start_word = start / SBITMAP_ELT_BITS; > + unsigned int start_bitno = start % SBITMAP_ELT_BITS; > + > + /* Testing within a word, starting at the beginning of a word. */ > + if (start_bitno == 0 && (end - start) < SBITMAP_ELT_BITS) > + { > + SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << (end - start)) - 1; > + return (bmap->elms[start_word] & mask) != 0; > + } > + > + unsigned int end_word = end / SBITMAP_ELT_BITS; > + unsigned int end_bitno = end % SBITMAP_ELT_BITS; > + > + /* Testing starts somewhere in the middle of a word. Test up to the > + end of the word or the end of the requested region, whichever comes > + first. */ > + if (start_bitno != 0) > + { > + unsigned int nbits = ((start_word == end_word) > + ? 
end_bitno - start_bitno > + : SBITMAP_ELT_BITS - start_bitno); > + SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << nbits) - 1; > + mask <<= start_bitno; > + if (bmap->elms[start_word] & mask) > + return true; > + start_word++; > + } > + > + if (start_word > end_word) > + return false; > + > + /* Now test words at a time until we hit a partial word. */ > + unsigned int nwords = (end_word - start_word); > + while (nwords) > + { > + if (bmap->elms[start_word]) > + return true; > + start_word++; > + nwords--; > + } > + > + /* Now handle residuals in the last word. */ > + SBITMAP_ELT_TYPE mask > + = ((SBITMAP_ELT_TYPE)1 << (SBITMAP_ELT_BITS - end_bitno)) - 1; > + return (bmap->elms[start_word] & mask) != 0; > +} > + > #if GCC_VERSION < 3400 > /* Table of number of set bits in a character, indexed by value of char. */ > static const unsigned char popcount_table[] = > diff --git a/gcc/sbitmap.h b/gcc/sbitmap.h > index ce4d27d..ff52e93 100644 > --- a/gcc/sbitmap.h > +++ b/gcc/sbitmap.h > @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see > * set_difference : bitmap_and_compl > * set_disjuction : (not implemented) > * set_compare : bitmap_equal_p > + * bit_in_range_p : bitmap_bit_in_range_p > > Some operations on 3 sets that occur frequently in data flow problems > are also implemented: > @@ -253,6 +254,7 @@ extern bool bitmap_and (sbitmap, const_sbitmap, const_sbitmap); > extern bool bitmap_ior (sbitmap, const_sbitmap, const_sbitmap); > extern bool bitmap_xor (sbitmap, const_sbitmap, const_sbitmap); > extern bool bitmap_subset_p (const_sbitmap, const_sbitmap); > +extern bool bitmap_bit_in_range_p (const_sbitmap, unsigned int, unsigned int); > > extern int bitmap_first_set_bit (const_sbitmap); > extern int bitmap_last_set_bit (const_sbitmap); > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c > new file mode 100644 > index 0000000..6605dfe > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c > @@ -0,0 +1,33 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-dse1-details" } */ > + > +enum constraint_expr_type > +{ > + SCALAR, DEREF, ADDRESSOF > +}; > +typedef struct constraint_expr > +{ > + enum constraint_expr_type type; > + unsigned int var; > + long offset; > +} constraint_expr ; > +typedef struct constraint > +{ > + struct constraint_expr lhs; > + struct constraint_expr rhs; > +} constraint; > +static _Bool > +constraint_expr_equal (struct constraint_expr x, struct constraint_expr y) > +{ > + return x.type == y.type && x.var == y.var && x.offset == y.offset; > +} > + > +_Bool > +constraint_equal (struct constraint a, struct constraint b) > +{ > + return constraint_expr_equal (a.lhs, b.lhs) > + && constraint_expr_equal (a.rhs, b.rhs); > +} > + > +/* { dg-final { scan-tree-dump-times "Deleted dead store" 2 "dse1" } } */ > + > diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c > index 70c8b07..1eca740 100644 > --- a/gcc/tree-ssa-dse.c > +++ b/gcc/tree-ssa-dse.c > @@ -468,6 +468,36 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt) > } > } > > +/* Return TRUE if USE_REF reads bytes from LIVE where live is > + derived from REF, a write reference. > + > + While this routine may modify USE_REF, it's passed by value, not > + location. So callers do not see those modifications. */ > + > +static bool > +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live) > +{ > + /* We have already verified that USE_REF and REF hit the same object. 
> + Now verify that there's actually an overlap between USE_REF and REF. */ > + if (ranges_overlap_p (use_ref.offset, use_ref.size, ref->offset, ref->size)) > + { > + normalize_ref (&use_ref, ref); > + > + /* If USE_REF covers all of REF, then it will hit one or more > + live bytes. This avoids useless iteration over the bitmap > + below. */ > + if (use_ref.offset <= ref->offset > + && use_ref.offset + use_ref.size >= ref->offset + ref->size) > + return true; > + > + /* Now check if any of the remaining bits in use_ref are set in LIVE. */ > + unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT; > + unsigned int end = (use_ref.offset + use_ref.size) / BITS_PER_UNIT; > + return bitmap_bit_in_range_p (live, start, end); > + } > + return true; > +} > + > /* A helper of dse_optimize_stmt. > Given a GIMPLE_ASSIGN in STMT that writes to REF, find a candidate > statement *USE_STMT that may prove STMT to be dead. > @@ -547,6 +577,31 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt, > /* If the statement is a use the store is not dead. */ > else if (ref_maybe_used_by_stmt_p (use_stmt, ref)) > { > + /* Handle common cases where we can easily build a ao_ref > + structure for USE_STMT and in doing so we find that the > + references hit non-live bytes and thus can be ignored. */ > + if (live_bytes && (!gimple_vdef (use_stmt) || !temp)) > + { > + if (is_gimple_assign (use_stmt)) > + { > + /* Other cases were noted as non-aliasing by > + the call to ref_maybe_used_by_stmt_p. */ > + ao_ref use_ref; > + ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt)); > + if (valid_ao_ref_for_dse (&use_ref) > + && use_ref.base == ref->base > + && use_ref.size == use_ref.max_size > + && !live_bytes_read (use_ref, ref, live_bytes)) > + { > + /* If this statement has a VDEF, then it is the > + first store we have seen, so walk through it. */ > + if (gimple_vdef (use_stmt)) > + temp = use_stmt; > + continue; > + } > + } > + } > + > fail = true; > BREAK_FROM_IMM_USE_STMT (ui); > } >
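As background for the short-enums question: with only three enumerators, -fshort-enums (the arm-none-eabi default) lets the compiler give the enum a single-byte type, while -fno-short-enums (the arm-linux-gnueabi default) keeps it int-sized. A sketch of the effect on the structure used by ssa-dse-26.c, with a typical 32-bit ARM layout assumed rather than measured:

enum constraint_expr_type { SCALAR, DEREF, ADDRESSOF };

struct constraint_expr
{
  enum constraint_expr_type type;  /* int-sized normally; a single byte with
                                      -fshort-enums, typically followed by
                                      3 bytes of padding before VAR on a
                                      32-bit ARM target.  */
  unsigned int var;
  long offset;
};

/* The overall struct size is usually the same either way, but with
   -fshort-enums the stores to TYPE cover one byte instead of four, so the
   byte-level live/dead tracking in DSE sees a different picture and the
   expected "Deleted dead store" count in the dump can differ.  */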
On 5 October 2017 at 21:40, Christophe Lyon <christophe.lyon@linaro.org> wrote: > Hi Jeff, > > > On 7 September 2017 at 00:18, Jeff Law <law@redhat.com> wrote: >> Another old patch getting resurrected... >> >> > > This patch (r253305) introduces a new FAIL on arm-none-eabi (as > opposed arm-linux-gnueabi*): > FAIL: gcc.dg/tree-ssa/ssa-dse-26.c scan-tree-dump-times dse1 > "Deleted dead store" 2 > > I'm not familiar with all this, but I quickly looked at the testcase, > and noticed it > uses enums. > One ABI difference between arm-non-eabi and arm-linux-gnueabi is that the > bare-metal target defaults to short-enums, while the linux one uses > no-short-enums. > > Could that be the problem? > It looks like it was, since Wilco committed this: https://gcc.gnu.org/ml/gcc-patches/2017-10/msg00465.html > Thanks, > > Christophe > >> On 01/04/2017 06:50 AM, Richard Biener wrote: >>> On Thu, Dec 22, 2016 at 7:26 AM, Jeff Law <law@redhat.com> wrote: >>>> This is the final patch in the kit to improve our DSE implementation. >>>> >>>> It's based on a observation by Richi. Namely that a read from bytes of >>>> memory that are dead can be ignored. By ignoring such reads we can >>>> sometimes find additional stores that allow us to either eliminate or trim >>>> an earlier store more aggressively. >>>> >>>> This only hit (by hit I mean the ability to ignore resulted in finding a >>>> full or partially dead store that we didn't otherwise find) once during a >>>> bootstrap, but does hit often in the libstdc++ testsuite. I've added a test >>>> derived from the conversation between myself and Richi last year. >>>> >>>> There's nothing in the BZ database on this issue and I can't reasonably call >>>> it a bugfix. I wouldn't lose sleep if this deferred to gcc-8. >>>> >>>> Bootstrapped and regression tested on x86-64-linux-gnu. OK for the trunk or >>>> defer to gcc-8? >>>> >>>> >>>> >>>> * tree-ssa-dse.c (live_bytes_read): New function. >>>> (dse_classify_store): Ignore reads of dead bytes. >>>> >>>> * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test. >>>> * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: Likewise. >>>> >>>> >>>> >> [ snip ] >> >>>> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c >>>> index a807d6d..f5b53fc 100644 >>>> --- a/gcc/tree-ssa-dse.c >>>> +++ b/gcc/tree-ssa-dse.c >>>> @@ -475,6 +475,41 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap >>>> live, gimple *stmt) >>>> } >>>> } >>>> >>>> +/* Return TRUE if USE_REF reads bytes from LIVE where live is >>>> + derived from REF, a write reference. >>>> + >>>> + While this routine may modify USE_REF, it's passed by value, not >>>> + location. So callers do not see those modifications. */ >>>> + >>>> +static bool >>>> +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live) >>>> +{ >>>> + /* We have already verified that USE_REF and REF hit the same object. >>>> + Now verify that there's actually an overlap between USE_REF and REF. >>>> */ >>>> + if ((use_ref.offset < ref->offset >>>> + && use_ref.offset + use_ref.size > ref->offset) >>>> + || (use_ref.offset >= ref->offset >>>> + && use_ref.offset < ref->offset + ref->size)) >>> >>> can you use ranges_overlap_p? (tree-ssa-alias.h) >> Yes. Didn't know about it. Done. >> >>> >>>> + { >>>> + normalize_ref (&use_ref, ref); >>>> + >>>> + /* If USE_REF covers all of REF, then it will hit one or more >>>> + live bytes. This avoids useless iteration over the bitmap >>>> + below. 
*/ >>>> + if (use_ref.offset == ref->offset && use_ref.size == ref->size) >>>> + return true; >>>> + >>>> + /* Now iterate over what's left in USE_REF and see if any of >>>> + those bits are i LIVE. */ >>>> + for (int i = (use_ref.offset - ref->offset) / BITS_PER_UNIT; >>>> + i < (use_ref.offset + use_ref.size) / BITS_PER_UNIT; i++) >>>> + if (bitmap_bit_p (live, i)) >>> >>> a bitmap_bit_in_range_p () would be nice to have. And it can be more >>> efficient than this loop... >> Yea. That likely would help here. I'm testing with a >> bitmap_bit_in_range_p implementation (only for sbitmaps since that's >> what we're using here). >> >> That implementation does the reasonably efficient things and is modeled >> after the sbitmap implementation of bitmap_set_range. >> >> >>>> @@ -554,6 +589,41 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple >>>> **use_stmt, >>>> /* If the statement is a use the store is not dead. */ >>>> else if (ref_maybe_used_by_stmt_p (use_stmt, ref)) >>>> { >>>> + /* Handle common cases where we can easily build a ao_ref >>>> + structure for USE_STMT and in doing so we find that the >>>> + references hit non-live bytes and thus can be ignored. */ >>>> + if (live_bytes) >>>> + { >>>> + if (is_gimple_assign (use_stmt)) >>>> + { >>>> + /* Other cases were noted as non-aliasing by >>>> + the call to ref_maybe_used_by_stmt_p. */ >>>> + ao_ref use_ref; >>>> + ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt)); >>>> + if (valid_ao_ref_for_dse (&use_ref) >>>> + && use_ref.base == ref->base >>>> + && use_ref.size == use_ref.max_size >>>> + && !live_bytes_read (use_ref, ref, live_bytes)) >>>> + { >>>> + if (gimple_vdef (use_stmt)) >>>> + { >>>> + /* If we have already seen a store and >>>> + this is also a store, then we have to >>>> + fail. */ >>>> + if (temp) >>>> + { >>>> + fail = true; >>>> + BREAK_FROM_IMM_USE_STMT (ui); >>>> + } >>> >>> as this case is rather cheap to test please test it together with >>> live_bytes. Like >>> >>> if (live_bytes && (! gimple_vdef (use_stmt) || ! temp)) >> Seems reasonable. >> >> >>> >>> otherwise the patch looks reasonable for GCC 8. >> Given the sbitmap.[ch] change, posting for re-approval >> >> Bootstrapped and regression tested on x86_64. Earlier version without >> the bitmap_bit_in_range_p improvement was also bootstrapped and >> regression tested on aarch64. >> >> Jeff >> >> >> >> * sbitmap.c (bitmap_bit_in_range_p): New function. >> * sbitmap.h (bitmap_bit_in_range_p): Prototype. >> * tree-ssa-dse.c (live_bytes_read): New function. >> (dse_classify_store): Ignore reads of dead bytes. >> >> * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test. >> >> diff --git a/gcc/sbitmap.c b/gcc/sbitmap.c >> index c065d13..4bf13a1 100644 >> --- a/gcc/sbitmap.c >> +++ b/gcc/sbitmap.c >> @@ -316,6 +316,59 @@ bitmap_set_range (sbitmap bmap, unsigned int start, unsigned int count) >> bmap->elms[start_word] |= mask; >> } >> >> +/* Return TRUE if any bit between START and END inclusive is set within >> + the simple bitmap BMAP. Return FALSE otherwise. */ >> + >> +bool >> +bitmap_bit_in_range_p (const_sbitmap bmap, unsigned int start, unsigned int end) >> +{ >> + unsigned int start_word = start / SBITMAP_ELT_BITS; >> + unsigned int start_bitno = start % SBITMAP_ELT_BITS; >> + >> + /* Testing within a word, starting at the beginning of a word. 
*/ >> + if (start_bitno == 0 && (end - start) < SBITMAP_ELT_BITS) >> + { >> + SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << (end - start)) - 1; >> + return (bmap->elms[start_word] & mask) != 0; >> + } >> + >> + unsigned int end_word = end / SBITMAP_ELT_BITS; >> + unsigned int end_bitno = end % SBITMAP_ELT_BITS; >> + >> + /* Testing starts somewhere in the middle of a word. Test up to the >> + end of the word or the end of the requested region, whichever comes >> + first. */ >> + if (start_bitno != 0) >> + { >> + unsigned int nbits = ((start_word == end_word) >> + ? end_bitno - start_bitno >> + : SBITMAP_ELT_BITS - start_bitno); >> + SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << nbits) - 1; >> + mask <<= start_bitno; >> + if (bmap->elms[start_word] & mask) >> + return true; >> + start_word++; >> + } >> + >> + if (start_word > end_word) >> + return false; >> + >> + /* Now test words at a time until we hit a partial word. */ >> + unsigned int nwords = (end_word - start_word); >> + while (nwords) >> + { >> + if (bmap->elms[start_word]) >> + return true; >> + start_word++; >> + nwords--; >> + } >> + >> + /* Now handle residuals in the last word. */ >> + SBITMAP_ELT_TYPE mask >> + = ((SBITMAP_ELT_TYPE)1 << (SBITMAP_ELT_BITS - end_bitno)) - 1; >> + return (bmap->elms[start_word] & mask) != 0; >> +} >> + >> #if GCC_VERSION < 3400 >> /* Table of number of set bits in a character, indexed by value of char. */ >> static const unsigned char popcount_table[] = >> diff --git a/gcc/sbitmap.h b/gcc/sbitmap.h >> index ce4d27d..ff52e93 100644 >> --- a/gcc/sbitmap.h >> +++ b/gcc/sbitmap.h >> @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see >> * set_difference : bitmap_and_compl >> * set_disjuction : (not implemented) >> * set_compare : bitmap_equal_p >> + * bit_in_range_p : bitmap_bit_in_range_p >> >> Some operations on 3 sets that occur frequently in data flow problems >> are also implemented: >> @@ -253,6 +254,7 @@ extern bool bitmap_and (sbitmap, const_sbitmap, const_sbitmap); >> extern bool bitmap_ior (sbitmap, const_sbitmap, const_sbitmap); >> extern bool bitmap_xor (sbitmap, const_sbitmap, const_sbitmap); >> extern bool bitmap_subset_p (const_sbitmap, const_sbitmap); >> +extern bool bitmap_bit_in_range_p (const_sbitmap, unsigned int, unsigned int); >> >> extern int bitmap_first_set_bit (const_sbitmap); >> extern int bitmap_last_set_bit (const_sbitmap); >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c >> new file mode 100644 >> index 0000000..6605dfe >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c >> @@ -0,0 +1,33 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2 -fdump-tree-dse1-details" } */ >> + >> +enum constraint_expr_type >> +{ >> + SCALAR, DEREF, ADDRESSOF >> +}; >> +typedef struct constraint_expr >> +{ >> + enum constraint_expr_type type; >> + unsigned int var; >> + long offset; >> +} constraint_expr ; >> +typedef struct constraint >> +{ >> + struct constraint_expr lhs; >> + struct constraint_expr rhs; >> +} constraint; >> +static _Bool >> +constraint_expr_equal (struct constraint_expr x, struct constraint_expr y) >> +{ >> + return x.type == y.type && x.var == y.var && x.offset == y.offset; >> +} >> + >> +_Bool >> +constraint_equal (struct constraint a, struct constraint b) >> +{ >> + return constraint_expr_equal (a.lhs, b.lhs) >> + && constraint_expr_equal (a.rhs, b.rhs); >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Deleted dead store" 2 "dse1" } } */ >> + >> diff --git 
a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c >> index 70c8b07..1eca740 100644 >> --- a/gcc/tree-ssa-dse.c >> +++ b/gcc/tree-ssa-dse.c >> @@ -468,6 +468,36 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt) >> } >> } >> >> +/* Return TRUE if USE_REF reads bytes from LIVE where live is >> + derived from REF, a write reference. >> + >> + While this routine may modify USE_REF, it's passed by value, not >> + location. So callers do not see those modifications. */ >> + >> +static bool >> +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live) >> +{ >> + /* We have already verified that USE_REF and REF hit the same object. >> + Now verify that there's actually an overlap between USE_REF and REF. */ >> + if (ranges_overlap_p (use_ref.offset, use_ref.size, ref->offset, ref->size)) >> + { >> + normalize_ref (&use_ref, ref); >> + >> + /* If USE_REF covers all of REF, then it will hit one or more >> + live bytes. This avoids useless iteration over the bitmap >> + below. */ >> + if (use_ref.offset <= ref->offset >> + && use_ref.offset + use_ref.size >= ref->offset + ref->size) >> + return true; >> + >> + /* Now check if any of the remaining bits in use_ref are set in LIVE. */ >> + unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT; >> + unsigned int end = (use_ref.offset + use_ref.size) / BITS_PER_UNIT; >> + return bitmap_bit_in_range_p (live, start, end); >> + } >> + return true; >> +} >> + >> /* A helper of dse_optimize_stmt. >> Given a GIMPLE_ASSIGN in STMT that writes to REF, find a candidate >> statement *USE_STMT that may prove STMT to be dead. >> @@ -547,6 +577,31 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt, >> /* If the statement is a use the store is not dead. */ >> else if (ref_maybe_used_by_stmt_p (use_stmt, ref)) >> { >> + /* Handle common cases where we can easily build a ao_ref >> + structure for USE_STMT and in doing so we find that the >> + references hit non-live bytes and thus can be ignored. */ >> + if (live_bytes && (!gimple_vdef (use_stmt) || !temp)) >> + { >> + if (is_gimple_assign (use_stmt)) >> + { >> + /* Other cases were noted as non-aliasing by >> + the call to ref_maybe_used_by_stmt_p. */ >> + ao_ref use_ref; >> + ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt)); >> + if (valid_ao_ref_for_dse (&use_ref) >> + && use_ref.base == ref->base >> + && use_ref.size == use_ref.max_size >> + && !live_bytes_read (use_ref, ref, live_bytes)) >> + { >> + /* If this statement has a VDEF, then it is the >> + first store we have seen, so walk through it. */ >> + if (gimple_vdef (use_stmt)) >> + temp = use_stmt; >> + continue; >> + } >> + } >> + } >> + >> fail = true; >> BREAK_FROM_IMM_USE_STMT (ui); >> } >>
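For the record, one generic way to keep such a scan-dump test independent of the enum-size ABI (a sketch of the general technique, not necessarily what the fix linked above does) is to pin the enum ABI in the test's options:

/* { dg-options "-O2 -fdump-tree-dse1-details -fno-short-enums" } */

Alternatively, the test can avoid enum fields altogether so that its layout is identical on short-enums and non-short-enums targets.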
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c new file mode 100644 index 0000000..6605dfe --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-dse1-details" } */ + +enum constraint_expr_type +{ + SCALAR, DEREF, ADDRESSOF +}; +typedef struct constraint_expr +{ + enum constraint_expr_type type; + unsigned int var; + long offset; +} constraint_expr ; +typedef struct constraint +{ + struct constraint_expr lhs; + struct constraint_expr rhs; +} constraint; +static _Bool +constraint_expr_equal (struct constraint_expr x, struct constraint_expr y) +{ + return x.type == y.type && x.var == y.var && x.offset == y.offset; +} + +_Bool +constraint_equal (struct constraint a, struct constraint b) +{ + return constraint_expr_equal (a.lhs, b.lhs) + && constraint_expr_equal (a.rhs, b.rhs); +} + +/* { dg-final { scan-tree-dump-times "Deleted dead store" 2 "dse1" } } */ + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c new file mode 100644 index 0000000..48dc92e --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-dse1-details -fno-tree-fre -fno-tree-sra" } */ + +struct S { struct R { int x; int y; } r; int z; } s; + +extern void blah (struct S); + +void +foo () +{ + struct S s = { {1, 2}, 3 }; + s.r.x = 1; + s.r.y = 2; + struct R r = s.r; + s.z = 3; + blah (s); +} + + +/* { dg-final { scan-tree-dump-times "Deleted dead store" 4 "dse1" } } */ + diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c index a807d6d..f5b53fc 100644 --- a/gcc/tree-ssa-dse.c +++ b/gcc/tree-ssa-dse.c @@ -475,6 +475,41 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt) } } +/* Return TRUE if USE_REF reads bytes from LIVE where live is + derived from REF, a write reference. + + While this routine may modify USE_REF, it's passed by value, not + location. So callers do not see those modifications. */ + +static bool +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live) +{ + /* We have already verified that USE_REF and REF hit the same object. + Now verify that there's actually an overlap between USE_REF and REF. */ + if ((use_ref.offset < ref->offset + && use_ref.offset + use_ref.size > ref->offset) + || (use_ref.offset >= ref->offset + && use_ref.offset < ref->offset + ref->size)) + { + normalize_ref (&use_ref, ref); + + /* If USE_REF covers all of REF, then it will hit one or more + live bytes. This avoids useless iteration over the bitmap + below. */ + if (use_ref.offset == ref->offset && use_ref.size == ref->size) + return true; + + /* Now iterate over what's left in USE_REF and see if any of + those bits are i LIVE. */ + for (int i = (use_ref.offset - ref->offset) / BITS_PER_UNIT; + i < (use_ref.offset + use_ref.size) / BITS_PER_UNIT; i++) + if (bitmap_bit_p (live, i)) + return true; + return false; + } + return true; +} + /* A helper of dse_optimize_stmt. Given a GIMPLE_ASSIGN in STMT that writes to REF, find a candidate statement *USE_STMT that may prove STMT to be dead. @@ -554,6 +589,41 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt, /* If the statement is a use the store is not dead. 
*/ else if (ref_maybe_used_by_stmt_p (use_stmt, ref)) { + /* Handle common cases where we can easily build a ao_ref + structure for USE_STMT and in doing so we find that the + references hit non-live bytes and thus can be ignored. */ + if (live_bytes) + { + if (is_gimple_assign (use_stmt)) + { + /* Other cases were noted as non-aliasing by + the call to ref_maybe_used_by_stmt_p. */ + ao_ref use_ref; + ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt)); + if (valid_ao_ref_for_dse (&use_ref) + && use_ref.base == ref->base + && use_ref.size == use_ref.max_size + && !live_bytes_read (use_ref, ref, live_bytes)) + { + if (gimple_vdef (use_stmt)) + { + /* If we have already seen a store and + this is also a store, then we have to + fail. */ + if (temp) + { + fail = true; + BREAK_FROM_IMM_USE_STMT (ui); + } + + /* Otherwise walk through this store. */ + temp = use_stmt; + } + continue; + } + } + } + fail = true; BREAK_FROM_IMM_USE_STMT (ui); }