
[RFA,4/4] Ignore reads of "dead" memory locations in DSE

Message ID caf857de-0fdd-8a89-0f97-6764a8a9a6c1@redhat.com
State New

Commit Message

Jeff Law Dec. 22, 2016, 6:26 a.m. UTC
This is the final patch in the kit to improve our DSE implementation.

It's based on an observation by Richi.  Namely that a read from bytes of
memory that are dead can be ignored.  By ignoring such reads we can 
sometimes find additional stores that allow us to either eliminate or 
trim an earlier store more aggressively.
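
As an illustration (this mirrors the new ssa-dse-27.c test below; the
byte-level commentary is my own and assumes a 4-byte int):

  struct S { struct R { int x; int y; } r; int z; } s;
  extern void blah (struct S);

  void
  foo (void)
  {
    struct S s = { {1, 2}, 3 };  /* Candidate dead store.  */
    s.r.x = 1;                   /* Kills bytes 0-3 of the initializer.  */
    s.r.y = 2;                   /* Kills bytes 4-7.  */
    struct R r = s.r;            /* Reads only bytes 0-7, which are dead
                                    with respect to the initializer, so
                                    the read can be ignored.  */
    s.z = 3;                     /* Kills bytes 8-11; the initializer is
                                    now fully dead.  */
    blah (s);
  }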

This only hit (by hit I mean the ability to ignore resulted in finding a 
full or partially dead store that we didn't otherwise find) once during 
a bootstrap, but does hit often in the libstdc++ testsuite.  I've added 
a test derived from the conversation between myself and Richi last year.

There's nothing in the BZ database on this issue and I can't reasonably 
call it a bugfix.  I wouldn't lose sleep if this is deferred to gcc-8.

Bootstrapped and regression tested on x86-64-linux-gnu.  OK for the 
trunk or defer to gcc-8?

	* tree-ssa-dse.c (live_bytes_read): New function.
	(dse_classify_store): Ignore reads of dead bytes.

	* testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test.
	* testsuite/gcc.dg/tree-ssa/ssa-dse-27.c: Likewise.

Comments

Richard Biener Jan. 4, 2017, 1:50 p.m. UTC | #1
On Thu, Dec 22, 2016 at 7:26 AM, Jeff Law <law@redhat.com> wrote:
> This is the final patch in the kit to improve our DSE implementation.
>
> It's based on an observation by Richi.  Namely that a read from bytes of
> memory that are dead can be ignored.  By ignoring such reads we can
> sometimes find additional stores that allow us to either eliminate or trim
> an earlier store more aggressively.
>
> This only hit (by hit I mean the ability to ignore resulted in finding a
> full or partially dead store that we didn't otherwise find) once during a
> bootstrap, but does hit often in the libstdc++ testsuite.  I've added a test
> derived from the conversation between myself and Richi last year.
>
> There's nothing in the BZ database on this issue and I can't reasonably call
> it a bugfix.  I wouldn't lose sleep if this is deferred to gcc-8.
>
> Bootstrapped and regression tested on x86-64-linux-gnu.  OK for the trunk or
> defer to gcc-8?
>
>
>
>         * tree-ssa-dse.c (live_bytes_read): New function.
>         (dse_classify_store): Ignore reads of dead bytes.
>
>         * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test.
>         * testsuite/gcc.dg/tree-ssa/ssa-dse-27.c: Likewise.
>
>
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c
> new file mode 100644
> index 0000000..6605dfe
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c
> @@ -0,0 +1,33 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse1-details" } */
> +
> +enum constraint_expr_type
> +{
> +  SCALAR, DEREF, ADDRESSOF
> +};
> +typedef struct constraint_expr
> +{
> +  enum constraint_expr_type type;
> +  unsigned int var;
> +  long offset;
> +} constraint_expr ;
> +typedef struct constraint
> +{
> +  struct constraint_expr lhs;
> +  struct constraint_expr rhs;
> +} constraint;
> +static _Bool
> +constraint_expr_equal (struct constraint_expr x, struct constraint_expr y)
> +{
> +  return x.type == y.type && x.var == y.var && x.offset == y.offset;
> +}
> +
> +_Bool
> +constraint_equal (struct constraint a, struct constraint b)
> +{
> +  return constraint_expr_equal (a.lhs, b.lhs)
> +    && constraint_expr_equal (a.rhs, b.rhs);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Deleted dead store" 2 "dse1" } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c
> new file mode 100644
> index 0000000..48dc92e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse1-details -fno-tree-fre -fno-tree-sra" } */
> +
> +struct S { struct R { int x; int y; } r; int z; } s;
> +
> +extern void blah (struct S);
> +
> +void
> +foo ()
> +{
> +  struct S s = { {1, 2}, 3 };
> +  s.r.x = 1;
> +   s.r.y = 2;
> +   struct R r = s.r;
> +  s.z = 3;
> +  blah (s);
> +}
> +
> +
> +/* { dg-final { scan-tree-dump-times "Deleted dead store" 4 "dse1" } } */
> +
> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
> index a807d6d..f5b53fc 100644
> --- a/gcc/tree-ssa-dse.c
> +++ b/gcc/tree-ssa-dse.c
> @@ -475,6 +475,41 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt)
>      }
>  }
>
> +/* Return TRUE if USE_REF reads bytes from LIVE where live is
> +   derived from REF, a write reference.
> +
> +   While this routine may modify USE_REF, it's passed by value, not
> +   location.  So callers do not see those modifications.  */
> +
> +static bool
> +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
> +{
> +  /* We have already verified that USE_REF and REF hit the same object.
> +     Now verify that there's actually an overlap between USE_REF and REF.  */
> +  if ((use_ref.offset < ref->offset
> +       && use_ref.offset + use_ref.size > ref->offset)
> +      || (use_ref.offset >= ref->offset
> +         && use_ref.offset < ref->offset + ref->size))

can you use ranges_overlap_p? (tree-ssa-alias.h)
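
(ranges_overlap_p (pos1, size1, pos2, size2) tests whether the half-open
ranges [pos1, pos1 + size1) and [pos2, pos2 + size2) intersect; the
two-clause test above then becomes the single call

  if (ranges_overlap_p (use_ref.offset, use_ref.size,
			ref->offset, ref->size))

as the updated patch later in the thread does.)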

> +    {
> +      normalize_ref (&use_ref, ref);
> +
> +      /* If USE_REF covers all of REF, then it will hit one or more
> +        live bytes.   This avoids useless iteration over the bitmap
> +        below.  */
> +      if (use_ref.offset == ref->offset && use_ref.size == ref->size)
> +       return true;
> +
> +      /* Now iterate over what's left in USE_REF and see if any of
> +        those bits are in LIVE.  */
> +      for (int i = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
> +          i < (use_ref.offset + use_ref.size) / BITS_PER_UNIT; i++)
> +       if (bitmap_bit_p (live, i))

a bitmap_bit_in_range_p () would be nice to have.  And it can be more
efficient than this loop...
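
(With such a predicate the loop collapses to a single query, as the
follow-up patch below ends up doing:

  unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
  unsigned int end = (use_ref.offset + use_ref.size) / BITS_PER_UNIT;
  return bitmap_bit_in_range_p (live, start, end);

and the sbitmap implementation can then test whole words at a time
instead of single bits.)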

> +         return true;
> +      return false;
> +    }
> +  return true;
> +}
> +
>  /* A helper of dse_optimize_stmt.
>     Given a GIMPLE_ASSIGN in STMT that writes to REF, find a candidate
>     statement *USE_STMT that may prove STMT to be dead.
> @@ -554,6 +589,41 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
>           /* If the statement is a use the store is not dead.  */
>           else if (ref_maybe_used_by_stmt_p (use_stmt, ref))
>             {
> +             /* Handle common cases where we can easily build a ao_ref
> +                structure for USE_STMT and in doing so we find that the
> +                references hit non-live bytes and thus can be ignored.  */
> +             if (live_bytes)
> +               {
> +                 if (is_gimple_assign (use_stmt))
> +                   {
> +                     /* Other cases were noted as non-aliasing by
> +                        the call to ref_maybe_used_by_stmt_p.  */
> +                     ao_ref use_ref;
> +                     ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt));
> +                     if (valid_ao_ref_for_dse (&use_ref)
> +                         && use_ref.base == ref->base
> +                         && use_ref.size == use_ref.max_size
> +                         && !live_bytes_read (use_ref, ref, live_bytes))
> +                       {
> +                         if (gimple_vdef (use_stmt))
> +                           {
> +                             /* If we have already seen a store and
> +                                this is also a store, then we have to
> +                                fail.  */
> +                             if (temp)
> +                               {
> +                                 fail = true;
> +                                 BREAK_FROM_IMM_USE_STMT (ui);
> +                               }

as this case is rather cheap to test please test it together with
live_bytes.  Like

  if (live_bytes && (! gimple_vdef (use_stmt) || ! temp))

otherwise the patch looks reasonable for GCC 8.

Richard.

> +
> +                             /* Otherwise walk through this store.  */
> +                             temp = use_stmt;
> +                           }
> +                         continue;
> +                       }
> +                   }
> +               }
> +
>               fail = true;
>               BREAK_FROM_IMM_USE_STMT (ui);
>             }
>
Jeff Law Sept. 6, 2017, 10:18 p.m. UTC | #2
Another old patch getting resurrected...


On 01/04/2017 06:50 AM, Richard Biener wrote:
> On Thu, Dec 22, 2016 at 7:26 AM, Jeff Law <law@redhat.com> wrote:
>> This is the final patch in the kit to improve our DSE implementation.
>>
>> It's based on an observation by Richi.  Namely that a read from bytes of
>> memory that are dead can be ignored.  By ignoring such reads we can
>> sometimes find additional stores that allow us to either eliminate or trim
>> an earlier store more aggressively.
>>
>> This only hit (by hit I mean the ability to ignore resulted in finding a
>> full or partially dead store that we didn't otherwise find) once during a
>> bootstrap, but does hit often in the libstdc++ testsuite.  I've added a test
>> derived from the conversation between myself and Richi last year.
>>
>> There's nothing in the BZ database on this issue and I can't reasonably call
>> it a bugfix.  I wouldn't lose sleep if this is deferred to gcc-8.
>>
>> Bootstrapped and regression tested on x86-64-linux-gnu.  OK for the trunk or
>> defer to gcc-8?
>>
>>
>>
>>         * tree-ssa-dse.c (live_bytes_read): New function.
>>         (dse_classify_store): Ignore reads of dead bytes.
>>
>>         * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test.
>>         * testsuite/gcc.dg/tree-ssa/ssa-dse-27.c: Likewise.
>>
>>
>>
[ snip ]

>> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
>> index a807d6d..f5b53fc 100644
>> --- a/gcc/tree-ssa-dse.c
>> +++ b/gcc/tree-ssa-dse.c
>> @@ -475,6 +475,41 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt)
>>      }
>>  }
>>
>> +/* Return TRUE if USE_REF reads bytes from LIVE where live is
>> +   derived from REF, a write reference.
>> +
>> +   While this routine may modify USE_REF, it's passed by value, not
>> +   location.  So callers do not see those modifications.  */
>> +
>> +static bool
>> +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
>> +{
>> +  /* We have already verified that USE_REF and REF hit the same object.
>> +     Now verify that there's actually an overlap between USE_REF and REF.  */
>> +  if ((use_ref.offset < ref->offset
>> +       && use_ref.offset + use_ref.size > ref->offset)
>> +      || (use_ref.offset >= ref->offset
>> +         && use_ref.offset < ref->offset + ref->size))
> 
> can you use ranges_overlap_p? (tree-ssa-alias.h)
Yes.  Didn't know about it.  Done.

> 
>> +    {
>> +      normalize_ref (&use_ref, ref);
>> +
>> +      /* If USE_REF covers all of REF, then it will hit one or more
>> +        live bytes.   This avoids useless iteration over the bitmap
>> +        below.  */
>> +      if (use_ref.offset == ref->offset && use_ref.size == ref->size)
>> +       return true;
>> +
>> +      /* Now iterate over what's left in USE_REF and see if any of
>> +        those bits are in LIVE.  */
>> +      for (int i = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
>> +          i < (use_ref.offset + use_ref.size) / BITS_PER_UNIT; i++)
>> +       if (bitmap_bit_p (live, i))
> 
> a bitmap_bit_in_range_p () would be nice to have.  And it can be more
> efficient than this loop...
Yea.  That likely would help here.  I'm testing with a
bitmap_bit_in_range_p implementation (only for sbitmaps since that's
what we're using here).

That implementation does the reasonably efficient thing and is modeled
after the sbitmap implementation of bitmap_set_range.


>> @@ -554,6 +589,41 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
>>           /* If the statement is a use the store is not dead.  */
>>           else if (ref_maybe_used_by_stmt_p (use_stmt, ref))
>>             {
>> +             /* Handle common cases where we can easily build a ao_ref
>> +                structure for USE_STMT and in doing so we find that the
>> +                references hit non-live bytes and thus can be ignored.  */
>> +             if (live_bytes)
>> +               {
>> +                 if (is_gimple_assign (use_stmt))
>> +                   {
>> +                     /* Other cases were noted as non-aliasing by
>> +                        the call to ref_maybe_used_by_stmt_p.  */
>> +                     ao_ref use_ref;
>> +                     ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt));
>> +                     if (valid_ao_ref_for_dse (&use_ref)
>> +                         && use_ref.base == ref->base
>> +                         && use_ref.size == use_ref.max_size
>> +                         && !live_bytes_read (use_ref, ref, live_bytes))
>> +                       {
>> +                         if (gimple_vdef (use_stmt))
>> +                           {
>> +                             /* If we have already seen a store and
>> +                                this is also a store, then we have to
>> +                                fail.  */
>> +                             if (temp)
>> +                               {
>> +                                 fail = true;
>> +                                 BREAK_FROM_IMM_USE_STMT (ui);
>> +                               }
> 
> as this case is rather cheap to test please test it together with
> live_bytes.  Like
> 
>   if (live_bytes && (! gimple_vdef (use_stmt) || ! temp))
Seems reasonable.


> 
> otherwise the patch looks reasonable for GCC 8.
Given the sbitmap.[ch] change, posting for re-approval.

Bootstrapped and regression tested on x86_64.  Earlier version without
the bitmap_bit_in_range_p improvement was also bootstrapped and
regression tested on aarch64.

Jeff

	* sbitmap.c (bitmap_bit_in_range_p): New function.
	* sbitmap.h (bitmap_bit_in_range_p): Prototype.
	* tree-ssa-dse.c (live_bytes_read): New function.
	(dse_classify_store): Ignore reads of dead bytes.

	* testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test.

diff --git a/gcc/sbitmap.c b/gcc/sbitmap.c
index c065d13..4bf13a1 100644
--- a/gcc/sbitmap.c
+++ b/gcc/sbitmap.c
@@ -316,6 +316,59 @@ bitmap_set_range (sbitmap bmap, unsigned int start, unsigned int count)
   bmap->elms[start_word] |= mask;
 }
 
+/* Return TRUE if any bit between START and END inclusive is set within
+   the simple bitmap BMAP.  Return FALSE otherwise.  */
+
+bool
+bitmap_bit_in_range_p (const_sbitmap bmap, unsigned int start, unsigned int end)
+{
+  unsigned int start_word = start / SBITMAP_ELT_BITS;
+  unsigned int start_bitno = start % SBITMAP_ELT_BITS;
+
+  /* Testing within a word, starting at the beginning of a word.  */
+  if (start_bitno == 0 && (end - start) < SBITMAP_ELT_BITS)
+    {
+      SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << (end - start)) - 1;
+      return (bmap->elms[start_word] & mask) != 0;
+    }
+
+  unsigned int end_word = end / SBITMAP_ELT_BITS;
+  unsigned int end_bitno = end % SBITMAP_ELT_BITS;
+
+  /* Testing starts somewhere in the middle of a word.  Test up to the
+     end of the word or the end of the requested region, whichever comes
+     first.  */
+  if (start_bitno != 0)
+    {
+      unsigned int nbits = ((start_word == end_word)
+			    ? end_bitno - start_bitno
+			    : SBITMAP_ELT_BITS - start_bitno);
+      SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << nbits) - 1;
+      mask <<= start_bitno;
+      if (bmap->elms[start_word] & mask)
+	return true;
+      start_word++;
+    }
+
+  if (start_word > end_word)
+    return false;
+
+  /* Now test words at a time until we hit a partial word.  */
+  unsigned int nwords = (end_word - start_word);
+  while (nwords)
+    {
+      if (bmap->elms[start_word])
+	return true;
+      start_word++;
+      nwords--;
+    }
+
+  /* Now handle residuals in the last word.  */
+  SBITMAP_ELT_TYPE mask
+    = ((SBITMAP_ELT_TYPE)1 << (SBITMAP_ELT_BITS - end_bitno)) - 1;
+  return (bmap->elms[start_word] & mask) != 0;
+}
+
 #if GCC_VERSION < 3400
 /* Table of number of set bits in a character, indexed by value of char.  */
 static const unsigned char popcount_table[] =
diff --git a/gcc/sbitmap.h b/gcc/sbitmap.h
index ce4d27d..ff52e93 100644
--- a/gcc/sbitmap.h
+++ b/gcc/sbitmap.h
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
      * set_difference		: bitmap_and_compl
      * set_disjuction		: (not implemented)
      * set_compare		: bitmap_equal_p
+     * bit_in_range_p		: bitmap_bit_in_range_p
 
    Some operations on 3 sets that occur frequently in data flow problems
    are also implemented:
@@ -253,6 +254,7 @@ extern bool bitmap_and (sbitmap, const_sbitmap, const_sbitmap);
 extern bool bitmap_ior (sbitmap, const_sbitmap, const_sbitmap);
 extern bool bitmap_xor (sbitmap, const_sbitmap, const_sbitmap);
 extern bool bitmap_subset_p (const_sbitmap, const_sbitmap);
+extern bool bitmap_bit_in_range_p (const_sbitmap, unsigned int, unsigned int);
 
 extern int bitmap_first_set_bit (const_sbitmap);
 extern int bitmap_last_set_bit (const_sbitmap);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c
new file mode 100644
index 0000000..6605dfe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse1-details" } */
+
+enum constraint_expr_type
+{
+  SCALAR, DEREF, ADDRESSOF
+};
+typedef struct constraint_expr
+{
+  enum constraint_expr_type type;
+  unsigned int var;
+  long offset;
+} constraint_expr ;
+typedef struct constraint
+{
+  struct constraint_expr lhs;
+  struct constraint_expr rhs;
+} constraint;
+static _Bool
+constraint_expr_equal (struct constraint_expr x, struct constraint_expr y)
+{
+  return x.type == y.type && x.var == y.var && x.offset == y.offset;
+}
+
+_Bool
+constraint_equal (struct constraint a, struct constraint b)
+{
+  return constraint_expr_equal (a.lhs, b.lhs)
+    && constraint_expr_equal (a.rhs, b.rhs);
+}
+
+/* { dg-final { scan-tree-dump-times "Deleted dead store" 2 "dse1" } } */
+
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 70c8b07..1eca740 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -468,6 +468,36 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt)
     }
 }
 
+/* Return TRUE if USE_REF reads bytes from LIVE where live is
+   derived from REF, a write reference.
+
+   While this routine may modify USE_REF, it's passed by value, not
+   location.  So callers do not see those modifications.  */
+
+static bool
+live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
+{
+  /* We have already verified that USE_REF and REF hit the same object.
+     Now verify that there's actually an overlap between USE_REF and REF.  */
+  if (ranges_overlap_p (use_ref.offset, use_ref.size, ref->offset, ref->size))
+    {
+      normalize_ref (&use_ref, ref);
+
+      /* If USE_REF covers all of REF, then it will hit one or more
+	 live bytes.   This avoids useless iteration over the bitmap
+	 below.  */
+      if (use_ref.offset <= ref->offset
+	  && use_ref.offset + use_ref.size >= ref->offset + ref->size)
+	return true;
+
+      /* Now check if any of the remaining bits in use_ref are set in LIVE.  */
+      unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
+      unsigned int end  = (use_ref.offset + use_ref.size) / BITS_PER_UNIT;
+      return bitmap_bit_in_range_p (live, start, end);
+    }
+  return true;
+}
+
 /* A helper of dse_optimize_stmt.
    Given a GIMPLE_ASSIGN in STMT that writes to REF, find a candidate
    statement *USE_STMT that may prove STMT to be dead.
@@ -547,6 +577,31 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
 	  /* If the statement is a use the store is not dead.  */
 	  else if (ref_maybe_used_by_stmt_p (use_stmt, ref))
 	    {
+	      /* Handle common cases where we can easily build a ao_ref
+		 structure for USE_STMT and in doing so we find that the
+		 references hit non-live bytes and thus can be ignored.  */
+	      if (live_bytes && (!gimple_vdef (use_stmt) || !temp))
+		{
+		  if (is_gimple_assign (use_stmt))
+		    {
+		      /* Other cases were noted as non-aliasing by
+			 the call to ref_maybe_used_by_stmt_p.  */
+		      ao_ref use_ref;
+		      ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt));
+		      if (valid_ao_ref_for_dse (&use_ref)
+			  && use_ref.base == ref->base
+			  && use_ref.size == use_ref.max_size
+			  && !live_bytes_read (use_ref, ref, live_bytes))
+			{
+			  /* If this statement has a VDEF, then it is the
+			     first store we have seen, so walk through it.  */
+			  if (gimple_vdef (use_stmt))
+			    temp = use_stmt;
+			  continue;
+			}
+		    }
+		}
+
 	      fail = true;
 	      BREAK_FROM_IMM_USE_STMT (ui);
 	    }
Richard Sandiford Oct. 2, 2017, 4:36 p.m. UTC | #3
Jeff Law <law@redhat.com> writes:
> @@ -468,6 +468,36 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt)
>      }
>  }
>  
> +/* Return TRUE if USE_REF reads bytes from LIVE where live is
> +   derived from REF, a write reference.
> +
> +   While this routine may modify USE_REF, it's passed by value, not
> +   location.  So callers do not see those modifications.  */
> +
> +static bool
> +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
> +{
> +  /* We have already verified that USE_REF and REF hit the same object.
> +     Now verify that there's actually an overlap between USE_REF and REF.  */
> +  if (ranges_overlap_p (use_ref.offset, use_ref.size, ref->offset, ref->size))
> +    {
> +      normalize_ref (&use_ref, ref);
> +
> +      /* If USE_REF covers all of REF, then it will hit one or more
> +	 live bytes.   This avoids useless iteration over the bitmap
> +	 below.  */
> +      if (use_ref.offset <= ref->offset
> +	  && use_ref.offset + use_ref.size >= ref->offset + ref->size)
> +	return true;
> +
> +      /* Now check if any of the remaining bits in use_ref are set in LIVE.  */
> +      unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
> +      unsigned int end  = (use_ref.offset + use_ref.size) / BITS_PER_UNIT;
> +      return bitmap_bit_in_range_p (live, start, end);
> +    }
> +  return true;
> +}

When rebasing the SVE changes on top of this, I wasn't sure why the
function returned true rather than false when there's no overlap.
Is that deliberate?  It might be worth a comment if so.

Thanks,
Richard
Christophe Lyon Oct. 5, 2017, 7:40 p.m. UTC | #4
Hi Jeff,


On 7 September 2017 at 00:18, Jeff Law <law@redhat.com> wrote:
> Another old patch getting resurrected...
>
>

This patch (r253305) introduces a new FAIL on arm-none-eabi (as
opposed to arm-linux-gnueabi*):
FAIL: gcc.dg/tree-ssa/ssa-dse-26.c scan-tree-dump-times dse1 "Deleted dead store" 2

I'm not familiar with all this, but I quickly looked at the testcase
and noticed that it uses enums.
One ABI difference between arm-none-eabi and arm-linux-gnueabi is that the
bare-metal target defaults to short-enums, while the linux one uses
no-short-enums.

Could that be the problem?
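
(Sketching the layout difference, with the ABI details assumed rather
than verified: with only three enumerators, -fshort-enums shrinks the
enum to a single byte, so on a 32-bit ARM target the test's struct
becomes

  struct constraint_expr            /* -fshort-enums (arm-none-eabi)  */
  {
    enum constraint_expr_type type; /* 1 byte, then 3 bytes of padding  */
    unsigned int var;               /* offset 4  */
    long offset;                    /* offset 8; sizeof is still 12  */
  };

whereas with -fno-short-enums (arm-linux-gnueabi) 'type' occupies a
full 4 bytes and there is no padding.  Padding bytes are never read,
which plausibly changes which bytes DSE tracks as live and hence how
many stores it reports as deleted.)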

Thanks,

Christophe

> On 01/04/2017 06:50 AM, Richard Biener wrote:
>> On Thu, Dec 22, 2016 at 7:26 AM, Jeff Law <law@redhat.com> wrote:
>>> This is the final patch in the kit to improve our DSE implementation.
>>>
>>> It's based on an observation by Richi.  Namely that a read from bytes of
>>> memory that are dead can be ignored.  By ignoring such reads we can
>>> sometimes find additional stores that allow us to either eliminate or trim
>>> an earlier store more aggressively.
>>>
>>> This only hit (by hit I mean the ability to ignore resulted in finding a
>>> full or partially dead store that we didn't otherwise find) once during a
>>> bootstrap, but does hit often in the libstdc++ testsuite.  I've added a test
>>> derived from the conversation between myself and Richi last year.
>>>
>>> There's nothing in the BZ database on this issue and I can't reasonably call
>>> it a bugfix.  I wouldn't lose sleep if this is deferred to gcc-8.
>>>
>>> Bootstrapped and regression tested on x86-64-linux-gnu.  OK for the trunk or
>>> defer to gcc-8?
>>>
>>>
>>>
>>>         * tree-ssa-dse.c (live_bytes_read): New function.
>>>         (dse_classify_store): Ignore reads of dead bytes.
>>>
>>>         * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test.
>>>         * testsuite/gcc.dg/tree-ssa/ssa-dse-27.c: Likewise.
>>>
>>>
>>>
> [ snip ]
>
>>> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
>>> index a807d6d..f5b53fc 100644
>>> --- a/gcc/tree-ssa-dse.c
>>> +++ b/gcc/tree-ssa-dse.c
>>> @@ -475,6 +475,41 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt)
>>>      }
>>>  }
>>>
>>> +/* Return TRUE if USE_REF reads bytes from LIVE where live is
>>> +   derived from REF, a write reference.
>>> +
>>> +   While this routine may modify USE_REF, it's passed by value, not
>>> +   location.  So callers do not see those modifications.  */
>>> +
>>> +static bool
>>> +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
>>> +{
>>> +  /* We have already verified that USE_REF and REF hit the same object.
>>> +     Now verify that there's actually an overlap between USE_REF and REF.  */
>>> +  if ((use_ref.offset < ref->offset
>>> +       && use_ref.offset + use_ref.size > ref->offset)
>>> +      || (use_ref.offset >= ref->offset
>>> +         && use_ref.offset < ref->offset + ref->size))
>>
>> can you use ranges_overlap_p? (tree-ssa-alias.h)
> Yes.  Didn't know about it.  Done.
>
>>
>>> +    {
>>> +      normalize_ref (&use_ref, ref);
>>> +
>>> +      /* If USE_REF covers all of REF, then it will hit one or more
>>> +        live bytes.   This avoids useless iteration over the bitmap
>>> +        below.  */
>>> +      if (use_ref.offset == ref->offset && use_ref.size == ref->size)
>>> +       return true;
>>> +
>>> +      /* Now iterate over what's left in USE_REF and see if any of
>>> +        those bits are in LIVE.  */
>>> +      for (int i = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
>>> +          i < (use_ref.offset + use_ref.size) / BITS_PER_UNIT; i++)
>>> +       if (bitmap_bit_p (live, i))
>>
>> a bitmap_bit_in_range_p () would be nice to have.  And it can be more
>> efficient than this loop...
> Yea.  That likely would help here.  I'm testing with a
> bitmap_bit_in_range_p implementation (only for sbitmaps since that's
> what we're using here).
>
> That implementation does the reasonably efficient things and is modeled
> after the sbitmap implementation of bitmap_set_range.
>
>
>>> @@ -554,6 +589,41 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
>>>           /* If the statement is a use the store is not dead.  */
>>>           else if (ref_maybe_used_by_stmt_p (use_stmt, ref))
>>>             {
>>> +             /* Handle common cases where we can easily build a ao_ref
>>> +                structure for USE_STMT and in doing so we find that the
>>> +                references hit non-live bytes and thus can be ignored.  */
>>> +             if (live_bytes)
>>> +               {
>>> +                 if (is_gimple_assign (use_stmt))
>>> +                   {
>>> +                     /* Other cases were noted as non-aliasing by
>>> +                        the call to ref_maybe_used_by_stmt_p.  */
>>> +                     ao_ref use_ref;
>>> +                     ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt));
>>> +                     if (valid_ao_ref_for_dse (&use_ref)
>>> +                         && use_ref.base == ref->base
>>> +                         && use_ref.size == use_ref.max_size
>>> +                         && !live_bytes_read (use_ref, ref, live_bytes))
>>> +                       {
>>> +                         if (gimple_vdef (use_stmt))
>>> +                           {
>>> +                             /* If we have already seen a store and
>>> +                                this is also a store, then we have to
>>> +                                fail.  */
>>> +                             if (temp)
>>> +                               {
>>> +                                 fail = true;
>>> +                                 BREAK_FROM_IMM_USE_STMT (ui);
>>> +                               }
>>
>> as this case is rather cheap to test please test it together with
>> live_bytes.  Like
>>
>>   if (live_bytes && (! gimple_vdef (use_stmt) || ! temp))
> Seems reasonable.
>
>
>>
>> otherwise the patch looks reasonable for GCC 8.
> Given the sbitmap.[ch] change, posting for re-approval
>
> Bootstrapped and regression tested on x86_64.  Earlier version without
> the bitmap_bit_in_range_p improvement was also bootstrapped and
> regression tested on aarch64.
>
> Jeff
>
>
>
>         * sbitmap.c (bitmap_bit_in_range_p): New function.
>         * sbitmap.h (bitmap_bit_in_range_p): Prototype.
>         * tree-ssa-dse.c (live_bytes_read): New function.
>         (dse_classify_store): Ignore reads of dead bytes.
>
>         * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test.
>
> diff --git a/gcc/sbitmap.c b/gcc/sbitmap.c
> index c065d13..4bf13a1 100644
> --- a/gcc/sbitmap.c
> +++ b/gcc/sbitmap.c
> @@ -316,6 +316,59 @@ bitmap_set_range (sbitmap bmap, unsigned int start, unsigned int count)
>    bmap->elms[start_word] |= mask;
>  }
>
> +/* Return TRUE if any bit between START and END inclusive is set within
> +   the simple bitmap BMAP.  Return FALSE otherwise.  */
> +
> +bool
> +bitmap_bit_in_range_p (const_sbitmap bmap, unsigned int start, unsigned int end)
> +{
> +  unsigned int start_word = start / SBITMAP_ELT_BITS;
> +  unsigned int start_bitno = start % SBITMAP_ELT_BITS;
> +
> +  /* Testing within a word, starting at the beginning of a word.  */
> +  if (start_bitno == 0 && (end - start) < SBITMAP_ELT_BITS)
> +    {
> +      SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << (end - start)) - 1;
> +      return (bmap->elms[start_word] & mask) != 0;
> +    }
> +
> +  unsigned int end_word = end / SBITMAP_ELT_BITS;
> +  unsigned int end_bitno = end % SBITMAP_ELT_BITS;
> +
> +  /* Testing starts somewhere in the middle of a word.  Test up to the
> +     end of the word or the end of the requested region, whichever comes
> +     first.  */
> +  if (start_bitno != 0)
> +    {
> +      unsigned int nbits = ((start_word == end_word)
> +                           ? end_bitno - start_bitno
> +                           : SBITMAP_ELT_BITS - start_bitno);
> +      SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << nbits) - 1;
> +      mask <<= start_bitno;
> +      if (bmap->elms[start_word] & mask)
> +       return true;
> +      start_word++;
> +    }
> +
> +  if (start_word > end_word)
> +    return false;
> +
> +  /* Now test words at a time until we hit a partial word.  */
> +  unsigned int nwords = (end_word - start_word);
> +  while (nwords)
> +    {
> +      if (bmap->elms[start_word])
> +       return true;
> +      start_word++;
> +      nwords--;
> +    }
> +
> +  /* Now handle residuals in the last word.  */
> +  SBITMAP_ELT_TYPE mask
> +    = ((SBITMAP_ELT_TYPE)1 << (SBITMAP_ELT_BITS - end_bitno)) - 1;
> +  return (bmap->elms[start_word] & mask) != 0;
> +}
> +
>  #if GCC_VERSION < 3400
>  /* Table of number of set bits in a character, indexed by value of char.  */
>  static const unsigned char popcount_table[] =
> diff --git a/gcc/sbitmap.h b/gcc/sbitmap.h
> index ce4d27d..ff52e93 100644
> --- a/gcc/sbitmap.h
> +++ b/gcc/sbitmap.h
> @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
>       * set_difference          : bitmap_and_compl
>       * set_disjuction          : (not implemented)
>       * set_compare             : bitmap_equal_p
> +     * bit_in_range_p          : bitmap_bit_in_range_p
>
>     Some operations on 3 sets that occur frequently in data flow problems
>     are also implemented:
> @@ -253,6 +254,7 @@ extern bool bitmap_and (sbitmap, const_sbitmap, const_sbitmap);
>  extern bool bitmap_ior (sbitmap, const_sbitmap, const_sbitmap);
>  extern bool bitmap_xor (sbitmap, const_sbitmap, const_sbitmap);
>  extern bool bitmap_subset_p (const_sbitmap, const_sbitmap);
> +extern bool bitmap_bit_in_range_p (const_sbitmap, unsigned int, unsigned int);
>
>  extern int bitmap_first_set_bit (const_sbitmap);
>  extern int bitmap_last_set_bit (const_sbitmap);
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c
> new file mode 100644
> index 0000000..6605dfe
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c
> @@ -0,0 +1,33 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse1-details" } */
> +
> +enum constraint_expr_type
> +{
> +  SCALAR, DEREF, ADDRESSOF
> +};
> +typedef struct constraint_expr
> +{
> +  enum constraint_expr_type type;
> +  unsigned int var;
> +  long offset;
> +} constraint_expr ;
> +typedef struct constraint
> +{
> +  struct constraint_expr lhs;
> +  struct constraint_expr rhs;
> +} constraint;
> +static _Bool
> +constraint_expr_equal (struct constraint_expr x, struct constraint_expr y)
> +{
> +  return x.type == y.type && x.var == y.var && x.offset == y.offset;
> +}
> +
> +_Bool
> +constraint_equal (struct constraint a, struct constraint b)
> +{
> +  return constraint_expr_equal (a.lhs, b.lhs)
> +    && constraint_expr_equal (a.rhs, b.rhs);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Deleted dead store" 2 "dse1" } } */
> +
> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
> index 70c8b07..1eca740 100644
> --- a/gcc/tree-ssa-dse.c
> +++ b/gcc/tree-ssa-dse.c
> @@ -468,6 +468,36 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt)
>      }
>  }
>
> +/* Return TRUE if USE_REF reads bytes from LIVE where live is
> +   derived from REF, a write reference.
> +
> +   While this routine may modify USE_REF, it's passed by value, not
> +   location.  So callers do not see those modifications.  */
> +
> +static bool
> +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
> +{
> +  /* We have already verified that USE_REF and REF hit the same object.
> +     Now verify that there's actually an overlap between USE_REF and REF.  */
> +  if (ranges_overlap_p (use_ref.offset, use_ref.size, ref->offset, ref->size))
> +    {
> +      normalize_ref (&use_ref, ref);
> +
> +      /* If USE_REF covers all of REF, then it will hit one or more
> +        live bytes.   This avoids useless iteration over the bitmap
> +        below.  */
> +      if (use_ref.offset <= ref->offset
> +         && use_ref.offset + use_ref.size >= ref->offset + ref->size)
> +       return true;
> +
> +      /* Now check if any of the remaining bits in use_ref are set in LIVE.  */
> +      unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
> +      unsigned int end  = (use_ref.offset + use_ref.size) / BITS_PER_UNIT;
> +      return bitmap_bit_in_range_p (live, start, end);
> +    }
> +  return true;
> +}
> +
>  /* A helper of dse_optimize_stmt.
>     Given a GIMPLE_ASSIGN in STMT that writes to REF, find a candidate
>     statement *USE_STMT that may prove STMT to be dead.
> @@ -547,6 +577,31 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
>           /* If the statement is a use the store is not dead.  */
>           else if (ref_maybe_used_by_stmt_p (use_stmt, ref))
>             {
> +             /* Handle common cases where we can easily build a ao_ref
> +                structure for USE_STMT and in doing so we find that the
> +                references hit non-live bytes and thus can be ignored.  */
> +             if (live_bytes && (!gimple_vdef (use_stmt) || !temp))
> +               {
> +                 if (is_gimple_assign (use_stmt))
> +                   {
> +                     /* Other cases were noted as non-aliasing by
> +                        the call to ref_maybe_used_by_stmt_p.  */
> +                     ao_ref use_ref;
> +                     ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt));
> +                     if (valid_ao_ref_for_dse (&use_ref)
> +                         && use_ref.base == ref->base
> +                         && use_ref.size == use_ref.max_size
> +                         && !live_bytes_read (use_ref, ref, live_bytes))
> +                       {
> +                         /* If this statement has a VDEF, then it is the
> +                            first store we have seen, so walk through it.  */
> +                         if (gimple_vdef (use_stmt))
> +                           temp = use_stmt;
> +                         continue;
> +                       }
> +                   }
> +               }
> +
>               fail = true;
>               BREAK_FROM_IMM_USE_STMT (ui);
>             }
>
Christophe Lyon Oct. 9, 2017, 11:05 a.m. UTC | #5
On 5 October 2017 at 21:40, Christophe Lyon <christophe.lyon@linaro.org> wrote:
> Hi Jeff,
>
>
> On 7 September 2017 at 00:18, Jeff Law <law@redhat.com> wrote:
>> Another old patch getting resurrected...
>>
>>
>
> This patch (r253305) introduces a new FAIL on arm-none-eabi (as
> opposed to arm-linux-gnueabi*):
> FAIL: gcc.dg/tree-ssa/ssa-dse-26.c scan-tree-dump-times dse1 "Deleted dead store" 2
>
> I'm not familiar with all this, but I quickly looked at the testcase
> and noticed that it uses enums.
> One ABI difference between arm-none-eabi and arm-linux-gnueabi is that the
> bare-metal target defaults to short-enums, while the linux one uses
> no-short-enums.
>
> Could that be the problem?
>

It looks like it was, since Wilco committed this:
https://gcc.gnu.org/ml/gcc-patches/2017-10/msg00465.html


> Thanks,
>
> Christophe
>
>> On 01/04/2017 06:50 AM, Richard Biener wrote:
>>> On Thu, Dec 22, 2016 at 7:26 AM, Jeff Law <law@redhat.com> wrote:
>>>> This is the final patch in the kit to improve our DSE implementation.
>>>>
>>>> It's based on an observation by Richi.  Namely that a read from bytes of
>>>> memory that are dead can be ignored.  By ignoring such reads we can
>>>> sometimes find additional stores that allow us to either eliminate or trim
>>>> an earlier store more aggressively.
>>>>
>>>> This only hit (by hit I mean the ability to ignore resulted in finding a
>>>> full or partially dead store that we didn't otherwise find) once during a
>>>> bootstrap, but does hit often in the libstdc++ testsuite.  I've added a test
>>>> derived from the conversation between myself and Richi last year.
>>>>
>>>> There's nothing in the BZ database on this issue and I can't reasonably call
>>>> it a bugfix.  I wouldn't lose sleep if this is deferred to gcc-8.
>>>>
>>>> Bootstrapped and regression tested on x86-64-linux-gnu.  OK for the trunk or
>>>> defer to gcc-8?
>>>>
>>>>
>>>>
>>>>         * tree-ssa-dse.c (live_bytes_read): New function.
>>>>         (dse_classify_store): Ignore reads of dead bytes.
>>>>
>>>>         * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test.
>>>>         * testsuite/gcc.dg/tree-ssa/ssa-dse-27.c: Likewise.
>>>>
>>>>
>>>>
>> [ snip ]
>>
>>>> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
>>>> index a807d6d..f5b53fc 100644
>>>> --- a/gcc/tree-ssa-dse.c
>>>> +++ b/gcc/tree-ssa-dse.c
>>>> @@ -475,6 +475,41 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt)
>>>>      }
>>>>  }
>>>>
>>>> +/* Return TRUE if USE_REF reads bytes from LIVE where live is
>>>> +   derived from REF, a write reference.
>>>> +
>>>> +   While this routine may modify USE_REF, it's passed by value, not
>>>> +   location.  So callers do not see those modifications.  */
>>>> +
>>>> +static bool
>>>> +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
>>>> +{
>>>> +  /* We have already verified that USE_REF and REF hit the same object.
>>>> +     Now verify that there's actually an overlap between USE_REF and REF.  */
>>>> +  if ((use_ref.offset < ref->offset
>>>> +       && use_ref.offset + use_ref.size > ref->offset)
>>>> +      || (use_ref.offset >= ref->offset
>>>> +         && use_ref.offset < ref->offset + ref->size))
>>>
>>> can you use ranges_overlap_p? (tree-ssa-alias.h)
>> Yes.  Didn't know about it.  Done.
>>
>>>
>>>> +    {
>>>> +      normalize_ref (&use_ref, ref);
>>>> +
>>>> +      /* If USE_REF covers all of REF, then it will hit one or more
>>>> +        live bytes.   This avoids useless iteration over the bitmap
>>>> +        below.  */
>>>> +      if (use_ref.offset == ref->offset && use_ref.size == ref->size)
>>>> +       return true;
>>>> +
>>>> +      /* Now iterate over what's left in USE_REF and see if any of
>>>> +       those bits are in LIVE.  */
>>>> +      for (int i = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
>>>> +          i < (use_ref.offset + use_ref.size) / BITS_PER_UNIT; i++)
>>>> +       if (bitmap_bit_p (live, i))
>>>
>>> a bitmap_bit_in_range_p () would be nice to have.  And it can be more
>>> efficient than this loop...
>> Yea.  That likely would help here.  I'm testing with a
>> bitmap_bit_in_range_p implementation (only for sbitmaps since that's
>> what we're using here).
>>
>> That implementation does the reasonably efficient things and is modeled
>> after the sbitmap implementation of bitmap_set_range.
>>
>>
>>>> @@ -554,6 +589,41 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
>>>>           /* If the statement is a use the store is not dead.  */
>>>>           else if (ref_maybe_used_by_stmt_p (use_stmt, ref))
>>>>             {
>>>> +             /* Handle common cases where we can easily build a ao_ref
>>>> +                structure for USE_STMT and in doing so we find that the
>>>> +                references hit non-live bytes and thus can be ignored.  */
>>>> +             if (live_bytes)
>>>> +               {
>>>> +                 if (is_gimple_assign (use_stmt))
>>>> +                   {
>>>> +                     /* Other cases were noted as non-aliasing by
>>>> +                        the call to ref_maybe_used_by_stmt_p.  */
>>>> +                     ao_ref use_ref;
>>>> +                     ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt));
>>>> +                     if (valid_ao_ref_for_dse (&use_ref)
>>>> +                         && use_ref.base == ref->base
>>>> +                         && use_ref.size == use_ref.max_size
>>>> +                         && !live_bytes_read (use_ref, ref, live_bytes))
>>>> +                       {
>>>> +                         if (gimple_vdef (use_stmt))
>>>> +                           {
>>>> +                             /* If we have already seen a store and
>>>> +                                this is also a store, then we have to
>>>> +                                fail.  */
>>>> +                             if (temp)
>>>> +                               {
>>>> +                                 fail = true;
>>>> +                                 BREAK_FROM_IMM_USE_STMT (ui);
>>>> +                               }
>>>
>>> as this case is rather cheap to test please test it together with
>>> live_bytes.  Like
>>>
>>>   if (live_bytes && (! gimple_vdef (use_stmt) || ! temp))
>> Seems reasonable.
>>
>>
>>>
>>> otherwise the patch looks reasonable for GCC 8.
>> Given the sbitmap.[ch] change, posting for re-approval
>>
>> Bootstrapped and regression tested on x86_64.  Earlier version without
>> the bitmap_bit_in_range_p improvement was also bootstrapped and
>> regression tested on aarch64.
>>
>> Jeff
>>
>>
>>
>>         * sbitmap.c (bitmap_bit_in_range_p): New function.
>>         * sbitmap.h (bitmap_bit_in_range_p): Prototype.
>>         * tree-ssa-dse.c (live_bytes_read): New function.
>>         (dse_classify_store): Ignore reads of dead bytes.
>>
>>         * testsuite/gcc.dg/tree-ssa/ssa-dse-26.c: New test.
>>
>> diff --git a/gcc/sbitmap.c b/gcc/sbitmap.c
>> index c065d13..4bf13a1 100644
>> --- a/gcc/sbitmap.c
>> +++ b/gcc/sbitmap.c
>> @@ -316,6 +316,59 @@ bitmap_set_range (sbitmap bmap, unsigned int start, unsigned int count)
>>    bmap->elms[start_word] |= mask;
>>  }
>>
>> +/* Return TRUE if any bit between START and END inclusive is set within
>> +   the simple bitmap BMAP.  Return FALSE otherwise.  */
>> +
>> +bool
>> +bitmap_bit_in_range_p (const_sbitmap bmap, unsigned int start, unsigned int end)
>> +{
>> +  unsigned int start_word = start / SBITMAP_ELT_BITS;
>> +  unsigned int start_bitno = start % SBITMAP_ELT_BITS;
>> +
>> +  /* Testing within a word, starting at the beginning of a word.  */
>> +  if (start_bitno == 0 && (end - start) < SBITMAP_ELT_BITS)
>> +    {
>> +      SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << (end - start)) - 1;
>> +      return (bmap->elms[start_word] & mask) != 0;
>> +    }
>> +
>> +  unsigned int end_word = end / SBITMAP_ELT_BITS;
>> +  unsigned int end_bitno = end % SBITMAP_ELT_BITS;
>> +
>> +  /* Testing starts somewhere in the middle of a word.  Test up to the
>> +     end of the word or the end of the requested region, whichever comes
>> +     first.  */
>> +  if (start_bitno != 0)
>> +    {
>> +      unsigned int nbits = ((start_word == end_word)
>> +                           ? end_bitno - start_bitno
>> +                           : SBITMAP_ELT_BITS - start_bitno);
>> +      SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << nbits) - 1;
>> +      mask <<= start_bitno;
>> +      if (bmap->elms[start_word] & mask)
>> +       return true;
>> +      start_word++;
>> +    }
>> +
>> +  if (start_word > end_word)
>> +    return false;
>> +
>> +  /* Now test words at a time until we hit a partial word.  */
>> +  unsigned int nwords = (end_word - start_word);
>> +  while (nwords)
>> +    {
>> +      if (bmap->elms[start_word])
>> +       return true;
>> +      start_word++;
>> +      nwords--;
>> +    }
>> +
>> +  /* Now handle residuals in the last word.  */
>> +  SBITMAP_ELT_TYPE mask
>> +    = ((SBITMAP_ELT_TYPE)1 << (SBITMAP_ELT_BITS - end_bitno)) - 1;
>> +  return (bmap->elms[start_word] & mask) != 0;
>> +}
>> +
>>  #if GCC_VERSION < 3400
>>  /* Table of number of set bits in a character, indexed by value of char.  */
>>  static const unsigned char popcount_table[] =
>> diff --git a/gcc/sbitmap.h b/gcc/sbitmap.h
>> index ce4d27d..ff52e93 100644
>> --- a/gcc/sbitmap.h
>> +++ b/gcc/sbitmap.h
>> @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
>>       * set_difference          : bitmap_and_compl
>>       * set_disjuction          : (not implemented)
>>       * set_compare             : bitmap_equal_p
>> +     * bit_in_range_p          : bitmap_bit_in_range_p
>>
>>     Some operations on 3 sets that occur frequently in data flow problems
>>     are also implemented:
>> @@ -253,6 +254,7 @@ extern bool bitmap_and (sbitmap, const_sbitmap, const_sbitmap);
>>  extern bool bitmap_ior (sbitmap, const_sbitmap, const_sbitmap);
>>  extern bool bitmap_xor (sbitmap, const_sbitmap, const_sbitmap);
>>  extern bool bitmap_subset_p (const_sbitmap, const_sbitmap);
>> +extern bool bitmap_bit_in_range_p (const_sbitmap, unsigned int, unsigned int);
>>
>>  extern int bitmap_first_set_bit (const_sbitmap);
>>  extern int bitmap_last_set_bit (const_sbitmap);
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c
>> new file mode 100644
>> index 0000000..6605dfe
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c
>> @@ -0,0 +1,33 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-dse1-details" } */
>> +
>> +enum constraint_expr_type
>> +{
>> +  SCALAR, DEREF, ADDRESSOF
>> +};
>> +typedef struct constraint_expr
>> +{
>> +  enum constraint_expr_type type;
>> +  unsigned int var;
>> +  long offset;
>> +} constraint_expr ;
>> +typedef struct constraint
>> +{
>> +  struct constraint_expr lhs;
>> +  struct constraint_expr rhs;
>> +} constraint;
>> +static _Bool
>> +constraint_expr_equal (struct constraint_expr x, struct constraint_expr y)
>> +{
>> +  return x.type == y.type && x.var == y.var && x.offset == y.offset;
>> +}
>> +
>> +_Bool
>> +constraint_equal (struct constraint a, struct constraint b)
>> +{
>> +  return constraint_expr_equal (a.lhs, b.lhs)
>> +    && constraint_expr_equal (a.rhs, b.rhs);
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Deleted dead store" 2 "dse1" } } */
>> +
>> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
>> index 70c8b07..1eca740 100644
>> --- a/gcc/tree-ssa-dse.c
>> +++ b/gcc/tree-ssa-dse.c
>> @@ -468,6 +468,36 @@ maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt)
>>      }
>>  }
>>
>> +/* Return TRUE if USE_REF reads bytes from LIVE where live is
>> +   derived from REF, a write reference.
>> +
>> +   While this routine may modify USE_REF, it's passed by value, not
>> +   location.  So callers do not see those modifications.  */
>> +
>> +static bool
>> +live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
>> +{
>> +  /* We have already verified that USE_REF and REF hit the same object.
>> +     Now verify that there's actually an overlap between USE_REF and REF.  */
>> +  if (ranges_overlap_p (use_ref.offset, use_ref.size, ref->offset, ref->size))
>> +    {
>> +      normalize_ref (&use_ref, ref);
>> +
>> +      /* If USE_REF covers all of REF, then it will hit one or more
>> +        live bytes.   This avoids useless iteration over the bitmap
>> +        below.  */
>> +      if (use_ref.offset <= ref->offset
>> +         && use_ref.offset + use_ref.size >= ref->offset + ref->size)
>> +       return true;
>> +
>> +      /* Now check if any of the remaining bits in use_ref are set in LIVE.  */
>> +      unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
>> +      unsigned int end  = (use_ref.offset + use_ref.size) / BITS_PER_UNIT;
>> +      return bitmap_bit_in_range_p (live, start, end);
>> +    }
>> +  return true;
>> +}
>> +
>>  /* A helper of dse_optimize_stmt.
>>     Given a GIMPLE_ASSIGN in STMT that writes to REF, find a candidate
>>     statement *USE_STMT that may prove STMT to be dead.
>> @@ -547,6 +577,31 @@ dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
>>           /* If the statement is a use the store is not dead.  */
>>           else if (ref_maybe_used_by_stmt_p (use_stmt, ref))
>>             {
>> +             /* Handle common cases where we can easily build a ao_ref
>> +                structure for USE_STMT and in doing so we find that the
>> +                references hit non-live bytes and thus can be ignored.  */
>> +             if (live_bytes && (!gimple_vdef (use_stmt) || !temp))
>> +               {
>> +                 if (is_gimple_assign (use_stmt))
>> +                   {
>> +                     /* Other cases were noted as non-aliasing by
>> +                        the call to ref_maybe_used_by_stmt_p.  */
>> +                     ao_ref use_ref;
>> +                     ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt));
>> +                     if (valid_ao_ref_for_dse (&use_ref)
>> +                         && use_ref.base == ref->base
>> +                         && use_ref.size == use_ref.max_size
>> +                         && !live_bytes_read (use_ref, ref, live_bytes))
>> +                       {
>> +                         /* If this statement has a VDEF, then it is the
>> +                            first store we have seen, so walk through it.  */
>> +                         if (gimple_vdef (use_stmt))
>> +                           temp = use_stmt;
>> +                         continue;
>> +                       }
>> +                   }
>> +               }
>> +
>>               fail = true;
>>               BREAK_FROM_IMM_USE_STMT (ui);
>>             }
>>

Patch

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c
new file mode 100644
index 0000000..6605dfe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c
@@ -0,0 +1,33 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse1-details" } */
+
+enum constraint_expr_type
+{
+  SCALAR, DEREF, ADDRESSOF
+};
+typedef struct constraint_expr
+{
+  enum constraint_expr_type type;
+  unsigned int var;
+  long offset;
+} constraint_expr ;
+typedef struct constraint
+{
+  struct constraint_expr lhs;
+  struct constraint_expr rhs;
+} constraint;
+static _Bool
+constraint_expr_equal (struct constraint_expr x, struct constraint_expr y)
+{
+  return x.type == y.type && x.var == y.var && x.offset == y.offset;
+}
+
+_Bool
+constraint_equal (struct constraint a, struct constraint b)
+{
+  return constraint_expr_equal (a.lhs, b.lhs)
+    && constraint_expr_equal (a.rhs, b.rhs);
+}
+
+/* { dg-final { scan-tree-dump-times "Deleted dead store" 2 "dse1" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c
new file mode 100644
index 0000000..48dc92e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-27.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse1-details -fno-tree-fre -fno-tree-sra" } */
+
+struct S { struct R { int x; int y; } r; int z; } s;
+
+extern void blah (struct S);
+
+void
+foo ()
+{
+  struct S s = { {1, 2}, 3 };
+  s.r.x = 1;
+   s.r.y = 2;
+   struct R r = s.r;
+  s.z = 3;
+  blah (s);
+}
+
+
+/* { dg-final { scan-tree-dump-times "Deleted dead store" 4 "dse1" } } */
+
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index a807d6d..f5b53fc 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -475,6 +475,41 @@  maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt)
     }
 }
 
+/* Return TRUE if USE_REF reads bytes from LIVE where live is
+   derived from REF, a write reference.
+
+   While this routine may modify USE_REF, it's passed by value, not
+   location.  So callers do not see those modifications.  */
+
+static bool
+live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
+{
+  /* We have already verified that USE_REF and REF hit the same object.
+     Now verify that there's actually an overlap between USE_REF and REF.  */
+  if ((use_ref.offset < ref->offset
+	&& use_ref.offset + use_ref.size > ref->offset)
+      || (use_ref.offset >= ref->offset
+	  && use_ref.offset < ref->offset + ref->size))
+    {
+      normalize_ref (&use_ref, ref);
+
+      /* If USE_REF covers all of REF, then it will hit one or more
+	 live bytes.   This avoids useless iteration over the bitmap
+	 below.  */
+      if (use_ref.offset == ref->offset && use_ref.size == ref->size)
+	return true;
+
+      /* Now iterate over what's left in USE_REF and see if any of
+	 those bits are in LIVE.  */
+      for (int i = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
+	   i < (use_ref.offset + use_ref.size) / BITS_PER_UNIT; i++)
+	if (bitmap_bit_p (live, i))
+	  return true;
+      return false;
+    }
+  return true;
+}
+
 /* A helper of dse_optimize_stmt.
    Given a GIMPLE_ASSIGN in STMT that writes to REF, find a candidate
    statement *USE_STMT that may prove STMT to be dead.
@@ -554,6 +589,41 @@  dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
 	  /* If the statement is a use the store is not dead.  */
 	  else if (ref_maybe_used_by_stmt_p (use_stmt, ref))
 	    {
+	      /* Handle common cases where we can easily build a ao_ref
+		 structure for USE_STMT and in doing so we find that the
+		 references hit non-live bytes and thus can be ignored.  */
+	      if (live_bytes)
+		{
+		  if (is_gimple_assign (use_stmt))
+		    {
+		      /* Other cases were noted as non-aliasing by
+			 the call to ref_maybe_used_by_stmt_p.  */
+		      ao_ref use_ref;
+		      ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt));
+		      if (valid_ao_ref_for_dse (&use_ref)
+			  && use_ref.base == ref->base
+			  && use_ref.size == use_ref.max_size
+			  && !live_bytes_read (use_ref, ref, live_bytes))
+			{
+			  if (gimple_vdef (use_stmt))
+			    {
+			      /* If we have already seen a store and
+			         this is also a store, then we have to
+			         fail.  */
+			      if (temp)
+			        {
+			          fail = true;
+			          BREAK_FROM_IMM_USE_STMT (ui);
+			        }
+
+			      /* Otherwise walk through this store.  */
+			      temp = use_stmt;
+			    }
+			  continue;
+			}
+		    }
+		}
+
 	      fail = true;
 	      BREAK_FROM_IMM_USE_STMT (ui);
 	    }