diff mbox series

restore -Warray-bounds for string literals (PR 83776)

Message ID 097132cc-18d7-3133-b425-e3ee9da28e77@gmail.com
State New
Headers show
Series restore -Warray-bounds for string literals (PR 83776) | expand

Commit Message

Martin Sebor Jan. 26, 2018, 2:16 a.m. UTC
PR tree-optimization/83776 - [6/7/8 Regression] missing
-Warray-bounds indexing past the end of a string literal,
identified a not-so-recent improvement to constant propagation
as the reason for GCC no longer being able to detect out-of-
bounds accesses to string literals.  The root cause is that
the change caused accesses to strings to be transformed into
MEM_REFs that the -Warray-bounds checker isn't prepared to
handle.  A simple example is:

   int h (void)
   {
     const char *p = "1234";
     return p[16];   // missing -Warray-bounds
   }

To fix the regression the attached patch extends the array bounds
checker to handle the small subset of MEM_REF expressions that
refer to string literals but stops of short of doing more than
that.  There are outstanding gaps in the detection that the patch
intentionally doesn't handle.  They are either caused by other
regressions (PR 84047) or by other latent bugs/limitations, or
by limitations in the approach I took to try to keep the patch
simple.  I hope to address some of those in a follow-up patch
for GCC 9.

Martin

Comments

Jeff Law April 30, 2018, 10:44 p.m. UTC | #1
On 01/25/2018 07:16 PM, Martin Sebor wrote:
> PR tree-optimization/83776 - [6/7/8 Regression] missing
> -Warray-bounds indexing past the end of a string literal,
> identified a not-so-recent improvement to constant propagation
> as the reason for GCC no longer being able to detect out-of-
> bounds accesses to string literals.  The root cause is that
> the change caused accesses to strings to be transformed into
> MEM_REFs that the -Warray-bounds checker isn't prepared to
> handle.  A simple example is:
> 
>   int h (void)
>   {
>     const char *p = "1234";
>     return p[16];   // missing -Warray-bounds
>   }
> 
> To fix the regression the attached patch extends the array bounds
> checker to handle the small subset of MEM_REF expressions that
> refer to string literals but stops of short of doing more than
> that.  There are outstanding gaps in the detection that the patch
> intentionally doesn't handle.  They are either caused by other
> regressions (PR 84047) or by other latent bugs/limitations, or
> by limitations in the approach I took to try to keep the patch
> simple.  I hope to address some of those in a follow-up patch
> for GCC 9.
> 
> Martin
> 
> gcc-83776.diff
> 
> 
> PR tree-optimization/83776 - [6/7/8 Regression] missing -Warray-bounds indexing past the end of a string literal
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/83776
> 	* tree-vrp.c (vrp_prop::check_mem_ref): New function.
> 	(check_array_bounds): Call it.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/83776
> 	* gcc.dg/Warray-bounds-27.c: New test.
> 	* gcc.dg/Warray-bounds-28.c: New test.
> 	* gcc.dg/Warray-bounds-29.c: New test.
> 
> diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
> index 3294bde..b2e45c9 100644
> --- a/gcc/tree-vrp.c
> +++ b/gcc/tree-vrp.c
> @@ -4763,6 +4763,7 @@ class vrp_prop : public ssa_propagation_engine
>    void vrp_finalize (bool);
>    void check_all_array_refs (void);
>    void check_array_ref (location_t, tree, bool);
> +  void check_mem_ref (location_t, tree);
>    void search_for_addr_array (tree, location_t);
>  
>    class vr_values vr_values;
> @@ -4781,6 +4782,7 @@ class vrp_prop : public ssa_propagation_engine
>    void extract_range_from_phi_node (gphi *phi, value_range *vr)
>      { vr_values.extract_range_from_phi_node (phi, vr); }
>  };
> +
>  /* Checks one ARRAY_REF in REF, located at LOCUS. Ignores flexible arrays
>     and "struct" hacks. If VRP can determine that the
>     array subscript is a constant, check if it is outside valid
> @@ -4915,6 +4917,179 @@ vrp_prop::check_array_ref (location_t location, tree ref,
>      }
>  }
>  
> +/* Checks one MEM_REF in REF, located at LOCATION, for out-of-bounds
> +   references to string constants.  If VRP can determine that the array
> +   subscript is a constant, check if it is outside valid range.
> +   If the array subscript is a RANGE, warn if it is non-overlapping
> +   with valid range.
> +   IGNORE_OFF_BY_ONE is true if the MEM_REF is inside an ADDR_EXPR.  */
This function doesn't have IGNORE_OFF_BY_ONE as a parameter.  Drop it
from the comment.


> +
> +void
> +vrp_prop::check_mem_ref (location_t location, tree ref)
> +{
> +  if (TREE_NO_WARNING (ref))
> +    return;
> +
> +  tree arg = TREE_OPERAND (ref, 0);
> +  tree cstoff = TREE_OPERAND (ref, 1);
> +  tree varoff = NULL_TREE;
> +
> +  const offset_int maxobjsize = tree_to_shwi (max_object_size ());
> +
> +  /* The string constant bounds in bytes.  Initially set to [0, MAXOBJSIZE]
> +     until a tighter bound is determined.  */
> +  offset_int strbounds[2];
> +  strbounds[1] = maxobjsize;
> +  strbounds[0] = -strbounds[1] - 1;
> +
> +  /* The minimum and maximum intermediate offset.  For a reference
> +     to be valid, not only does the final offset/subscript must be
> +     in bounds but all intermediate offsets must be as well. */
> +  offset_int ioff = wi::to_offset (fold_convert (ptrdiff_type_node, cstoff));
> +  offset_int extrema[2] = { 0, wi::abs (ioff) };
> +
> +  /* The range of the byte offset into the reference.  */
> +  offset_int offrange[2] = { 0, 0 };
> +
> +  value_range *vr = NULL;
> +
> +  /* Determine the offsets and increment OFFRANGE for the bounds of each.  */
> +  while (TREE_CODE (arg) == SSA_NAME)
> +    {
> +      gimple *def = SSA_NAME_DEF_STMT (arg);
> +      if (!is_gimple_assign (def))
> +	{
> +	  if (tree var = SSA_NAME_VAR (arg))
> +	    arg = var;
> +	  break;
> +	}
What's the point of looking at the underlying SSA_NAME_VAR here? I can't
see how that's ever helpful.  You'll always exit the loop at this point
which does something like

if (TREE_CODE (arg) == ADDR_EXPR)
  {
     do something interesting
  }
else
  return;

ISTM that any time you dig into SSA_NAME_VAR (arg) what you're going to
get back is some kind of _DECL node -- I'm not aware of a case where
you're going to get back an ADDR_EXPR.




> +
> +      tree_code code = gimple_assign_rhs_code (def);
> +      if (code == POINTER_PLUS_EXPR)
> +	{
> +	  arg = gimple_assign_rhs1 (def);
> +	  varoff = gimple_assign_rhs2 (def);
> +	}
> +      else if (code == ASSERT_EXPR)
> +	{
> +	  arg = TREE_OPERAND (gimple_assign_rhs1 (def), 0);
> +	  continue;
> +	}
> +      else
> +	return;
> +
> +      if (TREE_CODE (varoff) != SSA_NAME)
> +	break;
> +
> +      vr = get_value_range (varoff);
> +      if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
> +	break;
Doesn't this let VR_ANTI_RANGE through?  Why not instead

  if (!vr || vr->type != VR_RANGE || !vr->min || !vr->max)

?  I'm having trouble convincing myself the subsequent code will DTRT
for an anti-range.




> +
> +      if (TREE_CODE (vr->min) != INTEGER_CST
> +          || TREE_CODE (vr->max) != INTEGER_CST)
> +        break;
> +
> +      offset_int ofr[] = {
> +	wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
> +	wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
> +      };
> +
> +      if (vr->type == VR_RANGE)
> +	{
> +	  if (ofr[0] < ofr[1])
> +	    {
> +	      offrange[0] += ofr[0];
> +	      offrange[1] += ofr[1];
> +	    }
> +	  else
> +	    {
> +	      offrange[0] += strbounds[0];
> +	      offrange[1] += strbounds[1];
> +	    }
When can the ELSE clause above happen for a VR_RANGE?


> +  /* At level 2 check also intermediate offsets.  */
> +  int i = 0;
> +  if (extrema[i] < -strbounds[1]
> +      || extrema[i = 1] > strbounds[1] + eltsize)
> +    {
> +      HOST_WIDE_INT tmpidx = extrema[i].to_shwi () / eltsize.to_shwi ();
> +
> +      warning_at (location, OPT_Warray_bounds,
> +		  "intermediate array offset %wi is outside array bounds "
> +		  "of %qT",
> +		  tmpidx,  strtype);
> +      TREE_NO_WARNING (ref) = 1;
> +    }
> +}
This seems ill-advised.  All that matters is the actual index used in
the dereference.  Intermediate values (ie, address computations) along
the way are uninteresting -- we may form an address out of the bounds of
the array as an intermediate computation.  But the actual memory
reference will be within the range.

I think there's some questions to answer here and likely another iteration.

jeff
Richard Biener May 2, 2018, 7:22 a.m. UTC | #2
On Fri, Jan 26, 2018 at 3:16 AM, Martin Sebor <msebor@gmail.com> wrote:
> PR tree-optimization/83776 - [6/7/8 Regression] missing
> -Warray-bounds indexing past the end of a string literal,
> identified a not-so-recent improvement to constant propagation
> as the reason for GCC no longer being able to detect out-of-
> bounds accesses to string literals.  The root cause is that
> the change caused accesses to strings to be transformed into
> MEM_REFs that the -Warray-bounds checker isn't prepared to
> handle.  A simple example is:
>
>   int h (void)
>   {
>     const char *p = "1234";
>     return p[16];   // missing -Warray-bounds
>   }
>
> To fix the regression the attached patch extends the array bounds
> checker to handle the small subset of MEM_REF expressions that
> refer to string literals but stops of short of doing more than
> that.  There are outstanding gaps in the detection that the patch
> intentionally doesn't handle.  They are either caused by other
> regressions (PR 84047) or by other latent bugs/limitations, or
> by limitations in the approach I took to try to keep the patch
> simple.  I hope to address some of those in a follow-up patch
> for GCC 9.

+      gimple *def = SSA_NAME_DEF_STMT (arg);
+      if (!is_gimple_assign (def))
+       {
+         if (tree var = SSA_NAME_VAR (arg))
+           arg = var;

this is never correct.  Whether sth has an underlying VAR_DECL or not
is irrelevant.  It looks like you'll always take the

+  else
+    return;

path then anyways.  So why obfuscate the code this way?

+      offset_int ofr[] = {
+       wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
+       wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
+      };

huh.  Do you maybe want to use widest_int for ofr[]?  What's
wrong with wi::to_offset (vr->min)?  Building another intermediate
tree node here looks just bogus.

What are you trying to do in this loop anyways?  I suppose
look at

  p_1 = &obj[i_2];  // already bounds-checked, but with ignore_off_by_one
  ... = MEM[p_1 + CST];

?  But then

+      if (TREE_CODE (varoff) != SSA_NAME)
+       break;

you should at least handle INTEGER_CSTs here?

+      if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
+       break;

please use positive tests here, VR_RANGE || VR_ANTI_RANGE.  As usual
the anti-range handling looks odd.  Please add comments so we can follow
what you were thinking when writing range merging code.  Even better if you
can stick to use existing code rather than always re-inventing the wheel...

I think I commented on earlier variants but this doesn't seem to resemble
any of them.

Richard.



> Martin
Martin Sebor July 13, 2018, 11:09 p.m. UTC | #3
>> +/* Checks one MEM_REF in REF, located at LOCATION, for out-of-bounds
>> +   references to string constants.  If VRP can determine that the array
>> +   subscript is a constant, check if it is outside valid range.
>> +   If the array subscript is a RANGE, warn if it is non-overlapping
>> +   with valid range.
>> +   IGNORE_OFF_BY_ONE is true if the MEM_REF is inside an ADDR_EXPR.  */
> This function doesn't have IGNORE_OFF_BY_ONE as a parameter.  Drop it
> from the comment.

In the latest update (yet to be posted) the function uses it.

>> +  /* Determine the offsets and increment OFFRANGE for the bounds of each.  */
>> +  while (TREE_CODE (arg) == SSA_NAME)
>> +    {
>> +      gimple *def = SSA_NAME_DEF_STMT (arg);
>> +      if (!is_gimple_assign (def))
>> +	{
>> +	  if (tree var = SSA_NAME_VAR (arg))
>> +	    arg = var;
>> +	  break;
>> +	}
> What's the point of looking at the underlying SSA_NAME_VAR here? I can't
> see how that's ever helpful.  You'll always exit the loop at this point
> which does something like
>
> if (TREE_CODE (arg) == ADDR_EXPR)
>   {
>      do something interesting
>   }
> else
>   return;
>
> ISTM that any time you dig into SSA_NAME_VAR (arg) what you're going to
> get back is some kind of _DECL node -- I'm not aware of a case where
> you're going to get back an ADDR_EXPR.

I have removed the code.  It was a vestige of something that
didn't work out and I didn't notice.

>> +
>> +      tree_code code = gimple_assign_rhs_code (def);
>> +      if (code == POINTER_PLUS_EXPR)
>> +	{
>> +	  arg = gimple_assign_rhs1 (def);
>> +	  varoff = gimple_assign_rhs2 (def);
>> +	}
>> +      else if (code == ASSERT_EXPR)
>> +	{
>> +	  arg = TREE_OPERAND (gimple_assign_rhs1 (def), 0);
>> +	  continue;
>> +	}
>> +      else
>> +	return;
>> +
>> +      if (TREE_CODE (varoff) != SSA_NAME)
>> +	break;
>> +
>> +      vr = get_value_range (varoff);
>> +      if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
>> +	break;
> Doesn't this let VR_ANTI_RANGE through?  Why not instead
>
>   if (!vr || vr->type != VR_RANGE || !vr->min || !vr->max)
>
> ?  I'm having trouble convincing myself the subsequent code will DTRT
> for an anti-range.

The anti-range code adds PTRDIFF_MIN and PTRDIFF_MAX to
the offset (that's what ARRBOUNDS is initially set to, until
we have found the array that's being dereferenced).

Because of the loop bailing for an anti-range could be too
early (the subsequent iterations might compensate for
the conservative anti-range handling and find a bug in
offsets added later).  It's unlikely but so are all bugs
so I try to handle even corner cases.

>> +
>> +      if (TREE_CODE (vr->min) != INTEGER_CST
>> +          || TREE_CODE (vr->max) != INTEGER_CST)
>> +        break;
>> +
>> +      offset_int ofr[] = {
>> +	wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
>> +	wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
>> +      };
>> +
>> +      if (vr->type == VR_RANGE)
>> +	{
>> +	  if (ofr[0] < ofr[1])
>> +	    {
>> +	      offrange[0] += ofr[0];
>> +	      offrange[1] += ofr[1];
>> +	    }
>> +	  else
>> +	    {
>> +	      offrange[0] += strbounds[0];
>> +	      offrange[1] += strbounds[1];
>> +	    }
> When can the ELSE clause above happen for a VR_RANGE?

For a maximum in excess of PTRDIFF_MAX and a non-negative
minimum that's less than that.

>> +  /* At level 2 check also intermediate offsets.  */
>> +  int i = 0;
>> +  if (extrema[i] < -strbounds[1]
>> +      || extrema[i = 1] > strbounds[1] + eltsize)
>> +    {
>> +      HOST_WIDE_INT tmpidx = extrema[i].to_shwi () / eltsize.to_shwi ();
>> +
>> +      warning_at (location, OPT_Warray_bounds,
>> +		  "intermediate array offset %wi is outside array bounds "
>> +		  "of %qT",
>> +		  tmpidx,  strtype);
>> +      TREE_NO_WARNING (ref) = 1;
>> +    }
>> +}
> This seems ill-advised.  All that matters is the actual index used in
> the dereference.  Intermediate values (ie, address computations) along
> the way are uninteresting -- we may form an address out of the bounds of
> the array as an intermediate computation.  But the actual memory
> reference will be within the range.

The idea is to help detect bugs that cannot be detected otherwise.
Take the example below:

   void g (int i)
   {
     if (i < 1 || 2 < i)
       i = 1;

     const char *p1 = "12" + i;
     const char *p2 = p1 + i;

     extern int x, y;

     x = p2[-4];   // #1: only valid if p2 is out of bounds
     y = p2[0];    // #2 therefore this must be out of bounds
   }

For the dereference at #1 to be valid i must be 2 (i.e.,
the upper bound of its range) and so p2 is therefore out
of bounds.

We may be able to compensate for it and compute the right
address at #1 but if the out-of-bounds value is used in
another dereference that makes a different assumption
(such as #2) one of the two is definitely wrong.  It might
be possible to do something smarter and determine if the
pointer really is used this way but I didn't want to
complicate the things too much so I put the logic under
level 2.

I will post an updated patch shortly.

Martin
Martin Sebor July 13, 2018, 11:45 p.m. UTC | #4
On 05/02/2018 01:22 AM, Richard Biener wrote:
> On Fri, Jan 26, 2018 at 3:16 AM, Martin Sebor <msebor@gmail.com> wrote:
>> PR tree-optimization/83776 - [6/7/8 Regression] missing
>> -Warray-bounds indexing past the end of a string literal,
>> identified a not-so-recent improvement to constant propagation
>> as the reason for GCC no longer being able to detect out-of-
>> bounds accesses to string literals.  The root cause is that
>> the change caused accesses to strings to be transformed into
>> MEM_REFs that the -Warray-bounds checker isn't prepared to
>> handle.  A simple example is:
>>
>>   int h (void)
>>   {
>>     const char *p = "1234";
>>     return p[16];   // missing -Warray-bounds
>>   }
>>
>> To fix the regression the attached patch extends the array bounds
>> checker to handle the small subset of MEM_REF expressions that
>> refer to string literals but stops of short of doing more than
>> that.  There are outstanding gaps in the detection that the patch
>> intentionally doesn't handle.  They are either caused by other
>> regressions (PR 84047) or by other latent bugs/limitations, or
>> by limitations in the approach I took to try to keep the patch
>> simple.  I hope to address some of those in a follow-up patch
>> for GCC 9.
>
> +      gimple *def = SSA_NAME_DEF_STMT (arg);
> +      if (!is_gimple_assign (def))
> +       {
> +         if (tree var = SSA_NAME_VAR (arg))
> +           arg = var;
>
> this is never correct.  Whether sth has an underlying VAR_DECL or not
> is irrelevant.  It looks like you'll always take the
>
> +  else
> +    return;
>
> path then anyways.  So why obfuscate the code this way?

I have removed the code.  It was a vestige of something that
didn't pan out and I didn't notice.

>
> +      offset_int ofr[] = {
> +       wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
> +       wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
> +      };
>
> huh.  Do you maybe want to use widest_int for ofr[]?  What's
> wrong with wi::to_offset (vr->min)?  Building another intermediate
> tree node here looks just bogus.

I need to convert size_type indices to signed to keep their sign
if it's negative and include it in warnings.  I've moved the code
into a conditional where it's used to minimize the cost but other
than that I don't know how else to convert it.

>
> What are you trying to do in this loop anyways?

The loop It determines the range of the final index/offset for
a series of POINTER_PLUS_EXPRs.  It handles cases like this:

   int g (int i, int j, int k)
   {
     if (i < 1) i = 1;
     if (j < 1) j = 1;
     if (k < 1) k = 1;

     const char *p0 = "123";
     const char *p1 = p0 + i;
     const char *p2 = p1 + j;
     const char *p3 = p2 + k;

     // p2[3] and p3[1] are out of bounds
     return p0[4] + p1[3] + p2[2] + p3[1];
   }

> I suppose
> look at
>
>   p_1 = &obj[i_2];  // already bounds-checked, but with ignore_off_by_one
>   ... = MEM[p_1 + CST];
>
> ?  But then
>
> +      if (TREE_CODE (varoff) != SSA_NAME)
> +       break;
>
> you should at least handle INTEGER_CSTs here?

It's already handled (and set in CSTOFF).  There should be no
more constant offsets after that (I haven't come across any.)

>
> +      if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
> +       break;
>
> please use positive tests here, VR_RANGE || VR_ANTI_RANGE.  As usual
> the anti-range handling looks odd.  Please add comments so we can follow
> what you were thinking when writing range merging code.  Even better if you
> can stick to use existing code rather than always re-inventing the wheel...

The anti-range handling is to conservatively add
[-MAXOBJSIZE -1, MAXOBJSIZE] to OFFRANGE.  I've added comments
to make it clear.  I'd be more than happy to reuse existing
code if I knew where to find it (if it exists).  It sure would
save me lots of work to have a library of utility functions to
call instead of rolling my own code each time.

>
> I think I commented on earlier variants but this doesn't seem to resemble
> any of them.

I've reworked the patch (sorry) to also handle arrays.  For GCC
9 it seems I might as well do both in one go.

Attached is an updated patch with these changes.

Martin
PR tree-optimization/84047 - missing -Warray-bounds on an out-of-bounds index into an array
PR tree-optimization/83776 - missing -Warray-bounds indexing past the end of a string literal

gcc/ChangeLog:

	PR tree-optimization/84047
	PR tree-optimization/83776
	* tree-vrp.c (vrp_prop::check_mem_ref): New function.
	(check_array_bounds): Call it.
	* /gcc/tree-sra.c (get_access_for_expr): Fail for out-of-bounds
	array offsets.

gcc/testsuite/ChangeLog:

	PR tree-optimization/83776
	PR tree-optimization/84047
	* gcc.dg/Warray-bounds-29.c: New test.
	* gcc.dg/Warray-bounds-30.c: New test.
	* gcc.dg/Warray-bounds-31.c: New test.
	* gcc.dg/Warray-bounds-32.c: New test.

diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-29.c b/gcc/testsuite/gcc.dg/Warray-bounds-29.c
new file mode 100644
index 0000000..72c5d1c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-29.c
@@ -0,0 +1,150 @@
+/* PR tree-optimization/83776: missing -Warray-bounds indexing past the end
+   of a string literal
+   Test to exercise warnings for computations of otherwise in-bounds indices
+   into strings that temporarily exceed the bounds of the string.
+   { dg-do compile }
+   { dg-options "-O2 -Warray-bounds=2 -ftrack-macro-expansion=0" } */
+
+#include "range.h"
+
+#define MAX DIFF_MAX
+#define MIN DIFF_MIN
+
+typedef __WCHAR_TYPE__ wchar_t;
+
+void sink (int, ...);
+
+#define T(expr)   sink (0, expr)
+
+void test_narrow (void)
+{
+  int i = SR (1, 2);
+
+  const char *p0 = "12";
+  const char *p1 = p0 + i;    /* points at '2' or beyond */
+  const char *p2 = p1 + i;    /* points at '\0' or beyond */
+  const char *p3 = p2 + i;    /* points just past the end */
+  const char *p4 = p3 + i;    /* invalid */
+
+  T (p0[-1]);                 /* { dg-warning "array subscript \(-1|\[0-9\]+) is outside array bounds of .char\\\[3]." } */
+  T (p0[0]);
+  T (p0[1]);
+  T (p0[2]);
+  T (p0[3]);                  /* { dg-warning "array subscript 3 is outside array bounds of .char\\\[3]." } */
+
+  T (&p0[-1]);                /* { dg-warning "array subscript \(-1|\[0-9\]+) is \(above|below|outside\) array bounds of .char\\\[3]." } */
+  T (&p0[0]);
+  T (&p0[1]);
+  T (&p0[2]);
+  T (&p0[3]);
+  T (&p0[4]);                 /* { dg-warning "array subscript 4 is \(above|outside\) array bounds of .char\\\[3]." } */
+
+  T (p1[-3]);                 /* { dg-warning "array subscript \\\[-2, -1] is outside array bounds of .char\\\[3]." } */
+  T (p1[-2]);
+  T (p1[-1]);
+  T (p1[ 0]);
+  T (p1[ 1]);
+  T (p1[ 2]);                 /* { dg-warning "array subscript \\\[3, 4] is outside array bounds of .char\\\[3]." } */
+  T (p1[ 3]);                 /* { dg-warning "array subscript \\\[4, 5] is outside array bounds of .char\\\[3]." } */
+
+  T (&p1[-3]);                /* { dg-warning "array subscript \\\[-2, -1] is outside array bounds of .char\\\[3]." "bug" { xfail *-*-* } } */
+  T (&p1[-2]);
+  T (&p1[-1]);
+  T (&p1[ 0]);
+  T (&p1[ 1]);
+  T (&p1[ 2]);
+  T (&p1[ 3]);                /* { dg-warning "array subscript \\\[4, 6] is outside array bounds of .char\\\[3]." "bug" { xfail *-*-* } } */
+
+  T (p2[-4]);                 /* { dg-warning "intermediate array offset 4 is outside array bounds of .char\\\[3]." } */
+  T (p2[-3]);
+  T (p2[-2]);
+  T (p2[-1]);
+  T (p2[ 0]);
+
+  /* Even though the lower bound of p3's offsets is in bounds, in order
+     to subtract 4 from p3 and get a dereferenceable pointer its value
+     would have to be out-of-bounds.  */
+  T (p3[-4]);                 /* { dg-warning "intermediate array offset 4 is outside array bounds of .char\\\[3]." } */
+  T (p3[-3]);
+  T (p3[-2]);
+  T (p3[-1]);
+  T (p3[ 0]);                 /* { dg-warning "array subscript \\\[3, 6] is outside array bounds of .char\\\[3]." } */
+
+  T (p4[-4]);                 /* { dg-warning "intermediate array offset 4 is outside array bounds of .char\\\[3]." } */
+  T (p4[-3]);                 /* { dg-warning "intermediate array offset 4 is outside array bounds of .char\\\[3]." } */
+  T (p4[-2]);                 /* { dg-warning "intermediate array offset 4 is outside array bounds of .char\\\[3]." } */
+
+  /* The final subscripts below are invalid.  */
+  T (p4[-1]);                 /* { dg-warning "array subscript \\\[3, 7] is outside array bounds of .char\\\[3]." } */
+  T (p4[ 0]);                 /* { dg-warning "array subscript \\\[4, 8] is outside array bounds of .char\\\[3]." } */
+}
+
+
+void test_narrow_vflow (void)
+{
+  int i = SR (DIFF_MAX - 2, DIFF_MAX);
+  int j = SR (1, DIFF_MAX);
+
+  const char *p0 = "123";
+  const char *p1 = p0 + i;    /* points at '2' or beyond */
+  const char *p2 = p1 + i;    /* points at '\0' or beyond */
+  const char *p3 = p2 + i;    /* points just past the end */
+  const char *p4 = p3 + i;    /* invalid */
+}
+
+
+void test_wide (void)
+{
+  int i = SR (1, 2);
+
+  const wchar_t *p0 = L"123";
+  const wchar_t *p1 = p0 + i;    /* points at L'2' or beyond */
+  const wchar_t *p2 = p1 + i;    /* points at L'3' or beyond */
+  const wchar_t *p3 = p2 + i;    /* points at L'\0' or beyond */
+  const wchar_t *p4 = p3 + i;    /* points just past the end */
+  const wchar_t *p5 = p4 + i;    /* invalid */
+
+  T (p0[0]);
+  T (p0[1]);
+  T (p0[2]);
+  T (p0[3]);
+  T (p0[4]);                  /* { dg-warning "array subscript 4 is outside array bounds of .\[a-z \]+\\\[4]." } */
+
+  T (p1[-1]);
+  T (p1[ 0]);
+  T (p1[ 1]);
+  T (p1[ 2]);
+  T (p1[ 3]);                  /* { dg-warning "array subscript \\\[4, 5] is outside array bounds of .\[a-z \]+\\\[4]." } */
+
+  T (&p1[-1]);
+  T (&p1[ 0]);
+  T (&p1[ 1]);
+  T (&p1[ 2]);
+  T (&p1[ 3]);
+  T (&p1[ 4]);                 /* { dg-warning "array subscript \\\[5, 6] is outside array bounds of .\[a-z \]+\\\[4]." "bug" { xfail *-*-* } } */
+
+  T (p2[-5]);                 /* { dg-warning "array subscript \\\[-3, -1] is outside array bounds of .\[a-z \]+\\\[4]." } */
+  T (p2[-4]);
+  T (p2[-3]);
+  T (p2[-2]);
+  T (p2[-1]);
+  T (p2[ 0]);
+
+  /* Even though the lower bound of p3's offsets is in bounds, in order
+     to subtract 5 from p3 and get a dereferenceable pointer its value
+     would have to be out-of-bounds.  */
+  T (p3[-5]);                 /* { dg-warning "intermediate array offset 5 is outside array bounds of .\[a-z \]+\\\[4]." } */
+  T (p3[-4]);
+  T (p3[-3]);
+  T (p3[-2]);
+  T (p3[-1]);
+  T (p3[ 0]);
+  T (p3[ 1]);                 /* { dg-warning "array subscript \\\[4, 7] is outside array bounds of .\[a-z \]+\\\[4]." } */
+
+  T (p4[-5]);                 /* { dg-warning "intermediate array offset 5 is outside array bounds of .\[a-z \]+\\\[4]." } */
+  T (p4[-4]);
+  T (p4[-3]);
+  T (p4[-2]);
+  T (p4[-1]);
+  T (p4[ 0]);                 /* { dg-warning "array subscript \\\[4, 8] is outside array bounds of .\[a-z \]+\\\[4]." } */
+}
diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-30.c b/gcc/testsuite/gcc.dg/Warray-bounds-30.c
new file mode 100644
index 0000000..ac7e9a6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-30.c
@@ -0,0 +1,200 @@
+/* PR tree-optimization/84047 - missing -Warray-bounds on an out-of-bounds
+   index into an array
+   { dg-do compile }
+   { dg-options "-O2 -Warray-bounds=2 -ftrack-macro-expansion=0" } */
+
+#include "range.h"
+
+#define MAX DIFF_MAX
+#define MIN DIFF_MIN
+
+void sink (int, ...);
+
+#define T(...)   sink (0, __VA_ARGS__)
+
+void test_global_char_array (void)
+{
+  extern char gcar1[1];
+  char *p = gcar1;
+
+  T (p[MIN]);       /* { dg-warning "subscript -\[0-9\]+ is outside array bounds of .char\\\[1]." } */
+  T (p[-1]);        /* { dg-warning "subscript -1 is outside array bounds of .char\\\[1]." } */
+  T (p[0]);
+  T (p[1]);         /* { dg-warning "subscript 1 is outside array bounds of .char\\\[1]." } */
+  T (p[MAX]);       /* { dg-warning "subscript \[0-9\]+ is outside array bounds of .char\\\[1]." } */
+
+  T (&p[MIN]);      /* { dg-warning "subscript -\[0-9\]+ is \(below|outside\) array bounds of .char\\\[1]." } */
+  T (&p[-1]);       /* { dg-warning "subscript -1 is \(below|outside\) array bounds of .char\\\[1]." } */
+  T (&p[0]);
+  T (&p[1]);
+  T (&p[2]);        /* { dg-warning "subscript 2 is \(above|outside\) array bounds of .char\\\[1]." } */
+  T (&p[MAX]);      /* { dg-warning "subscript \[0-9\]+ is \(above|outside\) array bounds of .char\\\[1]." } */
+
+  extern char gcar3[3];
+  char *q = gcar3;
+
+  T (q[MIN]);       /* { dg-warning "subscript -\[0-9\]+ is outside array bounds of .char\\\[3]." } */
+  T (q[-1]);        /* { dg-warning "subscript -1 is outside array bounds of .char\\\[3]." } */
+  T (q[0]);
+  T (q[1]);
+  T (q[2]);
+  T (q[3]);         /* { dg-warning "subscript 3 is outside array bounds of .char\\\[3]." } */
+  T (q[MAX]);       /* { dg-warning "subscript \[0-9\]+ is outside array bounds of .char\\\[3]." } */
+
+  T (&q[MIN]);      /* { dg-warning "subscript -\[0-9\]+ is \(below|outside\) array bounds of .char\\\[3]." } */
+  T (&q[-1]);       /* { dg-warning "subscript -1 is \(below|outside\) array bounds of .char\\\[3]." } */
+  T (&q[0]);
+  T (&q[1]);
+  T (&q[2]);
+  T (&q[3]);
+  T (&q[4]);        /* { dg-warning "subscript 4 is \(above|outside\) array bounds of .char\\\[3]." } */
+  T (&q[MAX]);      /* { dg-warning "subscript \[0-9\]+ is \(above|outside\) array bounds of .char\\\[3]." } */
+}
+
+
+void test_global_int_array (void)
+{
+  /* Use smaller values to prevent false negatives due to undetected
+     integer overflow/wrapping.  */
+  ptrdiff_t min = MIN / sizeof (int);
+  ptrdiff_t max = MAX / sizeof (int);
+
+  extern int giar1[1];
+  extern int giar3[3];
+
+  int *p = giar1;
+
+  T (p[min]);       /* { dg-warning "subscript -\[0-9\]+ is outside array bounds of .int\\\[1]." } */
+  T (p[-1]);        /* { dg-warning "subscript -1 is outside array bounds of .int\\\[1]." } */
+  T (p[0]);
+  T (p[1]);         /* { dg-warning "subscript 1 is outside array bounds of .int\\\[1]." } */
+  T (p[max]);       /* { dg-warning "subscript \[0-9\]+ is outside array bounds of .int\\\[1]." } */
+
+  T (&p[min]);      /* { dg-warning "subscript -\[0-9\]+ is \(below|outside\) array bounds of .int\\\[1]." } */
+  T (&p[-1]);       /* { dg-warning "subscript -1 is \(below|outside\) array bounds of .int\\\[1]." } */
+  T (&p[0]);
+  T (&p[1]);
+  T (&p[2]);        /* { dg-warning "subscript 2 is \(above|outside\) array bounds of .int\\\[1]." } */
+  T (&p[max]);      /* { dg-warning "subscript \[0-9\]+ is \(above|outside\) array bounds of .int\\\[1]." } */
+
+  int *q = giar3;
+
+  T (q[min]);       /* { dg-warning "subscript -\[0-9\]+ is outside array bounds of .int\\\[3]." } */
+  T (q[-1]);        /* { dg-warning "subscript -1 is outside array bounds of .int\\\[3]." } */
+  T (q[0]);
+  T (q[1]);
+  T (q[2]);
+  T (q[3]);         /* { dg-warning "subscript 3 is outside array bounds of .int\\\[3]." } */
+  T (q[max]);       /* { dg-warning "subscript \[0-9\]+ is outside array bounds of .int\\\[3]." } */
+
+  T (&q[min]);      /* { dg-warning "subscript -\[0-9\]+ is \(below|outside\) array bounds of .int\\\[3]." } */
+  T (&q[-1]);       /* { dg-warning "subscript -1 is \(below|outside\) array bounds of .int\\\[3]." } */
+  T (&q[0]);
+  T (&q[1]);
+  T (&q[2]);
+  T (&q[3]);
+  T (&q[4]);        /* { dg-warning "subscript 4 is \(above|outside\) array bounds of .int\\\[3]." } */
+  T (&q[max]);      /* { dg-warning "subscript \[0-9\]+ is \(above|outside\) array bounds of .int\\\[3]." } */
+}
+
+
+void test_global_short_2dim_array (void)
+{
+  extern short giar3_5[3][5];
+
+  short *p = giar3_5[0];
+
+  /* The access below isn't diagnosed because the reference is transformed
+     into MEM_REF (short*, &giar3_5, 0), i.e., *giar3_5[0][0].  */
+  T (p[MIN]);       /* { dg-warning "subscript -\[0-9\]+ is outside array bounds of .short int\\\[3]" "bug" { xfail *-*-*} } */
+  T (p[-1]);        /* { dg-warning "subscript -1 is outside array bounds of .short int\\\[3]" } */
+  T (p[0]);
+  T (p[1]);
+  T (p[2]);
+  T (p[15]);        /* { dg-warning "subscript 15 is outside array bounds of .short int\\\[3]" } */
+  T (p[MAX]);       /* { dg-warning "subscript -?\[0-9\]+ is outside array bounds of .short int\\\[3]" } */
+
+  T (&p[MIN]);      /* { dg-warning "subscript -\[0-9\]+ is \(below|outside\) array bounds of .short int\\\[3]" "bug" { xfail *-*-* } } */
+  T (&p[-1]);       /* { dg-warning "subscript -1 is \(below|outside\) array bounds of .short int\\\[3]" } */
+  T (&p[0]);
+  T (&p[1]);
+  T (&p[2]);
+  T (&p[3]);
+  T (&p[16]);       /* { dg-warning "subscript 16 is \(above|outside\) array bounds of .short int\\\[3]" } */
+  T (&p[MAX]);      /* { dg-warning "subscript -?\[0-9\]+ is \(above|outside\) array bounds of .short int\\\[3]" } */
+}
+
+
+void test_local_char_array (void)
+{
+  char ar1[1] = { 1 };
+  char *p = ar1;
+
+  T (p[MIN]);      /* { dg-warning "subscript -\[0-9\]+ is outside array bounds of .char\\\[1]." } */
+  T (p[-1]);       /* { dg-warning "subscript -1 is outside array bounds of .char\\\[1]." } */
+  T (p[0]);
+  T (p[1]);         /* { dg-warning "subscript 1 is outside array bounds of .char\\\[1]." } */
+  T (p[MAX]);       /* { dg-warning "subscript \[0-9\]+ is outside array bounds of .char\\\[1]." } */
+
+  T (&p[MIN]);     /* { dg-warning "subscript -\[0-9\]+ is \(below|outside\) array bounds of .char\\\[1]." } */
+  T (&p[-1]);      /* { dg-warning "subscript -1 is \(below|outside\) array bounds of .char\\\[1]." } */
+  T (&p[0]);
+  T (&p[1]);
+  T (&p[2]);        /* { dg-warning "subscript 2 is \(above|outside\) array bounds of .char\\\[1]." } */
+  T (&p[MAX]);      /* { dg-warning "subscript \[0-9\]+ is \(above|outside\) array bounds of .char\\\[1]." } */
+
+  char ar3[3] = { 1, 2, 3 };
+  p = ar3;
+
+  T (p[MIN]);       /* { dg-warning "subscript -\[0-9\]+ is outside array bounds of .char\\\[3]." } */
+  T (p[-1]);        /* { dg-warning "subscript -1 is outside array bounds of .char\\\[3]." } */
+  T (p[0]);
+  T (p[1]);
+  T (p[2]);
+  T (p[3]);         /* { dg-warning "subscript 3 is outside array bounds of .char\\\[3]." } */
+  T (p[MAX]);       /* { dg-warning "subscript \[0-9\]+ is outside array bounds of .char\\\[3]." } */
+
+  T (&p[MIN]);      /* { dg-warning "subscript -\[0-9\]+ is \(below|outside\) array bounds of .char\\\[3]." } */
+  T (&p[-1]);       /* { dg-warning "subscript -1 is \(below|outside\) array bounds of .char\\\[3]." } */
+  T (&p[0]);
+  T (&p[1]);
+  T (&p[2]);
+  T (&p[3]);
+  T (&p[4]);        /* { dg-warning "subscript 4 is \(above|outside\) array bounds of .char\\\[3]." } */
+  T (&p[MAX]);      /* { dg-warning "subscript \[0-9\]+ is \(above|outside\) array bounds of .char\\\[3]." } */
+}
+
+struct S
+{
+  int a[2], b[3];
+} s [4];
+
+void test_struct_array_cst (void)
+{
+  T (s[0].a[0] + s[0].a[1] + s[0].b[0] + s[0].b[1] + s[0].b[2] + s[0].b[2]
+     + s[1].a[0] + s[1].a[1] + s[1].b[0] + s[1].b[1] + s[1].b[2] + s[1].b[2]
+     + s[2].a[0] + s[2].a[1] + s[2].b[0] + s[2].b[1] + s[2].b[2] + s[2].b[2]
+     + s[3].a[0] + s[3].a[1] + s[3].b[0] + s[3].b[1] + s[3].b[2] + s[3].b[2]);
+
+  T (&s[0].a[2],
+     &s[0].b[3],
+     &s[1].a[2],
+     &s[1].b[3],
+     &s[2].a[2],
+     &s[2].b[3],
+     &s[3].a[2],
+     &s[3].b[3]);
+
+  T (s[0].a[2]);    /* { dg-warning "subscript 2 is above array bounds of .int\\\[2\\\]." } */
+  T (s[0].b[3]);    /* { dg-warning "subscript 3 is above array bounds of .int\\\[3\\\]." } */
+  T (s[1].a[2]);    /* { dg-warning "subscript 2 is above array bounds of .int\\\[2\\\]." } */
+  T (s[1].b[3]);    /* { dg-warning "subscript 3 is above array bounds of .int\\\[3\\\]." } */
+  T (s[2].a[2]);    /* { dg-warning "subscript 2 is above array bounds of .int\\\[2\\\]." } */
+  T (s[2].b[3]);    /* { dg-warning "subscript 3 is above array bounds of .int\\\[3\\\]." } */
+  T (s[3].a[2]);    /* { dg-warning "subscript 2 is above array bounds of .int\\\[2\\\]." } */
+  T (s[3].b[3]);    /* { dg-warning "subscript 3 is above array bounds of .int\\\[3\\\]." } */
+
+  T (s[4].a[0]);    /* { dg-warning "subscript 4 is above array bounds of .struct S\\\[4\\\]." } */
+  T (s[4].a[2]);    /* { dg-warning "subscript 4 is above array bounds of .struct S\\\[4\\\]." } */
+                    /* { dg-warning "subscript 2 is above array bounds of .int\\\[2\\\]." "" { target *-*-* } .-1 } */
+}
diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-31.c b/gcc/testsuite/gcc.dg/Warray-bounds-31.c
new file mode 100644
index 0000000..e0be1e5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-31.c
@@ -0,0 +1,248 @@
+/* PR tree-optimization/83776: missing -Warray-bounds indexing past the end
+   of a string literal
+   Test to exercise detection of out-of-bounds indices into narrow string
+   literals.
+   { dg-do compile }
+   { dg-options "-O2 -Warray-bounds -ftrack-macro-expansion=0" } */
+
+#include "range.h"
+
+#define MAX DIFF_MAX
+#define MIN DIFF_MIN
+
+#define S1 "1"
+#define S3 "123"
+#define S7 "1234567"
+#define S8 "12345678"
+#define S9 "123456789"
+
+void sink (int, ...);
+
+#define T(expr)   sink (0, expr)
+
+
+void narrow_direct_cst (void)
+{
+  T (S1[MIN]);                /* { dg-warning "array subscript -\[0-9\]+ is below array bounds of .char\\\[2]" "" { xfail *-*-* } } */
+  T (S1[-1]);                 /* { dg-warning "array subscript -1 is below array bounds of .char\\\[2]" } */
+  T (S1[0]);
+  T (S1[1]);
+  T (S1[2]);                  /* { dg-warning "array subscript 2 is above array bounds of .char\\\[2]" } */
+  T (S1[MAX]);                /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .char\\\[2]" } */
+
+  T (&S1[MIN]);               /* { dg-warning "array subscript -\[0-9\]+ is below array bounds of .char\\\[2]" } */
+  T (&S1[-1]);                /* { dg-warning "array subscript -1 is below array bounds of .char\\\[2]" } */
+  T (&S1[0]);
+  T (&S1[2]);
+  T (&S1[3]);                 /* { dg-warning "array subscript 3 is above array bounds of .char\\\[2]" } */
+  T (&S1[MAX]);               /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .char\\\[2]" } */
+
+  T (S9[MIN]);                /* { dg-warning "array subscript -\[0-9\]+ is below array bounds of .char\\\[10]" "" { xfail *-*-* } } */
+  T (S9[-1]);                 /* { dg-warning "array subscript -1 is below array bounds of .char\\\[10]" } */
+  T (S9[9]);
+  T (S9[10]);                 /* { dg-warning "array subscript 10 is above array bounds of .char\\\[10]" } */
+  T (S9[11]);                 /* { dg-warning "array subscript 11 is above array bounds of .char\\\[10]" } */
+  T (S9[MAX]);                /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .char\\\[10]" } */
+
+  T (&S9[MIN]);               /* { dg-warning "array subscript -\[0-9\]+ is below array bounds of .char\\\[10]" } */
+  T (&S9[-1]);                /* { dg-warning "array subscript -1 is below array bounds of .char\\\[10]" } */
+  T (&S9[9]);
+  T (&S9[10]);
+  T (&S9[11]);                 /* { dg-warning "array subscript 11 is above array bounds of .char\\\[10]" } */
+  T (&S9[MAX]);               /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .char\\\[10]" } */
+}
+
+void narrow_ptr_deref_cst (void)
+{
+  const char *p = S8 + 9;
+
+  T (*(p + MIN));             /* { dg-warning "array subscript -\[0-9\]+ is outside array bounds of .char\\\[9]." } */
+  T (*(p - 10));              /* { dg-warning "array subscript -1 is outside array bounds of .char\\\[9]." } */
+  T (*(p - 9));
+  T (*(p - 1));
+  T (*p);                     /* { dg-warning "array subscript 9 is outside array bounds of .char\\\[9]." } */
+  T (*(p + 1));               /* { dg-warning "array subscript 10 is outside array bounds of .char\\\[9]." } */
+  T (*(p + 2));               /* { dg-warning "array subscript 11 is outside array bounds of .char\\\[9]." } */
+}
+
+void narrow_ptr_index_cst (void)
+{
+  const char *p = S7;
+
+  T (p[MIN + 1]);             /* { dg-warning "array subscript -\[0-9\]+ is outside array bounds of .char\\\[8]." "bug 84047" { xfail *-*-* } } */
+  T (p[-1]);                  /* { dg-warning "array subscript -1 is outside array bounds of .char\\\[8]." } */
+  T (p[0]);
+  T (p[1]);
+  T (p[8]);                   /* { dg-warning "array subscript 8 is outside array bounds of .char\\\[8]." } */
+  T (p[99]);                  /* { dg-warning "array subscript 99 is outside array bounds of .char\\\[8]." } */
+  T (p[MAX]);                 /* { dg-warning "array subscript \[0-9\]+ is outside array bounds of .char\\\[8]." } */
+
+  T (&p[MIN + 1]);            /* { dg-warning "array subscript -\[0-9\]+ is \(below|outside\) array bounds of .char\\\[8]." } */
+  T (&p[-1]);                 /* { dg-warning "array subscript -1 is \(below|outside\) array bounds of .char\\\[8]." } */
+  T (&p[0]);
+  T (&p[1]);
+  T (&p[8]);
+  T (&p[9]);                  /* { dg-warning "array subscript 9 is \(above|outside\) array bounds of .char\\\[8]." } */
+  T (&p[99]);                 /* { dg-warning "array subscript 99 is \(above|outside\) array bounds of .char\\\[8]." } */
+  T (&p[MAX]);                /* { dg-warning "array subscript \[0-9\]+ is \(above|outside\) array bounds of .char\\\[8]." } */
+
+  const char *q = S8 + 4;
+  T (q[MIN + 1]);             /* { dg-warning "array subscript -?\[0-9\]+ is outside array bounds of .char\\\[9]." "bug 84047" { xfail *-*-* } } */
+  T (q[-5]);                  /* { dg-warning "array subscript -1 is outside array bounds of .char\\\[9]." } */
+  T (q[-4]);
+  T (q[0]);
+  T (q[1]);
+  T (q[3]);
+  T (q[4]);
+  T (q[5]);                   /* { dg-warning "array subscript 9 is outside array bounds of .char\\\[9]." } */
+  T (q[99]);                  /* { dg-warning "array subscript 103 is outside array bounds of .char\\\[9]." } */
+  T (q[MAX - 4]);             /* { dg-warning "array subscript \[0-9\]+ is outside array bounds of .char\\\[9]." } */
+  T (q[MAX - 3]);             /* { dg-warning "array subscript \[0-9\]+ is outside array bounds of .char\\\[9]." "bug 84047" { xfail *-*-* } } */
+
+  T (&q[MIN + 1]);            /* { dg-warning "array subscript -?\[0-9\]+ is \(below|outside\) array bounds of .char\\\[9]." } */
+  T (&q[-5]);                 /* { dg-warning "array subscript -1 is \(below|outside\) array bounds of .char\\\[9]." } */
+  T (&q[-4]);
+  T (&q[0]);
+  T (&q[1]);
+  T (&q[5]);
+  T (&q[6]);                  /* { dg-warning "array subscript 10 is \(above|outside\) array bounds of .char\\\[9]." } */
+  T (&q[99]);                 /* { dg-warning "array subscript 103 is \(above|outside\) array bounds of .char\\\[9]." } */
+  T (&q[MAX - 4]);            /* { dg-warning "array subscript \[0-9\]+ is \(above|outside\) array bounds of .char\\\[9]." } */
+  T (&q[MAX - 3]);            /* { dg-warning "array subscript -?\[0-9\]+ is \(below|outside\) array bounds of .char\\\[9]." } */
+}
+
+
+void narrow_direct_range (ptrdiff_t i, size_t j)
+{
+  T (S3[i]);
+  T (S9[j]);
+
+  T (S3[SR (MIN, -1)]);       /* { dg-warning "array subscript -1 is below array bounds of .char\\\[4]" } */
+  T (S3[SR (MIN, 0)]);
+  T (S3[SR (-2, -1)]);        /* { dg-warning "array subscript -1 is below array bounds of .char\\\[4]" } */
+  T (S3[SR (1, 2)]);
+  T (S3[SR (1, 999)]);
+  T (S3[SR (2, 999)]);
+  T (S3[SR (3, 999)]);
+  T (S3[SR (4, 999)]);       /* { dg-warning "array subscript 4 is above array bounds of .char\\\[4]" } */
+
+  T (&S3[SR (MIN, -1)]);      /* { dg-warning "array subscript -1 is below array bounds of .char\\\[4]" } */
+  T (&S3[SR (MIN, 0)]);
+  T (&S3[SR (-2, -1)]);       /* { dg-warning "array subscript -1 is below array bounds of .char\\\[4]" } */
+  T (&S3[SR (1, 2)]);
+  T (&S3[SR (1, 999)]);
+  T (&S3[SR (2, 999)]);
+  T (&S3[SR (4, 999)]);
+  T (&S3[SR (5, 999)]);      /* { dg-warning "array subscript 5 is above array bounds of .char\\\[4]" } */
+
+  T (S9[SR (MIN, -9)]);       /* { dg-warning "array subscript -9 is below array bounds of .char\\\[10]" } */
+  T (S9[SR (MIN, -1)]);       /* { dg-warning "array subscript -1 is below array bounds of .char\\\[10]" } */
+  T (S9[SR (MIN, 0)]);
+  T (S9[SR (-2, -1)]);        /* { dg-warning "array subscript -1 is below array bounds of .char\\\[10]" } */
+  T (S9[SR (1, 2)]);
+  T (S9[SR (1, 9)]);
+  T (S9[SR (1, 999)]);
+  T (S9[SR (9, 999)]);
+  T (S9[SR (10, 999)]);       /* { dg-warning "array subscript 10 is above array bounds of .char\\\[10]" } */
+  T (S9[SR (99, MAX)]);       /* { dg-warning "array subscript 99 is above array bounds of .char\\\[10]" } */
+}
+
+
+void narrow_ptr_deref_range (ptrdiff_t i, size_t j)
+{
+  const char *p;
+
+  p = S1 + i;
+  T (*p);
+
+  p = S1 + j;
+  T (*p);
+
+  p = S1 + SR (-999, 999);
+  T (*p);
+
+  p = S1 + SR (-1, 1);
+  T (*p);
+
+  p = S1 + SR (-1, 0);
+  T (*p);
+
+  p = S1 + SR (0, 1);
+  T (*p);
+
+  p = S1 + SR (1, 2);
+  T (*p);
+
+  p = S1 + SR (2, 3);
+  T (*p);                     /* { dg-warning "array subscript \\\[2, 3] is outside array bounds of .char\\\[2]." } */
+
+  p = S1 + SR (9, 99);
+  T (*p);                     /* { dg-warning "array subscript \\\[9, 99] is outside array bounds of .char\\\[2]." } */
+
+  p = S8 + SR (-999, 999);
+  T (*p);
+
+  p = S8 + SR (-9, -1);
+  T (*p);                     /* { dg-warning "array subscript \\\[-9, -1] is outside array bounds of .char\\\[9]." } */
+
+  p = S8 + SR (-9, 0);
+  T (*p);
+
+  p = S8 + SR (-9, 9);
+  T (*p);
+
+  p = S8 + SR (-9, 123);
+  T (*p);
+
+  p = S8 + SR (8, 123);
+  T (*p);
+
+  p = S8 + SR (9, 123);
+  T (*p);                     /* { dg-warning "array subscript \\\[9, 123] is outside array bounds of .char\\\[9]." } */
+
+  {
+    const char *p1 = S3 + i;
+    const char *p2 = p1 + i;
+    const char *p3 = p2 + i;
+    const char *p4 = p3 + i;
+    const char *p5 = p4 + i;
+
+    T (*p1);
+    T (*p2);
+    T (*p3);
+    T (*p4);
+    T (*p5);
+  }
+
+  {
+    i = SR (1, 2);
+
+    const char *p1 = S3 +  SR (1, DIFF_MAX - 1);
+    const char *p2 = p1 + i;
+    const char *p3 = p2 + i;
+    const char *p4 = p3 + i;
+    const char *p5 = p4 + i;
+
+    T (*p1);
+    T (*p2);
+    T (*p3);
+    T (*p4);                  /* { dg-warning "array subscript \\\[4, \[0-9\]+] is outside array bounds of .char\\\[4]." } */
+    T (*p5);                  /* { dg-warning "array subscript \\\[5, \[0-9\]+] is outside array bounds of .char\\\[4]." } */
+  }
+}
+
+
+void narrow_ptr_index_range (void)
+{
+  const char *p;
+
+  p = S7;
+  T (p[SR (-9, -1)]);         /* { dg-warning "array subscript \\\[-9, -1] is outside array bounds of .char\\\[8]." } */
+  T (p[SR (-8, 0)]);
+  T (p[SR (0, MAX)]);
+  T (p[SR (1, 9)]);
+  T (p[SR (8, 9)]);           /* { dg-warning "array subscript \\\[8, 9] is outside array bounds of .char\\\[8]." } */
+
+  p = S7 + SR (4, 6);
+  T (p[5]);                   /* { dg-warning "array subscript \\\[9, 11] is outside array bounds of .char\\\[8]." } */
+}
diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-32.c b/gcc/testsuite/gcc.dg/Warray-bounds-32.c
new file mode 100644
index 0000000..9512764
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-32.c
@@ -0,0 +1,185 @@
+/* PR tree-optimization/83776: missing -Warray-bounds indexing past the end
+   of a string literal
+   Test to exercise indices into wide string literals.
+   { dg-do compile }
+   { dg-options "-O2 -Warray-bounds -ftrack-macro-expansion=0" } */
+
+#include "range.h"
+
+#define MAX DIFF_MAX
+#define MIN DIFF_MIN
+
+typedef __WCHAR_TYPE__ wchar_t;
+
+#define W2 L"12"
+#define W3 L"123"
+#define W4 L"1234"
+#define W7 L"1234567"
+#define W8 L"12345678"
+#define W9 L"123456789"
+
+void sink (int);
+
+#define T(expr)   sink (expr)
+
+
+void wide_direct_cst (void)
+{
+  T (W9[MIN]);                /* { dg-warning "array subscript -\[0-9\]+ is below array bounds of .\[a-z \]+\\\[10]" "" } */
+  T (W9[-1]);                 /* { dg-warning "array subscript -1 is below array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[11]);                 /* { dg-warning "array subscript 11 is above array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[MAX]);                /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .\[a-z \]+\\\[10]" } */
+
+}
+
+void wide_ptr_deref_cst (void)
+{
+  const wchar_t *p = W8 + 9;
+  T (*p);                     /* { dg-warning "array subscript 9 is outside array bounds of .\[a-z \]+\\\[9]." } */
+  T (p[1]);                   /* { dg-warning "array subscript 10 is outside array bounds of .\[a-z \]+\\\[9]." } */
+  T (p[99]);                  /* { dg-warning "array subscript 108 is outside array bounds of .\[a-z \]+\\\[9]." } */
+}
+
+void wide_ptr_index_cst (void)
+{
+  const wchar_t *p = W7;
+
+  T (p[1]);
+  T (p[8]);                   /* { dg-warning "array subscript 8 is outside array bounds of .\[a-z \]+\\\[8]." } */
+  T (p[99]);                  /* { dg-warning "array subscript 99 is outside array bounds of .\[a-z \]+\\\[8]." } */
+  T (p[MAX]);                 /* { dg-warning "array subscript -?\[0-9\]+ is outside array bounds of .\[a-z \]+\\\[8]." } */
+}
+
+
+void wide_direct_range (ptrdiff_t i, size_t j)
+{
+  T (W9[i]);
+  T (W9[j]);
+
+  T (W9[SR (MIN, -9)]);       /* { dg-warning "array subscript -9 is below array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[SR (MIN, -1)]);       /* { dg-warning "array subscript -1 is below array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[SR (MIN, 0)]);
+  T (W9[SR (-2, -1)]);        /* { dg-warning "array subscript -1 is below array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[SR (1, 2)]);
+  T (W9[SR (1, 9)]);
+  T (W9[SR (1, 999)]);
+  T (W9[SR (9, 999)]);
+  T (W9[SR (10, 999)]);       /* { dg-warning "array subscript 10 is above array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[SR (99, MAX)]);       /* { dg-warning "array subscript 99 is above array bounds of .\[a-z \]+\\\[10]" } */
+}
+
+void wide_ptr_deref_range (ptrdiff_t i, size_t j)
+{
+  const wchar_t *p;
+
+  p = W8 + i;
+  T (*p);
+
+  p = W8 + j;
+  T (*p);
+
+  p = W8 + SR (-9, -1);
+  T (*p);                     /* { dg-warning "array subscript \\\[-9, -1] is outside array bounds of .\[a-z \]+\\\[9]." } */
+
+  p = W8 + SR (-9, 0);
+  T (*p);
+
+  p = W8 + SR (-9, 9);
+  T (*p);
+
+  p = W8 + SR (9, 123);
+  T (*p);                     /* { dg-warning "array subscript \\\[9, 123] is outside array bounds of .\[a-z \]+\\\[9]." } */
+}
+
+void wide_ptr_index_range (void)
+{
+  const wchar_t *p;
+
+  p = W7;
+  T (p[SR (-9, -1)]);         /* { dg-warning "array subscript \\\[-9, -1] is outside array bounds of .\[a-z \]+\\\[8]." } */
+  T (p[SR (-8, 0)]);
+  T (p[SR (0, MAX)]);
+  T (p[SR (1, 9)]);
+  T (p[SR (8, 9)]);           /* { dg-warning "array subscript \\\[8, 9] is outside array bounds of .\[a-z \]+\\\[8]." } */
+
+  p = W7 + SR (4, 6);
+  T (p[5]);                   /* { dg-warning "array subscript \\\[9, 11] is outside array bounds of .\[a-z \]+\\\[8]." } */
+}
+
+void wide_ptr_index_range_1 (void)
+{
+  {
+    int i = SR (1, 2);
+    const wchar_t *p1 = W2 + i;
+
+    T (p1[0]);
+  }
+  {
+    int i = SR (1, 2);
+    const wchar_t *p1 = W2 + i;
+
+    T (p1[1]);
+  }
+  {
+    int i = SR (1, 2);
+    const wchar_t *p1 = W2 + i;
+
+    T (p1[2]);                /* { dg-warning "array subscript \\\[3, 4] is outside array bounds of .\[a-z \]+\\\[3]." } */
+  }
+}
+
+void wide_ptr_index_range_chain (void)
+{
+  int i = SR (1, 2);
+  {
+    const wchar_t *p1 = W2 + i;
+    const wchar_t *p2 = p1 + i;
+    const wchar_t *p3 = p2 + i;
+
+    T (p1[-3]);               /* { dg-warning "array subscript \\\[-2, -1] is outside array bounds of .\[a-z \]+\\\[3]." } */
+    T (p1[-2]);
+    T (p1[-1]);
+    T (p1[0]);
+    T (p1[1]);
+    T (p1[2]);                /* { dg-warning "array subscript \\\[3, 4] is outside array bounds of .\[a-z \]+\\\[3]." } */
+
+    T (p2[-5]);               /* { dg-warning "array subscript \\\[-3, -1] is outside array bounds of .\[a-z \]+\\\[3]." } */
+    T (p2[-4]);
+    T (p2[-1]);
+    T (p2[0]);
+    T (p2[1]);                /* { dg-warning "array subscript \\\[3, 5] is outside array bounds of .\[a-z \]+\\\[3]." } */
+
+    T (p3[0]);                /* { dg-warning "array subscript \\\[3, 6] is outside array bounds of .\[a-z \]+\\\[3]." } */
+    T (p3[1]);                /* { dg-warning "array subscript \\\[4, 7] is outside array bounds of .\[a-z \]+\\\[3]." } */
+    T (p3[9999]);             /* { dg-warning "array subscript \\\[10002, 10005] is outside array bounds of .\[a-z \]+\\\[3]." } */
+
+    /* Large offsets are indistinguishable from negative values.  */
+    T (p3[DIFF_MAX]);         /* { dg-warning "array subscript" "bug" { xfail *-*-* } } */
+  }
+
+  {
+    const wchar_t *p1 = W3 + i;
+    const wchar_t *p2 = p1 + i;
+    const wchar_t *p3 = p2 + i;
+    const wchar_t *p4 = p3 + i;
+
+    T (p1[-3]);               /* { dg-warning "array subscript \\\[-2, -1] is outside array bounds of .\[a-z \]+\\\[4]." } */
+    T (p1[-2]);
+    T (p1[1]);
+    T (p1[2]);
+    T (p1[3]);                /* { dg-warning "array subscript \\\[4, 5] is outside array bounds of .\[a-z \]+\\\[4]." } */
+
+    T (p3[1]);                /* { dg-warning "array subscript \\\[4, 7] is outside array bounds of .\[a-z \]+\\\[4]." } */
+  }
+}
+
+void wide_ptr_index_range_4 (void)
+{
+  int i = SR (1, 2);
+  const wchar_t *p1 = W4 + i;
+  const wchar_t *p2 = p1 + i;
+  const wchar_t *p3 = p2 + i;
+  const wchar_t *p4 = p3 + i;
+
+  T (p4[1]);                  /* { dg-warning "array subscript \\\[5, 9] is outside array bounds of .\[a-z \]+\\\[5]." } */
+}
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 3e30f6b..8221a06 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -3110,6 +3110,19 @@ get_access_for_expr (tree expr)
       || !DECL_P (base))
     return NULL;
 
+  /* Fail for out-of-bounds array offsets.  */
+  tree basetype = TREE_TYPE (base);
+  if (TREE_CODE (basetype) == ARRAY_TYPE)
+    {
+      if (offset < 0)
+	return NULL;
+
+      if (tree size = DECL_SIZE (base))
+	if (tree_fits_uhwi_p (size)
+	    && tree_to_uhwi (size) < (unsigned HOST_WIDE_INT) offset)
+	  return NULL;
+    }
+
   if (!bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
     return NULL;
 
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index a7453ab..27021760 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -4778,6 +4778,7 @@ class vrp_prop : public ssa_propagation_engine
   void vrp_finalize (bool);
   void check_all_array_refs (void);
   void check_array_ref (location_t, tree, bool);
+  void check_mem_ref (location_t, tree, bool);
   void search_for_addr_array (tree, location_t);
 
   class vr_values vr_values;
@@ -4930,21 +4931,280 @@ vrp_prop::check_array_ref (location_t location, tree ref,
     }
 }
 
+/* Checks one MEM_REF in REF, located at LOCATION, for out-of-bounds
+   references to string constants.  If VRP can determine that the array
+   subscript is a constant, check if it is outside valid range.
+   If the array subscript is a RANGE, warn if it is non-overlapping
+   with valid range.
+   IGNORE_OFF_BY_ONE is true if the MEM_REF is inside an ADDR_EXPR
+   (used to allow one-past-the-end indices for code that takes
+   the address of the just-past-the-end element of an array).  */
+
+void
+vrp_prop::check_mem_ref (location_t location, tree ref, bool ignore_off_by_one)
+{
+  if (TREE_NO_WARNING (ref))
+    return;
+
+  tree arg = TREE_OPERAND (ref, 0);
+  /* The constant and variable offset of the reference.  */
+  tree cstoff = TREE_OPERAND (ref, 1);
+  tree varoff = NULL_TREE;
+
+  const offset_int maxobjsize = tree_to_shwi (max_object_size ());
+
+  /* The array or string constant bounds in bytes.  Initially set
+     to [-MAXOBJSIZE - 1, MAXOBJSIZE]  until a tighter bound is
+     determined.  */
+  offset_int arrbounds[2];
+  arrbounds[1] = maxobjsize;
+  arrbounds[0] = -arrbounds[1] - 1;
+
+  /* The minimum and maximum intermediate offset.  For a reference
+     to be valid, not only does the final offset/subscript must be
+     in bounds but all intermediate offsets must be as well. */
+  offset_int ioff = wi::to_offset (fold_convert (ptrdiff_type_node, cstoff));
+  offset_int extrema[2] = { 0, wi::abs (ioff) };
+
+  /* The range of the byte offset into the reference.  */
+  offset_int offrange[2] = { 0, 0 };
+
+  value_range *vr = NULL;
+
+  /* Determine the offsets and increment OFFRANGE for the bounds of each.
+     The loop computes the the range of the final offset for expressions
+     such as (A + i0 + ... + iN)[CSTOFF] where i0 through iN are SSA_NAMEs
+     in some range.  */
+  while (TREE_CODE (arg) == SSA_NAME)
+    {
+      gimple *def = SSA_NAME_DEF_STMT (arg);
+      if (!is_gimple_assign (def))
+	break;
+
+      tree_code code = gimple_assign_rhs_code (def);
+      if (code == POINTER_PLUS_EXPR)
+	{
+	  arg = gimple_assign_rhs1 (def);
+	  varoff = gimple_assign_rhs2 (def);
+	}
+      else if (code == ASSERT_EXPR)
+	{
+	  arg = TREE_OPERAND (gimple_assign_rhs1 (def), 0);
+	  continue;
+	}
+      else
+	return;
+
+      /* VAROFF should always be a SSA_NAME here (and not even
+	 INTEGER_CST) but there's no point in taking chances.  */
+      if (TREE_CODE (varoff) != SSA_NAME)
+	break;
+
+      vr = get_value_range (varoff);
+      if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
+	break;
+
+      if (TREE_CODE (vr->min) != INTEGER_CST
+          || TREE_CODE (vr->max) != INTEGER_CST)
+        break;
+
+      if (vr->type == VR_RANGE)
+	{
+	  if (tree_int_cst_lt (vr->min, vr->max))
+	    {
+	      offset_int min
+		= wi::to_offset (fold_convert (ptrdiff_type_node, vr->min));
+	      offset_int max
+		= wi::to_offset (fold_convert (ptrdiff_type_node, vr->max));
+	      if (min < max)
+		{
+		  offrange[0] += min;
+		  offrange[1] += max;
+		}
+	      else
+		{
+		  offrange[0] += max;
+		  offrange[1] += min;
+		}
+	    }
+	  else
+	    {
+	      /* Conservatively add [-MAXOBJSIZE -1, MAXOBJSIZE]
+		 to OFFRANGE.  */
+	      offrange[0] += arrbounds[0];
+	      offrange[1] += arrbounds[1];
+	    }
+	}
+      else
+	{
+	  /* For an anti-range, analogously to the above, conservatively
+	     add [-MAXOBJSIZE -1, MAXOBJSIZE] to OFFRANGE.  */
+	  offrange[0] += arrbounds[0];
+	  offrange[1] += arrbounds[1];
+	}
+
+      /* Keep track of the minimum and maximum offset.  */
+      if (offrange[1] < 0 && offrange[1] < extrema[0])
+	extrema[0] = offrange[1];
+      if (offrange[0] > 0 && offrange[0] > extrema[1])
+	extrema[1] = offrange[0];
+
+      if (offrange[0] < arrbounds[0])
+	offrange[0] = arrbounds[0];
+
+      if (offrange[1] > arrbounds[1])
+	offrange[1] = arrbounds[1];
+    }
+
+  if (TREE_CODE (arg) == ADDR_EXPR)
+    {
+      arg = TREE_OPERAND (arg, 0);
+      if (TREE_CODE (arg) != STRING_CST
+	  && TREE_CODE (arg) != VAR_DECL)
+	return;
+    }
+  else
+    return;
+
+  /* The type of the object being referred to.  It can be an array,
+     string literal, or a non-array type when the MEM_REF represents
+     a reference/subscript via a pointer to an object that is not
+     an element of an array.  References to members of structs and
+     unions are excluded because MEM_REF doesn't make it possible
+     to identify the member where the reference orginated.  */
+  tree reftype = TREE_TYPE (arg);
+  if (POINTER_TYPE_P (reftype)
+      || RECORD_OR_UNION_TYPE_P (reftype))
+    return;
+
+  offset_int eltsize;
+  if (TREE_CODE (reftype) == ARRAY_TYPE)
+    {
+      eltsize = wi::to_offset (TYPE_SIZE_UNIT (TREE_TYPE (reftype)));
+
+      if (tree dom = TYPE_DOMAIN (reftype))
+	{
+	  tree bnds[] = { TYPE_MIN_VALUE (dom), TYPE_MAX_VALUE (dom) };
+	  if (array_at_struct_end_p (arg)
+	      || !bnds[0] || !bnds[1])
+	    {
+	      arrbounds[0] = 0;
+	      arrbounds[1] = wi::lrshift (maxobjsize, wi::floor_log2 (eltsize));
+	    }
+	  else
+	    {
+	      arrbounds[0] = wi::to_offset (bnds[0]) * eltsize;
+	      arrbounds[1] = (wi::to_offset (bnds[1]) + 1) * eltsize;
+	    }
+	}
+      else
+	{
+	  arrbounds[0] = 0;
+	  arrbounds[1] = wi::lrshift (maxobjsize, wi::floor_log2 (eltsize));
+	}
+
+      if (TREE_CODE (ref) == MEM_REF)
+	{
+	  /* For MEM_REF determine a tighter bound of the non-array
+	     element type.  */
+	  tree eltype = TREE_TYPE (reftype);
+	  while (TREE_CODE (eltype) == ARRAY_TYPE)
+	    eltype = TREE_TYPE (eltype);
+	  eltsize = wi::to_offset (TYPE_SIZE_UNIT (eltype));
+	}
+    }
+  else
+    {
+      eltsize = 1;
+      arrbounds[0] = 0;
+      arrbounds[1] = wi::to_offset (TYPE_SIZE_UNIT (reftype));
+    }
+
+  offrange[0] += ioff;
+  offrange[1] += ioff;
+
+  /* Compute the more permissive upper bound when IGNORE_OFF_BY_ONE
+     is set (when taking the address of the one-past-last element
+     of an array) but always use the stricter bound in diagnostics. */
+  offset_int ubound = arrbounds[1];
+  if (ignore_off_by_one)
+    ubound += 1;
+
+  if (offrange[0] >= ubound || offrange[1] < arrbounds[0])
+    {
+      /* Treat a reference to a non-array object as one to an array
+	 of a single element.  */
+      if (TREE_CODE (reftype) != ARRAY_TYPE)
+	reftype = build_array_type_nelts (reftype, 1);
+
+      if (TREE_CODE (ref) == MEM_REF)
+	{
+	  /* Extract the element type out of MEM_REF and use its size
+	     to compute the index to print in the diagnostic; arrays
+	     in MEM_REF don't mean anything.   */
+	  tree type = TREE_TYPE (ref);
+	  while (TREE_CODE (type) == ARRAY_TYPE)
+	    type = TREE_TYPE (type);
+	  tree size = TYPE_SIZE_UNIT (type);
+	  offrange[0] = offrange[0] / wi::to_offset (size);
+	  offrange[1] = offrange[1] / wi::to_offset (size);
+	}
+      else
+	{
+	  /* For anything other than MEM_REF, compute the index to
+	     print in the diagnostic as the offset over element size.  */
+	  offrange[0] = offrange[0] / eltsize;
+	  offrange[1] = offrange[1] / eltsize;
+	}
+
+      if (offrange[0] == offrange[1])
+	warning_at (location, OPT_Warray_bounds,
+		    "array subscript %wi is outside array bounds "
+		    "of %qT",
+		    offrange[0].to_shwi (), reftype);
+      else
+	warning_at (location, OPT_Warray_bounds,
+		    "array subscript [%wi, %wi] is outside array bounds "
+		    "of %qT",
+		    offrange[0].to_shwi (), offrange[1].to_shwi (), reftype);
+      TREE_NO_WARNING (ref) = 1;
+      return;
+    }
+
+  if (warn_array_bounds < 2)
+    return;
+
+  /* At level 2 check also intermediate offsets.  */
+  int i = 0;
+  if (extrema[i] < -arrbounds[1] || extrema[i = 1] > ubound)
+    {
+      HOST_WIDE_INT tmpidx = extrema[i].to_shwi () / eltsize.to_shwi ();
+
+      warning_at (location, OPT_Warray_bounds,
+		  "intermediate array offset %wi is outside array bounds "
+		  "of %qT",
+		  tmpidx,  reftype);
+      TREE_NO_WARNING (ref) = 1;
+    }
+}
+
 /* Searches if the expr T, located at LOCATION computes
    address of an ARRAY_REF, and call check_array_ref on it.  */
 
 void
 vrp_prop::search_for_addr_array (tree t, location_t location)
 {
-  /* Check each ARRAY_REFs in the reference chain. */
+  /* Check each ARRAY_REF and MEM_REF in the reference chain. */
   do
     {
       if (TREE_CODE (t) == ARRAY_REF)
 	check_array_ref (location, t, true /*ignore_off_by_one*/);
+      else if (TREE_CODE (t) == MEM_REF)
+	check_mem_ref (location, t, true /*ignore_off_by_one*/);
 
       t = TREE_OPERAND (t, 0);
     }
-  while (handled_component_p (t));
+  while (handled_component_p (t) || TREE_CODE (t) == MEM_REF);
 
   if (TREE_CODE (t) == MEM_REF
       && TREE_CODE (TREE_OPERAND (t, 0)) == ADDR_EXPR
@@ -5026,7 +5286,8 @@ check_array_bounds (tree *tp, int *walk_subtree, void *data)
   vrp_prop *vrp_prop = (class vrp_prop *)wi->info;
   if (TREE_CODE (t) == ARRAY_REF)
     vrp_prop->check_array_ref (location, t, false /*ignore_off_by_one*/);
-
+  else if (TREE_CODE (t) == MEM_REF)
+    vrp_prop->check_mem_ref (location, t, false /*ignore_off_by_one*/);
   else if (TREE_CODE (t) == ADDR_EXPR)
     {
       vrp_prop->search_for_addr_array (t, location);
Jeff Law July 16, 2018, 2:53 a.m. UTC | #5
On 07/13/2018 05:09 PM, Martin Sebor wrote:
>>> +  /* At level 2 check also intermediate offsets.  */
>>> +  int i = 0;
>>> +  if (extrema[i] < -strbounds[1]
>>> +      || extrema[i = 1] > strbounds[1] + eltsize)
>>> +    {
>>> +      HOST_WIDE_INT tmpidx = extrema[i].to_shwi () / eltsize.to_shwi
>>> ();
>>> +
>>> +      warning_at (location, OPT_Warray_bounds,
>>> +          "intermediate array offset %wi is outside array bounds "
>>> +          "of %qT",
>>> +          tmpidx,  strtype);
>>> +      TREE_NO_WARNING (ref) = 1;
>>> +    }
>>> +}
>> This seems ill-advised.  All that matters is the actual index used in
>> the dereference.  Intermediate values (ie, address computations) along
>> the way are uninteresting -- we may form an address out of the bounds of
>> the array as an intermediate computation.  But the actual memory
>> reference will be within the range.
> 
> The idea is to help detect bugs that cannot be detected otherwise.
> Take the example below:
> 
>   void g (int i)
>   {
>     if (i < 1 || 2 < i)
>       i = 1;
> 
>     const char *p1 = "12" + i;
>     const char *p2 = p1 + i;
> 
>     extern int x, y;
> 
>     x = p2[-4];   // #1: only valid if p2 is out of bounds
>     y = p2[0];    // #2 therefore this must be out of bounds
>   }
> 
> For the dereference at #1 to be valid i must be 2 (i.e.,
> the upper bound of its range) and so p2 is therefore out
> of bounds.
> 
> We may be able to compensate for it and compute the right
> address at #1 but if the out-of-bounds value is used in
> another dereference that makes a different assumption
> (such as #2) one of the two is definitely wrong.  It might
> be possible to do something smarter and determine if the
> pointer really is used this way but I didn't want to
> complicate the things too much so I put the logic under
> level 2.
> 
> I will post an updated patch shortly.
The problem here is the intermediate values may well be out of bounds
addresses that are brought into bounds by an offset in the actual
dereference.

The loop optimizers can create this situation internally and IIRC Ada
also creates them before handing the code off to gimple.

I'm not aware of a way to distinguish between cases where the compiler
has created the out of bounds address and will compensate in the final
access vs user error.  Worse yet, I know of no way to stop creation of
the out of bounds address, so users would get an warning which they
can't reasonably cope with.

I guess having it under level 2 may be OK.  I'd kind of like to hear
other opinions here.

jeff

ps.  And yes, this is braindamaged and is known to break badly on some
segmented architectures which select the segment based on the base
register rather than the full effective address.  THe PA has that as a
design "feature".  The mn103 (and defunct mn102 IIRC) have this as an
implementation bug.
Jeff Law July 19, 2018, 9:51 p.m. UTC | #6
On 07/13/2018 05:45 PM, Martin Sebor wrote:
>>
>> +      offset_int ofr[] = {
>> +       wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
>> +       wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
>> +      };
>>
>> huh.  Do you maybe want to use widest_int for ofr[]?  What's
>> wrong with wi::to_offset (vr->min)?  Building another intermediate
>> tree node here looks just bogus.
> 
> I need to convert size_type indices to signed to keep their sign
> if it's negative and include it in warnings.  I've moved the code
> into a conditional where it's used to minimize the cost but other
> than that I don't know how else to convert it.
> 
>>
>> What are you trying to do in this loop anyways?
> 
> The loop It determines the range of the final index/offset for
> a series of POINTER_PLUS_EXPRs.  It handles cases like this:
> 
>   int g (int i, int j, int k)
>   {
>     if (i < 1) i = 1;
>     if (j < 1) j = 1;
>     if (k < 1) k = 1;
> 
>     const char *p0 = "123";
>     const char *p1 = p0 + i;
>     const char *p2 = p1 + j;
>     const char *p3 = p2 + k;
> 
>     // p2[3] and p3[1] are out of bounds
>     return p0[4] + p1[3] + p2[2] + p3[1];
>   }
> 
>> I suppose
>> look at
>>
>>   p_1 = &obj[i_2];  // already bounds-checked, but with ignore_off_by_one
>>   ... = MEM[p_1 + CST];
>>
>> ?  But then
>>
>> +      if (TREE_CODE (varoff) != SSA_NAME)
>> +       break;
>>
>> you should at least handle INTEGER_CSTs here?
> 
> It's already handled (and set in CSTOFF).  There should be no
> more constant offsets after that (I haven't come across any.)
> 
>>
>> +      if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
>> +       break;
>>
>> please use positive tests here, VR_RANGE || VR_ANTI_RANGE.  As usual
>> the anti-range handling looks odd.  Please add comments so we can follow
>> what you were thinking when writing range merging code.  Even better if you
>>
>> can stick to use existing code rather than always re-inventing the wheel...
>>
> 
> The anti-range handling is to conservatively add
> [-MAXOBJSIZE -1, MAXOBJSIZE] to OFFRANGE.  I've added comments
> to make it clear.  I'd be more than happy to reuse existing
> code if I knew where to find it (if it exists).  It sure would
> save me lots of work to have a library of utility functions to
> call instead of rolling my own code each time.
Finding stuff is never easy :(  GCC's just gotten so big it's virtually
impossible for anyone to know all the APIs.

The suggestion I'd have would be to (when possible) factor this stuff
into routines you can reuse.  We (as a whole) have this tendency to
open-code all kinds of things rather than factoring the code into
reusable routines.

In addition to increasing the probability that you'll be able to reuse
code later, just reading a new function's header tends to make me (as a
reviewer) internally ask if there's already a routine we should be using
instead.  When it's open-coded it's significantly harder to spot those
cases (at least for me).


> 
>>
>> I think I commented on earlier variants but this doesn't seem to resemble
>> any of them.
> 
> I've reworked the patch (sorry) to also handle arrays.  For GCC
> 9 it seems I might as well do both in one go.
> 
> Attached is an updated patch with these changes.
> 
> Martin
> 
> gcc-83776.diff
> 
> 
> PR tree-optimization/84047 - missing -Warray-bounds on an out-of-bounds index into an array
> PR tree-optimization/83776 - missing -Warray-bounds indexing past the end of a string literal
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/84047
> 	PR tree-optimization/83776
> 	* tree-vrp.c (vrp_prop::check_mem_ref): New function.
> 	(check_array_bounds): Call it.
> 	* /gcc/tree-sra.c (get_access_for_expr): Fail for out-of-bounds
> 	array offsets.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/83776
> 	PR tree-optimization/84047
> 	* gcc.dg/Warray-bounds-29.c: New test.
> 	* gcc.dg/Warray-bounds-30.c: New test.
> 	* gcc.dg/Warray-bounds-31.c: New test.
> 	* gcc.dg/Warray-bounds-32.c: New test.
> 

> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index 3e30f6b..8221a06 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -3110,6 +3110,19 @@ get_access_for_expr (tree expr)
>        || !DECL_P (base))
>      return NULL;
>  
> +  /* Fail for out-of-bounds array offsets.  */
> +  tree basetype = TREE_TYPE (base);
> +  if (TREE_CODE (basetype) == ARRAY_TYPE)
> +    {
> +      if (offset < 0)
> +	return NULL;
> +
> +      if (tree size = DECL_SIZE (base))
> +	if (tree_fits_uhwi_p (size)
> +	    && tree_to_uhwi (size) < (unsigned HOST_WIDE_INT) offset)
> +	  return NULL;
> +    }
> +
>    if (!bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
>      return NULL;
So I'm a bit curious about this hunk.  Did we end up creating an access
structure that walked off the end of the array?  Presumably  you
suppressing SRA at this point so that you can see the array access later
and give a suitable warning.  Right?


>  
> diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
> index a7453ab..27021760 100644
> --- a/gcc/tree-vrp.c
> +++ b/gcc/tree-vrp.c
> @@ -4930,21 +4931,280 @@ vrp_prop::check_array_ref (location_t location, tree ref,
>      }
>  }
>  
> +/* Checks one MEM_REF in REF, located at LOCATION, for out-of-bounds
> +   references to string constants.  If VRP can determine that the array
> +   subscript is a constant, check if it is outside valid range.
> +   If the array subscript is a RANGE, warn if it is non-overlapping
> +   with valid range.
> +   IGNORE_OFF_BY_ONE is true if the MEM_REF is inside an ADDR_EXPR
> +   (used to allow one-past-the-end indices for code that takes
> +   the address of the just-past-the-end element of an array).  */
> +
> +void
> +vrp_prop::check_mem_ref (location_t location, tree ref, bool ignore_off_by_one)
> +{
> +  if (TREE_NO_WARNING (ref))
> +    return;
> +
> +  tree arg = TREE_OPERAND (ref, 0);
> +  /* The constant and variable offset of the reference.  */
> +  tree cstoff = TREE_OPERAND (ref, 1);
> +  tree varoff = NULL_TREE;
> +
> +  const offset_int maxobjsize = tree_to_shwi (max_object_size ());
> +
> +  /* The array or string constant bounds in bytes.  Initially set
> +     to [-MAXOBJSIZE - 1, MAXOBJSIZE]  until a tighter bound is
> +     determined.  */
> +  offset_int arrbounds[2];
> +  arrbounds[1] = maxobjsize;
> +  arrbounds[0] = -arrbounds[1] - 1;
I realize that arrbounds[1] == maxobjsize at this point.  But is there
any reason not to use maxobjsize in the assignment to arrbounds[0] so
that its computation matches the comment.  It would also seem advisable
to set the min first, then the max since that's the ordering in the
comment and in the array.


> +
> +  /* The minimum and maximum intermediate offset.  For a reference
> +     to be valid, not only does the final offset/subscript must be
> +     in bounds but all intermediate offsets must be as well. */
> +  offset_int ioff = wi::to_offset (fold_convert (ptrdiff_type_node, cstoff));
> +  offset_int extrema[2] = { 0, wi::abs (ioff) };
You should probably tone back the comment here since the intermediate
offsets do not have to be in bounds.


[ Big snip ]

> +  /* The type of the object being referred to.  It can be an array,
> +     string literal, or a non-array type when the MEM_REF represents
> +     a reference/subscript via a pointer to an object that is not
> +     an element of an array.  References to members of structs and
> +     unions are excluded because MEM_REF doesn't make it possible
> +     to identify the member where the reference orginated.  */
s/orginated/originated/

OK with those changes.

jeff
Martin Sebor July 19, 2018, 11:36 p.m. UTC | #7
On 07/19/2018 03:51 PM, Jeff Law wrote:
> On 07/13/2018 05:45 PM, Martin Sebor wrote:
>>>
>>> +      offset_int ofr[] = {
>>> +       wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
>>> +       wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
>>> +      };
>>>
>>> huh.  Do you maybe want to use widest_int for ofr[]?  What's
>>> wrong with wi::to_offset (vr->min)?  Building another intermediate
>>> tree node here looks just bogus.
>>
>> I need to convert size_type indices to signed to keep their sign
>> if it's negative and include it in warnings.  I've moved the code
>> into a conditional where it's used to minimize the cost but other
>> than that I don't know how else to convert it.
>>
>>>
>>> What are you trying to do in this loop anyways?
>>
>> The loop It determines the range of the final index/offset for
>> a series of POINTER_PLUS_EXPRs.  It handles cases like this:
>>
>>   int g (int i, int j, int k)
>>   {
>>     if (i < 1) i = 1;
>>     if (j < 1) j = 1;
>>     if (k < 1) k = 1;
>>
>>     const char *p0 = "123";
>>     const char *p1 = p0 + i;
>>     const char *p2 = p1 + j;
>>     const char *p3 = p2 + k;
>>
>>     // p2[3] and p3[1] are out of bounds
>>     return p0[4] + p1[3] + p2[2] + p3[1];
>>   }
>>
>>> I suppose
>>> look at
>>>
>>>   p_1 = &obj[i_2];  // already bounds-checked, but with ignore_off_by_one
>>>   ... = MEM[p_1 + CST];
>>>
>>> ?  But then
>>>
>>> +      if (TREE_CODE (varoff) != SSA_NAME)
>>> +       break;
>>>
>>> you should at least handle INTEGER_CSTs here?
>>
>> It's already handled (and set in CSTOFF).  There should be no
>> more constant offsets after that (I haven't come across any.)
>>
>>>
>>> +      if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
>>> +       break;
>>>
>>> please use positive tests here, VR_RANGE || VR_ANTI_RANGE.  As usual
>>> the anti-range handling looks odd.  Please add comments so we can follow
>>> what you were thinking when writing range merging code.  Even better if you
>>>
>>> can stick to use existing code rather than always re-inventing the wheel...
>>>
>>
>> The anti-range handling is to conservatively add
>> [-MAXOBJSIZE -1, MAXOBJSIZE] to OFFRANGE.  I've added comments
>> to make it clear.  I'd be more than happy to reuse existing
>> code if I knew where to find it (if it exists).  It sure would
>> save me lots of work to have a library of utility functions to
>> call instead of rolling my own code each time.
> Finding stuff is never easy :(  GCC's just gotten so big it's virtually
> impossible for anyone to know all the APIs.
>
> The suggestion I'd have would be to (when possible) factor this stuff
> into routines you can reuse.  We (as a whole) have this tendency to
> open-code all kinds of things rather than factoring the code into
> reusable routines.
>
> In addition to increasing the probability that you'll be able to reuse
> code later, just reading a new function's header tends to make me (as a
> reviewer) internally ask if there's already a routine we should be using
> instead.  When it's open-coded it's significantly harder to spot those
> cases (at least for me).
>
>
>>
>>>
>>> I think I commented on earlier variants but this doesn't seem to resemble
>>> any of them.
>>
>> I've reworked the patch (sorry) to also handle arrays.  For GCC
>> 9 it seems I might as well do both in one go.
>>
>> Attached is an updated patch with these changes.
>>
>> Martin
>>
>> gcc-83776.diff
>>
>>
>> PR tree-optimization/84047 - missing -Warray-bounds on an out-of-bounds index into an array
>> PR tree-optimization/83776 - missing -Warray-bounds indexing past the end of a string literal
>>
>> gcc/ChangeLog:
>>
>> 	PR tree-optimization/84047
>> 	PR tree-optimization/83776
>> 	* tree-vrp.c (vrp_prop::check_mem_ref): New function.
>> 	(check_array_bounds): Call it.
>> 	* /gcc/tree-sra.c (get_access_for_expr): Fail for out-of-bounds
>> 	array offsets.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	PR tree-optimization/83776
>> 	PR tree-optimization/84047
>> 	* gcc.dg/Warray-bounds-29.c: New test.
>> 	* gcc.dg/Warray-bounds-30.c: New test.
>> 	* gcc.dg/Warray-bounds-31.c: New test.
>> 	* gcc.dg/Warray-bounds-32.c: New test.
>>
>
>> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
>> index 3e30f6b..8221a06 100644
>> --- a/gcc/tree-sra.c
>> +++ b/gcc/tree-sra.c
>> @@ -3110,6 +3110,19 @@ get_access_for_expr (tree expr)
>>        || !DECL_P (base))
>>      return NULL;
>>
>> +  /* Fail for out-of-bounds array offsets.  */
>> +  tree basetype = TREE_TYPE (base);
>> +  if (TREE_CODE (basetype) == ARRAY_TYPE)
>> +    {
>> +      if (offset < 0)
>> +	return NULL;
>> +
>> +      if (tree size = DECL_SIZE (base))
>> +	if (tree_fits_uhwi_p (size)
>> +	    && tree_to_uhwi (size) < (unsigned HOST_WIDE_INT) offset)
>> +	  return NULL;
>> +    }
>> +
>>    if (!bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
>>      return NULL;
> So I'm a bit curious about this hunk.  Did we end up creating an access
> structure that walked off the end of the array?  Presumably  you
> suppressing SRA at this point so that you can see the array access later
> and give a suitable warning.  Right?

Yes, but I didn't make a note of the test case that triggered
it and I'm not able to trigger the code path now, so the change
might not be necessary.  I've removed it from the patch.  If it
turns out that it is needed after all I'll commit it later.

>> diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
>> index a7453ab..27021760 100644
>> --- a/gcc/tree-vrp.c
>> +++ b/gcc/tree-vrp.c
>> @@ -4930,21 +4931,280 @@ vrp_prop::check_array_ref (location_t location, tree ref,
>>      }
>>  }
>>
>> +/* Checks one MEM_REF in REF, located at LOCATION, for out-of-bounds
>> +   references to string constants.  If VRP can determine that the array
>> +   subscript is a constant, check if it is outside valid range.
>> +   If the array subscript is a RANGE, warn if it is non-overlapping
>> +   with valid range.
>> +   IGNORE_OFF_BY_ONE is true if the MEM_REF is inside an ADDR_EXPR
>> +   (used to allow one-past-the-end indices for code that takes
>> +   the address of the just-past-the-end element of an array).  */
>> +
>> +void
>> +vrp_prop::check_mem_ref (location_t location, tree ref, bool ignore_off_by_one)
>> +{
>> +  if (TREE_NO_WARNING (ref))
>> +    return;
>> +
>> +  tree arg = TREE_OPERAND (ref, 0);
>> +  /* The constant and variable offset of the reference.  */
>> +  tree cstoff = TREE_OPERAND (ref, 1);
>> +  tree varoff = NULL_TREE;
>> +
>> +  const offset_int maxobjsize = tree_to_shwi (max_object_size ());
>> +
>> +  /* The array or string constant bounds in bytes.  Initially set
>> +     to [-MAXOBJSIZE - 1, MAXOBJSIZE]  until a tighter bound is
>> +     determined.  */
>> +  offset_int arrbounds[2];
>> +  arrbounds[1] = maxobjsize;
>> +  arrbounds[0] = -arrbounds[1] - 1;
> I realize that arrbounds[1] == maxobjsize at this point.  But is there
> any reason not to use maxobjsize in the assignment to arrbounds[0] so
> that its computation matches the comment.  It would also seem advisable
> to set the min first, then the max since that's the ordering in the
> comment and in the array.

Done.

>
>
>> +
>> +  /* The minimum and maximum intermediate offset.  For a reference
>> +     to be valid, not only does the final offset/subscript must be
>> +     in bounds but all intermediate offsets must be as well. */
>> +  offset_int ioff = wi::to_offset (fold_convert (ptrdiff_type_node, cstoff));
>> +  offset_int extrema[2] = { 0, wi::abs (ioff) };
> You should probably tone back the comment here since the intermediate
> offsets do not have to be in bounds.

Done.

>
>
> [ Big snip ]
>
>> +  /* The type of the object being referred to.  It can be an array,
>> +     string literal, or a non-array type when the MEM_REF represents
>> +     a reference/subscript via a pointer to an object that is not
>> +     an element of an array.  References to members of structs and
>> +     unions are excluded because MEM_REF doesn't make it possible
>> +     to identify the member where the reference orginated.  */
> s/orginated/originated/
>
> OK with those changes.

Committed without the SRA change as 262893.

Martin
Rainer Orth July 20, 2018, 11:34 a.m. UTC | #8
Hi Martin,

> On 07/19/2018 03:51 PM, Jeff Law wrote:
>> On 07/13/2018 05:45 PM, Martin Sebor wrote:
>>>>
>>>> +      offset_int ofr[] = {
>>>> +       wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
>>>> +       wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
>>>> +      };
>>>>
>>>> huh.  Do you maybe want to use widest_int for ofr[]?  What's
>>>> wrong with wi::to_offset (vr->min)?  Building another intermediate
>>>> tree node here looks just bogus.
>>>
>>> I need to convert size_type indices to signed to keep their sign
>>> if it's negative and include it in warnings.  I've moved the code
>>> into a conditional where it's used to minimize the cost but other
>>> than that I don't know how else to convert it.
>>>
>>>>
>>>> What are you trying to do in this loop anyways?
>>>
>>> The loop It determines the range of the final index/offset for
>>> a series of POINTER_PLUS_EXPRs.  It handles cases like this:
>>>
>>>   int g (int i, int j, int k)
>>>   {
>>>     if (i < 1) i = 1;
>>>     if (j < 1) j = 1;
>>>     if (k < 1) k = 1;
>>>
>>>     const char *p0 = "123";
>>>     const char *p1 = p0 + i;
>>>     const char *p2 = p1 + j;
>>>     const char *p3 = p2 + k;
>>>
>>>     // p2[3] and p3[1] are out of bounds
>>>     return p0[4] + p1[3] + p2[2] + p3[1];
>>>   }
>>>
>>>> I suppose
>>>> look at
>>>>
>>>>   p_1 = &obj[i_2];  // already bounds-checked, but with ignore_off_by_one
>>>>   ... = MEM[p_1 + CST];
>>>>
>>>> ?  But then
>>>>
>>>> +      if (TREE_CODE (varoff) != SSA_NAME)
>>>> +       break;
>>>>
>>>> you should at least handle INTEGER_CSTs here?
>>>
>>> It's already handled (and set in CSTOFF).  There should be no
>>> more constant offsets after that (I haven't come across any.)
>>>
>>>>
>>>> +      if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
>>>> +       break;
>>>>
>>>> please use positive tests here, VR_RANGE || VR_ANTI_RANGE.  As usual
>>>> the anti-range handling looks odd.  Please add comments so we can follow
>>>> what you were thinking when writing range merging code.  Even better if you
>>>>
>>>> can stick to use existing code rather than always re-inventing the wheel...
>>>>
>>>
>>> The anti-range handling is to conservatively add
>>> [-MAXOBJSIZE -1, MAXOBJSIZE] to OFFRANGE.  I've added comments
>>> to make it clear.  I'd be more than happy to reuse existing
>>> code if I knew where to find it (if it exists).  It sure would
>>> save me lots of work to have a library of utility functions to
>>> call instead of rolling my own code each time.
>> Finding stuff is never easy :(  GCC's just gotten so big it's virtually
>> impossible for anyone to know all the APIs.
>>
>> The suggestion I'd have would be to (when possible) factor this stuff
>> into routines you can reuse.  We (as a whole) have this tendency to
>> open-code all kinds of things rather than factoring the code into
>> reusable routines.
>>
>> In addition to increasing the probability that you'll be able to reuse
>> code later, just reading a new function's header tends to make me (as a
>> reviewer) internally ask if there's already a routine we should be using
>> instead.  When it's open-coded it's significantly harder to spot those
>> cases (at least for me).
>>
>>
>>>
>>>>
>>>> I think I commented on earlier variants but this doesn't seem to resemble
>>>> any of them.
>>>
>>> I've reworked the patch (sorry) to also handle arrays.  For GCC
>>> 9 it seems I might as well do both in one go.
>>>
>>> Attached is an updated patch with these changes.
>>>
>>> Martin
>>>
>>> gcc-83776.diff
>>>
>>>
>>> PR tree-optimization/84047 - missing -Warray-bounds on an out-of-bounds
>>> index into an array
>>> PR tree-optimization/83776 - missing -Warray-bounds indexing past the
>>> end of a string literal
>>>
>>> gcc/ChangeLog:
>>>
>>> 	PR tree-optimization/84047
>>> 	PR tree-optimization/83776
>>> 	* tree-vrp.c (vrp_prop::check_mem_ref): New function.
>>> 	(check_array_bounds): Call it.
>>> 	* /gcc/tree-sra.c (get_access_for_expr): Fail for out-of-bounds
>>> 	array offsets.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 	PR tree-optimization/83776
>>> 	PR tree-optimization/84047
>>> 	* gcc.dg/Warray-bounds-29.c: New test.
>>> 	* gcc.dg/Warray-bounds-30.c: New test.
>>> 	* gcc.dg/Warray-bounds-31.c: New test.
>>> 	* gcc.dg/Warray-bounds-32.c: New test.
>>>
>>
>>> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
>>> index 3e30f6b..8221a06 100644
>>> --- a/gcc/tree-sra.c
>>> +++ b/gcc/tree-sra.c
>>> @@ -3110,6 +3110,19 @@ get_access_for_expr (tree expr)
>>>        || !DECL_P (base))
>>>      return NULL;
>>>
>>> +  /* Fail for out-of-bounds array offsets.  */
>>> +  tree basetype = TREE_TYPE (base);
>>> +  if (TREE_CODE (basetype) == ARRAY_TYPE)
>>> +    {
>>> +      if (offset < 0)
>>> +	return NULL;
>>> +
>>> +      if (tree size = DECL_SIZE (base))
>>> +	if (tree_fits_uhwi_p (size)
>>> +	    && tree_to_uhwi (size) < (unsigned HOST_WIDE_INT) offset)
>>> +	  return NULL;
>>> +    }
>>> +
>>>    if (!bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
>>>      return NULL;
>> So I'm a bit curious about this hunk.  Did we end up creating an access
>> structure that walked off the end of the array?  Presumably  you
>> suppressing SRA at this point so that you can see the array access later
>> and give a suitable warning.  Right?
>
> Yes, but I didn't make a note of the test case that triggered
> it and I'm not able to trigger the code path now, so the change
> might not be necessary.  I've removed it from the patch.  If it
> turns out that it is needed after all I'll commit it later.
>
>>> diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
>>> index a7453ab..27021760 100644
>>> --- a/gcc/tree-vrp.c
>>> +++ b/gcc/tree-vrp.c
>>> @@ -4930,21 +4931,280 @@ vrp_prop::check_array_ref (location_t location, tree ref,
>>>      }
>>>  }
>>>
>>> +/* Checks one MEM_REF in REF, located at LOCATION, for out-of-bounds
>>> +   references to string constants.  If VRP can determine that the array
>>> +   subscript is a constant, check if it is outside valid range.
>>> +   If the array subscript is a RANGE, warn if it is non-overlapping
>>> +   with valid range.
>>> +   IGNORE_OFF_BY_ONE is true if the MEM_REF is inside an ADDR_EXPR
>>> +   (used to allow one-past-the-end indices for code that takes
>>> +   the address of the just-past-the-end element of an array).  */
>>> +
>>> +void
>>> +vrp_prop::check_mem_ref (location_t location, tree ref, bool ignore_off_by_one)
>>> +{
>>> +  if (TREE_NO_WARNING (ref))
>>> +    return;
>>> +
>>> +  tree arg = TREE_OPERAND (ref, 0);
>>> +  /* The constant and variable offset of the reference.  */
>>> +  tree cstoff = TREE_OPERAND (ref, 1);
>>> +  tree varoff = NULL_TREE;
>>> +
>>> +  const offset_int maxobjsize = tree_to_shwi (max_object_size ());
>>> +
>>> +  /* The array or string constant bounds in bytes.  Initially set
>>> +     to [-MAXOBJSIZE - 1, MAXOBJSIZE]  until a tighter bound is
>>> +     determined.  */
>>> +  offset_int arrbounds[2];
>>> +  arrbounds[1] = maxobjsize;
>>> +  arrbounds[0] = -arrbounds[1] - 1;
>> I realize that arrbounds[1] == maxobjsize at this point.  But is there
>> any reason not to use maxobjsize in the assignment to arrbounds[0] so
>> that its computation matches the comment.  It would also seem advisable
>> to set the min first, then the max since that's the ordering in the
>> comment and in the array.
>
> Done.
>
>>
>>
>>> +
>>> +  /* The minimum and maximum intermediate offset.  For a reference
>>> +     to be valid, not only does the final offset/subscript must be
>>> +     in bounds but all intermediate offsets must be as well. */
>>> + offset_int ioff = wi::to_offset (fold_convert (ptrdiff_type_node,
>>> cstoff));
>>> +  offset_int extrema[2] = { 0, wi::abs (ioff) };
>> You should probably tone back the comment here since the intermediate
>> offsets do not have to be in bounds.
>
> Done.
>
>>
>>
>> [ Big snip ]
>>
>>> +  /* The type of the object being referred to.  It can be an array,
>>> +     string literal, or a non-array type when the MEM_REF represents
>>> +     a reference/subscript via a pointer to an object that is not
>>> +     an element of an array.  References to members of structs and
>>> +     unions are excluded because MEM_REF doesn't make it possible
>>> +     to identify the member where the reference orginated.  */
>> s/orginated/originated/
>>
>> OK with those changes.
>
> Committed without the SRA change as 262893.

your patch has caused quite a number of testsuite failures:

+FAIL: c-c++-common/Warray-bounds-2.c  -std=gnu++11 (test for excess errors)
+FAIL: c-c++-common/Warray-bounds-2.c  -std=gnu++14 (test for excess errors)
+FAIL: c-c++-common/Warray-bounds-2.c  -std=gnu++98 (test for excess errors)

Excess errors:
/vol/gcc/src/hg/trunk/local/gcc/testsuite/c-c++-common/Warray-bounds-2.c:200:11: warning: array subscript -1 is outside array bounds of 'Array [2]' [-Warray-bounds]

+FAIL: c-c++-common/Warray-bounds-2.c  -Wc++-compat  (test for excess errors)

on 32 and 64-bit Solaris/SPARC and x86, also Linux/x86_64

+XPASS: gcc.dg/Warray-bounds-31.c  (test for warnings, line 26)
+XPASS: gcc.dg/Warray-bounds-31.c  (test for warnings, line 40)
+FAIL: gcc.dg/Warray-bounds-31.c (test for excess errors)
+XPASS: gcc.dg/Warray-bounds-31.c bug 84047 (test for warnings, line 72)
+XPASS: gcc.dg/Warray-bounds-31.c bug 84047 (test for warnings, line 90)

Excess errors:
/vol/gcc/src/hg/trunk/local/gcc/testsuite/gcc.dg/Warray-bounds-31.c:100:3: warning: array subscript -2147483648 is outside array bounds of 'char[9]' [-Warray-bounds]

32-bit Solaris and Linux only.

+FAIL: gcc.dg/Warray-bounds-32.c  (test for warnings, line 28)

Please fix.

	Rainer
Martin Sebor July 20, 2018, 4:57 p.m. UTC | #9
On 07/20/2018 05:34 AM, Rainer Orth wrote:
> Hi Martin,
>
>> On 07/19/2018 03:51 PM, Jeff Law wrote:
>>> On 07/13/2018 05:45 PM, Martin Sebor wrote:
>>>>>
>>>>> +      offset_int ofr[] = {
>>>>> +       wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
>>>>> +       wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
>>>>> +      };
>>>>>
>>>>> huh.  Do you maybe want to use widest_int for ofr[]?  What's
>>>>> wrong with wi::to_offset (vr->min)?  Building another intermediate
>>>>> tree node here looks just bogus.
>>>>
>>>> I need to convert size_type indices to signed to keep their sign
>>>> if it's negative and include it in warnings.  I've moved the code
>>>> into a conditional where it's used to minimize the cost but other
>>>> than that I don't know how else to convert it.
>>>>
>>>>>
>>>>> What are you trying to do in this loop anyways?
>>>>
>>>> The loop It determines the range of the final index/offset for
>>>> a series of POINTER_PLUS_EXPRs.  It handles cases like this:
>>>>
>>>>   int g (int i, int j, int k)
>>>>   {
>>>>     if (i < 1) i = 1;
>>>>     if (j < 1) j = 1;
>>>>     if (k < 1) k = 1;
>>>>
>>>>     const char *p0 = "123";
>>>>     const char *p1 = p0 + i;
>>>>     const char *p2 = p1 + j;
>>>>     const char *p3 = p2 + k;
>>>>
>>>>     // p2[3] and p3[1] are out of bounds
>>>>     return p0[4] + p1[3] + p2[2] + p3[1];
>>>>   }
>>>>
>>>>> I suppose
>>>>> look at
>>>>>
>>>>>   p_1 = &obj[i_2];  // already bounds-checked, but with ignore_off_by_one
>>>>>   ... = MEM[p_1 + CST];
>>>>>
>>>>> ?  But then
>>>>>
>>>>> +      if (TREE_CODE (varoff) != SSA_NAME)
>>>>> +       break;
>>>>>
>>>>> you should at least handle INTEGER_CSTs here?
>>>>
>>>> It's already handled (and set in CSTOFF).  There should be no
>>>> more constant offsets after that (I haven't come across any.)
>>>>
>>>>>
>>>>> +      if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
>>>>> +       break;
>>>>>
>>>>> please use positive tests here, VR_RANGE || VR_ANTI_RANGE.  As usual
>>>>> the anti-range handling looks odd.  Please add comments so we can follow
>>>>> what you were thinking when writing range merging code.  Even better if you
>>>>>
>>>>> can stick to use existing code rather than always re-inventing the wheel...
>>>>>
>>>>
>>>> The anti-range handling is to conservatively add
>>>> [-MAXOBJSIZE -1, MAXOBJSIZE] to OFFRANGE.  I've added comments
>>>> to make it clear.  I'd be more than happy to reuse existing
>>>> code if I knew where to find it (if it exists).  It sure would
>>>> save me lots of work to have a library of utility functions to
>>>> call instead of rolling my own code each time.
>>> Finding stuff is never easy :(  GCC's just gotten so big it's virtually
>>> impossible for anyone to know all the APIs.
>>>
>>> The suggestion I'd have would be to (when possible) factor this stuff
>>> into routines you can reuse.  We (as a whole) have this tendency to
>>> open-code all kinds of things rather than factoring the code into
>>> reusable routines.
>>>
>>> In addition to increasing the probability that you'll be able to reuse
>>> code later, just reading a new function's header tends to make me (as a
>>> reviewer) internally ask if there's already a routine we should be using
>>> instead.  When it's open-coded it's significantly harder to spot those
>>> cases (at least for me).
>>>
>>>
>>>>
>>>>>
>>>>> I think I commented on earlier variants but this doesn't seem to resemble
>>>>> any of them.
>>>>
>>>> I've reworked the patch (sorry) to also handle arrays.  For GCC
>>>> 9 it seems I might as well do both in one go.
>>>>
>>>> Attached is an updated patch with these changes.
>>>>
>>>> Martin
>>>>
>>>> gcc-83776.diff
>>>>
>>>>
>>>> PR tree-optimization/84047 - missing -Warray-bounds on an out-of-bounds
>>>> index into an array
>>>> PR tree-optimization/83776 - missing -Warray-bounds indexing past the
>>>> end of a string literal
>>>>
>>>> gcc/ChangeLog:
>>>>
>>>> 	PR tree-optimization/84047
>>>> 	PR tree-optimization/83776
>>>> 	* tree-vrp.c (vrp_prop::check_mem_ref): New function.
>>>> 	(check_array_bounds): Call it.
>>>> 	* /gcc/tree-sra.c (get_access_for_expr): Fail for out-of-bounds
>>>> 	array offsets.
>>>>
>>>> gcc/testsuite/ChangeLog:
>>>>
>>>> 	PR tree-optimization/83776
>>>> 	PR tree-optimization/84047
>>>> 	* gcc.dg/Warray-bounds-29.c: New test.
>>>> 	* gcc.dg/Warray-bounds-30.c: New test.
>>>> 	* gcc.dg/Warray-bounds-31.c: New test.
>>>> 	* gcc.dg/Warray-bounds-32.c: New test.
>>>>
>>>
>>>> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
>>>> index 3e30f6b..8221a06 100644
>>>> --- a/gcc/tree-sra.c
>>>> +++ b/gcc/tree-sra.c
>>>> @@ -3110,6 +3110,19 @@ get_access_for_expr (tree expr)
>>>>        || !DECL_P (base))
>>>>      return NULL;
>>>>
>>>> +  /* Fail for out-of-bounds array offsets.  */
>>>> +  tree basetype = TREE_TYPE (base);
>>>> +  if (TREE_CODE (basetype) == ARRAY_TYPE)
>>>> +    {
>>>> +      if (offset < 0)
>>>> +	return NULL;
>>>> +
>>>> +      if (tree size = DECL_SIZE (base))
>>>> +	if (tree_fits_uhwi_p (size)
>>>> +	    && tree_to_uhwi (size) < (unsigned HOST_WIDE_INT) offset)
>>>> +	  return NULL;
>>>> +    }
>>>> +
>>>>    if (!bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
>>>>      return NULL;
>>> So I'm a bit curious about this hunk.  Did we end up creating an access
>>> structure that walked off the end of the array?  Presumably  you
>>> suppressing SRA at this point so that you can see the array access later
>>> and give a suitable warning.  Right?
>>
>> Yes, but I didn't make a note of the test case that triggered
>> it and I'm not able to trigger the code path now, so the change
>> might not be necessary.  I've removed it from the patch.  If it
>> turns out that it is needed after all I'll commit it later.
>>
>>>> diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
>>>> index a7453ab..27021760 100644
>>>> --- a/gcc/tree-vrp.c
>>>> +++ b/gcc/tree-vrp.c
>>>> @@ -4930,21 +4931,280 @@ vrp_prop::check_array_ref (location_t location, tree ref,
>>>>      }
>>>>  }
>>>>
>>>> +/* Checks one MEM_REF in REF, located at LOCATION, for out-of-bounds
>>>> +   references to string constants.  If VRP can determine that the array
>>>> +   subscript is a constant, check if it is outside valid range.
>>>> +   If the array subscript is a RANGE, warn if it is non-overlapping
>>>> +   with valid range.
>>>> +   IGNORE_OFF_BY_ONE is true if the MEM_REF is inside an ADDR_EXPR
>>>> +   (used to allow one-past-the-end indices for code that takes
>>>> +   the address of the just-past-the-end element of an array).  */
>>>> +
>>>> +void
>>>> +vrp_prop::check_mem_ref (location_t location, tree ref, bool ignore_off_by_one)
>>>> +{
>>>> +  if (TREE_NO_WARNING (ref))
>>>> +    return;
>>>> +
>>>> +  tree arg = TREE_OPERAND (ref, 0);
>>>> +  /* The constant and variable offset of the reference.  */
>>>> +  tree cstoff = TREE_OPERAND (ref, 1);
>>>> +  tree varoff = NULL_TREE;
>>>> +
>>>> +  const offset_int maxobjsize = tree_to_shwi (max_object_size ());
>>>> +
>>>> +  /* The array or string constant bounds in bytes.  Initially set
>>>> +     to [-MAXOBJSIZE - 1, MAXOBJSIZE]  until a tighter bound is
>>>> +     determined.  */
>>>> +  offset_int arrbounds[2];
>>>> +  arrbounds[1] = maxobjsize;
>>>> +  arrbounds[0] = -arrbounds[1] - 1;
>>> I realize that arrbounds[1] == maxobjsize at this point.  But is there
>>> any reason not to use maxobjsize in the assignment to arrbounds[0] so
>>> that its computation matches the comment.  It would also seem advisable
>>> to set the min first, then the max since that's the ordering in the
>>> comment and in the array.
>>
>> Done.
>>
>>>
>>>
>>>> +
>>>> +  /* The minimum and maximum intermediate offset.  For a reference
>>>> +     to be valid, not only does the final offset/subscript must be
>>>> +     in bounds but all intermediate offsets must be as well. */
>>>> + offset_int ioff = wi::to_offset (fold_convert (ptrdiff_type_node,
>>>> cstoff));
>>>> +  offset_int extrema[2] = { 0, wi::abs (ioff) };
>>> You should probably tone back the comment here since the intermediate
>>> offsets do not have to be in bounds.
>>
>> Done.
>>
>>>
>>>
>>> [ Big snip ]
>>>
>>>> +  /* The type of the object being referred to.  It can be an array,
>>>> +     string literal, or a non-array type when the MEM_REF represents
>>>> +     a reference/subscript via a pointer to an object that is not
>>>> +     an element of an array.  References to members of structs and
>>>> +     unions are excluded because MEM_REF doesn't make it possible
>>>> +     to identify the member where the reference orginated.  */
>>> s/orginated/originated/
>>>
>>> OK with those changes.
>>
>> Committed without the SRA change as 262893.
>
> your patch has caused quite a number of testsuite failures:
>
> +FAIL: c-c++-common/Warray-bounds-2.c  -std=gnu++11 (test for excess errors)
> +FAIL: c-c++-common/Warray-bounds-2.c  -std=gnu++14 (test for excess errors)
> +FAIL: c-c++-common/Warray-bounds-2.c  -std=gnu++98 (test for excess errors)
>
> Excess errors:
> /vol/gcc/src/hg/trunk/local/gcc/testsuite/c-c++-common/Warray-bounds-2.c:200:11: warning: array subscript -1 is outside array bounds of 'Array [2]' [-Warray-bounds]
>
> +FAIL: c-c++-common/Warray-bounds-2.c  -Wc++-compat  (test for excess errors)
>
> on 32 and 64-bit Solaris/SPARC and x86, also Linux/x86_64
>
> +XPASS: gcc.dg/Warray-bounds-31.c  (test for warnings, line 26)
> +XPASS: gcc.dg/Warray-bounds-31.c  (test for warnings, line 40)
> +FAIL: gcc.dg/Warray-bounds-31.c (test for excess errors)
> +XPASS: gcc.dg/Warray-bounds-31.c bug 84047 (test for warnings, line 72)
> +XPASS: gcc.dg/Warray-bounds-31.c bug 84047 (test for warnings, line 90)
>
> Excess errors:
> /vol/gcc/src/hg/trunk/local/gcc/testsuite/gcc.dg/Warray-bounds-31.c:100:3: warning: array subscript -2147483648 is outside array bounds of 'char[9]' [-Warray-bounds]
>
> 32-bit Solaris and Linux only.
>
> +FAIL: gcc.dg/Warray-bounds-32.c  (test for warnings, line 28)

There are outstanding issues that prevent some instances
of out-of-bounds indices from being diagnosed depending on
the target/data model.  I raised pr86611 and pr86613 for
those and xfailed the tests in r262906.  There's also a minor
new issue with duplicate -Warray-bounds in C++ (or in C when
strncpy is not defined by libc as a macro), or with the macro
suppressing a -Warray-bounds when it is a macro.  That caused
the c-c++-common/Warray-bounds-2.c to fail on some targets
I undefined the macros to have the test behave consistently
regardless of libc macros, pruned the duplicate diagnostic,
and opened bug 86614 to look into avoiding the duplication.

Martin
diff mbox series

Patch

PR tree-optimization/83776 - [6/7/8 Regression] missing -Warray-bounds indexing past the end of a string literal

gcc/ChangeLog:

	PR tree-optimization/83776
	* tree-vrp.c (vrp_prop::check_mem_ref): New function.
	(check_array_bounds): Call it.

gcc/testsuite/ChangeLog:

	PR tree-optimization/83776
	* gcc.dg/Warray-bounds-27.c: New test.
	* gcc.dg/Warray-bounds-28.c: New test.
	* gcc.dg/Warray-bounds-29.c: New test.

diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-27.c b/gcc/testsuite/gcc.dg/Warray-bounds-27.c
new file mode 100644
index 0000000..ccc88cd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-27.c
@@ -0,0 +1,223 @@ 
+/* PR tree-optimization/83776: missing -Warray-bounds indexing past the end
+   of a string literal
+   Test to exercise detection of out-of-bounds indices into narrow string
+   literals.
+   { dg-do compile }
+   { dg-options "-O2 -Warray-bounds -ftrack-macro-expansion=0" } */
+
+#include "range.h"
+
+#define MAX DIFF_MAX
+#define MIN DIFF_MIN
+
+#define S1 "1"
+#define S3 "123"
+#define S7 "1234567"
+#define S8 "12345678"
+#define S9 "123456789"
+
+void sink (int, ...);
+
+#define T(expr)   sink (0, expr)
+
+
+void narrow_direct_cst (void)
+{
+  T (S1[MIN]);                /* { dg-warning "array subscript -\[0-9\]+ is below array bounds of .char\\\[2]" "" { xfail *-*-* } } */
+  T (S1[-1]);                 /* { dg-warning "array subscript -1 is below array bounds of .char\\\[2]" } */
+  T (S1[0]);
+  T (S1[1]);
+  T (S1[2]);                  /* { dg-warning "array subscript 2 is above array bounds of .char\\\[2]" } */
+  T (S1[MAX]);                /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .char\\\[2]" } */
+
+  T (&S1[MIN]);               /* { dg-warning "array subscript -\[0-9\]+ is below array bounds of .char\\\[2]" } */
+  T (&S1[-1]);                /* { dg-warning "array subscript -1 is below array bounds of .char\\\[2]" } */
+  T (&S1[0]);
+  T (&S1[2]);
+  T (&S1[3]);                 /* { dg-warning "array subscript 3 is above array bounds of .char\\\[2]" } */
+  T (&S1[MAX]);               /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .char\\\[2]" } */
+
+  T (S9[MIN]);                /* { dg-warning "array subscript -\[0-9\]+ is below array bounds of .char\\\[10]" "" { xfail *-*-* } } */
+  T (S9[-1]);                 /* { dg-warning "array subscript -1 is below array bounds of .char\\\[10]" } */
+  T (S9[9]);
+  T (S9[10]);                 /* { dg-warning "array subscript 10 is above array bounds of .char\\\[10]" } */
+  T (S9[11]);                 /* { dg-warning "array subscript 11 is above array bounds of .char\\\[10]" } */
+  T (S9[MAX]);                /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .char\\\[10]" } */
+
+  T (&S9[MIN]);               /* { dg-warning "array subscript -\[0-9\]+ is below array bounds of .char\\\[10]" } */
+  T (&S9[-1]);                /* { dg-warning "array subscript -1 is below array bounds of .char\\\[10]" } */
+  T (&S9[9]);
+  T (&S9[10]);
+  T (&S9[11]);                 /* { dg-warning "array subscript 11 is above array bounds of .char\\\[10]" } */
+  T (&S9[MAX]);               /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .char\\\[10]" } */
+}
+
+void narrow_ptr_deref_cst (void)
+{
+  const char *p = S8 + 9;
+
+  T (*(p + MIN));             /* { dg-warning "array subscript -\[0-9\]+ is outside array bounds of .char\\\[9]." } */
+  T (*(p - 10));              /* { dg-warning "array subscript -1 is outside array bounds of .char\\\[9]." } */
+  T (*(p - 9));
+  T (*(p - 1));
+  T (*p);                     /* { dg-warning "array subscript 9 is outside array bounds of .char\\\[9]." } */
+  T (*(p + 1));               /* { dg-warning "array subscript 10 is outside array bounds of .char\\\[9]." } */
+  T (*(p + 2));               /* { dg-warning "array subscript 11 is outside array bounds of .char\\\[9]." } */
+}
+
+void narrow_ptr_index_cst (void)
+{
+  const char *p = S7;
+
+  T (p[MIN + 1]);             /* { dg-warning "array subscript -\[0-9\]+ is outside array bounds of .char\\\[8]." "bug 84047" { xfail *-*-* } } */
+  T (p[-1]);                  /* { dg-warning "array subscript -1 is outside array bounds of .char\\\[8]." } */
+  T (p[0]);
+  T (p[1]);
+  T (p[8]);                   /* { dg-warning "array subscript 8 is outside array bounds of .char\\\[8]." } */
+  T (p[99]);                  /* { dg-warning "array subscript 99 is outside array bounds of .char\\\[8]." } */
+  T (p[MAX]);                 /* { dg-warning "array subscript \[0-9\]+ is outside array bounds of .char\\\[8]." } */
+
+  T (&p[MIN + 1]);            /* { dg-warning "array subscript -\[0-9\]+ is below array bounds of .char\\\[8]." } */
+  T (&p[-1]);                 /* { dg-warning "array subscript -1 is below array bounds of .char\\\[8]." } */
+  T (&p[0]);
+  T (&p[1]);
+  T (&p[8]);
+  T (&p[9]);                  /* { dg-warning "array subscript 9 is above array bounds of .char\\\[8]." } */
+  T (&p[99]);                 /* { dg-warning "array subscript 99 is above array bounds of .char\\\[8]." } */
+  T (&p[MAX]);                /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .char\\\[8]." } */
+
+  const char *q = S8 + 4;
+  T (q[MIN + 1]);             /* { dg-warning "array subscript -?\[0-9\]+ is outside array bounds of .char\\\[9]." "bug 84047" { xfail *-*-* } } */
+  T (q[-5]);                  /* { dg-warning "array subscript -1 is outside array bounds of .char\\\[9]." } */
+  T (q[-4]);
+  T (q[0]);
+  T (q[1]);
+  T (q[3]);
+  T (q[4]);
+  T (q[5]);                   /* { dg-warning "array subscript 9 is outside array bounds of .char\\\[9]." } */
+  T (q[99]);                  /* { dg-warning "array subscript 103 is outside array bounds of .char\\\[9]." } */
+  T (q[MAX - 4]);             /* { dg-warning "array subscript \[0-9\]+ is outside array bounds of .char\\\[9]." } */
+  T (q[MAX - 3]);             /* { dg-warning "array subscript \[0-9\]+ is outside array bounds of .char\\\[9]." "bug 84047" { xfail *-*-* } } */
+
+  T (&q[MIN + 1]);            /* { dg-warning "array subscript -?\[0-9\]+ is below array bounds of .char\\\[9]." } */
+  T (&q[-5]);                 /* { dg-warning "array subscript -1 is below array bounds of .char\\\[9]." } */
+  T (&q[-4]);
+  T (&q[0]);
+  T (&q[1]);
+  T (&q[5]);
+  T (&q[6]);                  /* { dg-warning "array subscript 10 is above array bounds of .char\\\[9]." } */
+  T (&q[99]);                 /* { dg-warning "array subscript 103 is above array bounds of .char\\\[9]." } */
+  T (&q[MAX - 4]);            /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .char\\\[9]." } */
+  T (&q[MAX - 3]);            /* { dg-warning "array subscript -?\[0-9\]+ is below array bounds of .char\\\[9]." } */
+}
+
+
+void narrow_direct_var (int i)
+{
+  T (S3[SR (MIN, -1)]);       /* { dg-warning "array subscript -1 is below array bounds of .char\\\[4]" } */
+  T (S3[SR (MIN, 0)]);
+  T (S3[SR (-2, -1)]);        /* { dg-warning "array subscript -1 is below array bounds of .char\\\[4]" } */
+  T (S3[SR (1, 2)]);
+  T (S3[SR (1, 999)]);
+  T (S3[SR (2, 999)]);
+  T (S3[SR (3, 999)]);
+  T (S3[SR (4, 999)]);       /* { dg-warning "array subscript 4 is above array bounds of .char\\\[4]" } */
+
+  T (&S3[SR (MIN, -1)]);      /* { dg-warning "array subscript -1 is below array bounds of .char\\\[4]" } */
+  T (&S3[SR (MIN, 0)]);
+  T (&S3[SR (-2, -1)]);       /* { dg-warning "array subscript -1 is below array bounds of .char\\\[4]" } */
+  T (&S3[SR (1, 2)]);
+  T (&S3[SR (1, 999)]);
+  T (&S3[SR (2, 999)]);
+  T (&S3[SR (4, 999)]);
+  T (&S3[SR (5, 999)]);      /* { dg-warning "array subscript 5 is above array bounds of .char\\\[4]" } */
+
+  T (S9[SR (MIN, -9)]);       /* { dg-warning "array subscript -9 is below array bounds of .char\\\[10]" } */
+  T (S9[SR (MIN, -1)]);       /* { dg-warning "array subscript -1 is below array bounds of .char\\\[10]" } */
+  T (S9[SR (MIN, 0)]);
+  T (S9[SR (-2, -1)]);        /* { dg-warning "array subscript -1 is below array bounds of .char\\\[10]" } */
+  T (S9[SR (1, 2)]);
+  T (S9[SR (1, 9)]);
+  T (S9[SR (1, 999)]);
+  T (S9[SR (9, 999)]);
+  T (S9[SR (10, 999)]);       /* { dg-warning "array subscript 10 is above array bounds of .char\\\[10]" } */
+  T (S9[SR (99, MAX)]);       /* { dg-warning "array subscript 99 is above array bounds of .char\\\[10]" } */
+}
+
+
+void narrow_ptr_deref_var (void)
+{
+  const char *p;
+
+  p = S1 + SR (-999, 999);
+  T (*p);
+
+  p = S1 + SR (-1, 1);
+  T (*p);
+
+  p = S1 + SR (-1, 0);
+  T (*p);
+
+  p = S1 + SR (0, 1);
+  T (*p);
+
+  p = S1 + SR (1, 2);
+  T (*p);
+
+  p = S1 + SR (2, 3);
+  T (*p);                     /* { dg-warning "array subscript \\\[2, 3] is outside array bounds of .char\\\[2]." } */
+
+  p = S1 + SR (9, 99);
+  T (*p);                     /* { dg-warning "array subscript \\\[9, 99] is outside array bounds of .char\\\[2]." } */
+
+  p = S8 + SR (-999, 999);
+  T (*p);
+
+  p = S8 + SR (-9, -1);
+  T (*p);                     /* { dg-warning "array subscript \\\[-9, -1] is outside array bounds of .char\\\[9]." } */
+
+  p = S8 + SR (-9, 0);
+  T (*p);
+
+  p = S8 + SR (-9, 9);
+  T (*p);
+
+  p = S8 + SR (-9, 123);
+  T (*p);
+
+  p = S8 + SR (8, 123);
+  T (*p);
+
+  p = S8 + SR (9, 123);
+  T (*p);                     /* { dg-warning "array subscript \\\[9, 123] is outside array bounds of .char\\\[9]." } */
+
+  ptrdiff_t i = SR (1, 2);
+
+  const char *p1 = S3 +  SR (1, DIFF_MAX - 1);
+  const char *p2 = p1 + i;
+  const char *p3 = p2 + i;
+  const char *p4 = p3 + i;
+  const char *p5 = p4 + i;
+
+  T (*p1);
+  T (*p2);
+  T (*p3);
+  T (*p4);                    /* { dg-warning "array subscript \\\[4, \[0-9\]+] is outside array bounds of .char\\\[4]." } */
+  T (*p5);                    /* { dg-warning "array subscript \\\[5, \[0-9\]+] is outside array bounds of .char\\\[4]." } */
+}
+
+
+void narrow_ptr_index_var (void)
+{
+  const char *p;
+
+  p = S7;
+  T (p[SR (-9, -1)]);         /* { dg-warning "array subscript \\\[-9, -1] is outside array bounds of .char\\\[8]." } */
+  T (p[SR (-8, 0)]);
+  T (p[SR (0, MAX)]);
+  T (p[SR (1, 9)]);
+  T (p[SR (8, 9)]);           /* { dg-warning "array subscript \\\[8, 9] is outside array bounds of .char\\\[8]." } */
+
+  p = S7 + SR (4, 6);
+  T (p[5]);                   /* { dg-warning "array subscript \\\[9, 11] is outside array bounds of .char\\\[8]." } */
+}
diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-28.c b/gcc/testsuite/gcc.dg/Warray-bounds-28.c
new file mode 100644
index 0000000..7849767
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-28.c
@@ -0,0 +1,176 @@ 
+/* PR tree-optimization/83776: missing -Warray-bounds indexing past the end
+   of a string literal
+   Test to exercise indices into wide string literals.
+   { dg-do compile }
+   { dg-options "-O2 -Warray-bounds -ftrack-macro-expansion=0" } */
+
+#include "range.h"
+
+#define MAX DIFF_MAX
+#define MIN DIFF_MIN
+
+typedef __WCHAR_TYPE__ wchar_t;
+
+#define W2 L"12"
+#define W3 L"123"
+#define W4 L"1234"
+#define W7 L"1234567"
+#define W8 L"12345678"
+#define W9 L"123456789"
+
+void sink (int);
+
+#define T(expr)   sink (expr)
+
+
+void wide_direct_cst (void)
+{
+  T (W9[MIN]);                /* { dg-warning "array subscript -\[0-9\]+ is below array bounds of .\[a-z \]+\\\[10]" "" } */
+  T (W9[-1]);                 /* { dg-warning "array subscript -1 is below array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[11]);                 /* { dg-warning "array subscript 11 is above array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[MAX]);                /* { dg-warning "array subscript \[0-9\]+ is above array bounds of .\[a-z \]+\\\[10]" } */
+
+}
+
+void wide_ptr_deref_cst (void)
+{
+  const wchar_t *p = W8 + 9;
+  T (*p);                     /* { dg-warning "array subscript 9 is outside array bounds of .\[a-z \]+\\\[9]." } */
+  T (p[1]);                   /* { dg-warning "array subscript 10 is outside array bounds of .\[a-z \]+\\\[9]." } */
+  T (p[99]);                  /* { dg-warning "array subscript 108 is outside array bounds of .\[a-z \]+\\\[9]." } */
+}
+
+void wide_ptr_index_cst (void)
+{
+  const wchar_t *p = W7;
+
+  T (p[1]);
+  T (p[8]);                   /* { dg-warning "array subscript 8 is outside array bounds of .\[a-z \]+\\\[8]." } */
+  T (p[99]);                  /* { dg-warning "array subscript 99 is outside array bounds of .\[a-z \]+\\\[8]." } */
+  T (p[MAX]);                 /* { dg-warning "array subscript -?\[0-9\]+ is outside array bounds of .\[a-z \]+\\\[8]." } */
+}
+
+
+void wide_direct_var (ptrdiff_t i)
+{
+  T (W9[SR (MIN, -9)]);       /* { dg-warning "array subscript -9 is below array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[SR (MIN, -1)]);       /* { dg-warning "array subscript -1 is below array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[SR (MIN, 0)]);
+  T (W9[SR (-2, -1)]);        /* { dg-warning "array subscript -1 is below array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[SR (1, 2)]);
+  T (W9[SR (1, 9)]);
+  T (W9[SR (1, 999)]);
+  T (W9[SR (9, 999)]);
+  T (W9[SR (10, 999)]);       /* { dg-warning "array subscript 10 is above array bounds of .\[a-z \]+\\\[10]" } */
+  T (W9[SR (99, MAX)]);       /* { dg-warning "array subscript 99 is above array bounds of .\[a-z \]+\\\[10]" } */
+}
+
+void wide_ptr_deref_var (ptrdiff_t i)
+{
+  const wchar_t *p;
+
+  p = W8 + SR (-9, -1);
+  T (*p);                     /* { dg-warning "array subscript \\\[-9, -1] is outside array bounds of .\[a-z \]+\\\[9]." } */
+
+  p = W8 + SR (-9, 0);
+  T (*p);
+
+  p = W8 + SR (-9, 9);
+  T (*p);
+
+  p = W8 + SR (9, 123);
+  T (*p);                     /* { dg-warning "array subscript \\\[9, 123] is outside array bounds of .\[a-z \]+\\\[9]." } */
+}
+
+void wide_ptr_index_var (void)
+{
+  const wchar_t *p;
+
+  p = W7;
+  T (p[SR (-9, -1)]);         /* { dg-warning "array subscript \\\[-9, -1] is outside array bounds of .\[a-z \]+\\\[8]." } */
+  T (p[SR (-8, 0)]);
+  T (p[SR (0, MAX)]);
+  T (p[SR (1, 9)]);
+  T (p[SR (8, 9)]);           /* { dg-warning "array subscript \\\[8, 9] is outside array bounds of .\[a-z \]+\\\[8]." } */
+
+  p = W7 + SR (4, 6);
+  T (p[5]);                   /* { dg-warning "array subscript \\\[9, 11] is outside array bounds of .\[a-z \]+\\\[8]." } */
+}
+
+void wide_ptr_index_var_1 (void)
+{
+  {
+    int i = SR (1, 2);
+    const wchar_t *p1 = W2 + i;
+
+    T (p1[0]);
+  }
+  {
+    int i = SR (1, 2);
+    const wchar_t *p1 = W2 + i;
+
+    T (p1[1]);
+  }
+  {
+    int i = SR (1, 2);
+    const wchar_t *p1 = W2 + i;
+
+    T (p1[2]);                /* { dg-warning "array subscript \\\[3, 4] is outside array bounds of .\[a-z \]+\\\[3]." } */
+  }
+}
+
+void wide_ptr_index_var_chain (void)
+{
+  int i = SR (1, 2);
+  {
+    const wchar_t *p1 = W2 + i;
+    const wchar_t *p2 = p1 + i;
+    const wchar_t *p3 = p2 + i;
+
+    T (p1[-3]);               /* { dg-warning "array subscript \\\[-2, -1] is outside array bounds of .\[a-z \]+\\\[3]." } */
+    T (p1[-2]);
+    T (p1[-1]);
+    T (p1[0]);
+    T (p1[1]);
+    T (p1[2]);                /* { dg-warning "array subscript \\\[3, 4] is outside array bounds of .\[a-z \]+\\\[3]." } */
+
+    T (p2[-5]);               /* { dg-warning "array subscript \\\[-3, -1] is outside array bounds of .\[a-z \]+\\\[3]." } */
+    T (p2[-4]);
+    T (p2[-1]);
+    T (p2[0]);
+    T (p2[1]);                /* { dg-warning "array subscript \\\[3, 5] is outside array bounds of .\[a-z \]+\\\[3]." } */
+
+    T (p3[0]);                /* { dg-warning "array subscript \\\[3, 6] is outside array bounds of .\[a-z \]+\\\[3]." } */
+    T (p3[1]);                /* { dg-warning "array subscript \\\[4, 7] is outside array bounds of .\[a-z \]+\\\[3]." } */
+    T (p3[9999]);             /* { dg-warning "array subscript \\\[10002, 10005] is outside array bounds of .\[a-z \]+\\\[3]." } */
+
+    /* Large offsets are indistinguishable from negative values.  */
+    T (p3[DIFF_MAX]);         /* { dg-warning "array subscript" "bug" { xfail *-*-* } } */
+  }
+
+  {
+    const wchar_t *p1 = W3 + i;
+    const wchar_t *p2 = p1 + i;
+    const wchar_t *p3 = p2 + i;
+    const wchar_t *p4 = p3 + i;
+
+    T (p1[-3]);               /* { dg-warning "array subscript \\\[-2, -1] is outside array bounds of .\[a-z \]+\\\[4]." } */
+    T (p1[-2]);
+    T (p1[1]);
+    T (p1[2]);
+    T (p1[3]);                /* { dg-warning "array subscript \\\[4, 5] is outside array bounds of .\[a-z \]+\\\[4]." } */
+
+    T (p3[1]);                /* { dg-warning "array subscript \\\[4, 7] is outside array bounds of .\[a-z \]+\\\[4]." } */
+  }
+}
+
+void wide_ptr_index_var_4 (void)
+{
+  int i = SR (1, 2);
+  const wchar_t *p1 = W4 + i;
+  const wchar_t *p2 = p1 + i;
+  const wchar_t *p3 = p2 + i;
+  const wchar_t *p4 = p3 + i;
+
+  T (p4[1]);                  /* { dg-warning "array subscript \\\[5, 9] is outside array bounds of .\[a-z \]+\\\[5]." } */
+}
diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-29.c b/gcc/testsuite/gcc.dg/Warray-bounds-29.c
new file mode 100644
index 0000000..7f89604
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-29.c
@@ -0,0 +1,150 @@ 
+/* PR tree-optimization/83776: missing -Warray-bounds indexing past the end
+   of a string literal
+   Test to exercise warnings for computations of otherwise in-bounds indices
+   into strings that temporarily exceed the bounds of the string.
+   { dg-do compile }
+   { dg-options "-O2 -Warray-bounds=2 -ftrack-macro-expansion=0" } */
+
+#include "range.h"
+
+#define MAX DIFF_MAX
+#define MIN DIFF_MIN
+
+typedef __WCHAR_TYPE__ wchar_t;
+
+void sink (int, ...);
+
+#define T(expr)   sink (0, expr)
+
+void test_narrow (void)
+{
+  int i = SR (1, 2);
+
+  const char *p0 = "12";
+  const char *p1 = p0 + i;    /* points at '2' or beyond */
+  const char *p2 = p1 + i;    /* points at '\0' or beyond */
+  const char *p3 = p2 + i;    /* points just past the end */
+  const char *p4 = p3 + i;    /* invalid */
+
+  T (p0[-1]);                 /* { dg-warning "array subscript \(-1|\[0-9\]+) is outside array bounds of .char\\\[3]." } */
+  T (p0[0]);
+  T (p0[1]);
+  T (p0[2]);
+  T (p0[3]);                  /* { dg-warning "array subscript 3 is outside array bounds of .char\\\[3]." } */
+
+  T (&p0[-1]);                 /* { dg-warning "array subscript \(-1|\[0-9\]+) is \(above|below\) array bounds of .char\\\[3]." } */
+  T (&p0[0]);
+  T (&p0[1]);
+  T (&p0[2]);
+  T (&p0[3]);
+  T (&p0[4]);                  /* { dg-warning "array subscript 4 is above array bounds of .char\\\[3]." } */
+
+  T (p1[-3]);                 /* { dg-warning "array subscript \\\[-2, -1] is outside array bounds of .char\\\[3]." } */
+  T (p1[-2]);
+  T (p1[-1]);
+  T (p1[ 0]);
+  T (p1[ 1]);
+  T (p1[ 2]);                 /* { dg-warning "array subscript \\\[3, 4] is outside array bounds of .char\\\[3]." } */
+  T (p1[ 3]);                 /* { dg-warning "array subscript \\\[4, 5] is outside array bounds of .char\\\[3]." } */
+
+  T (&p1[-3]);                /* { dg-warning "array subscript \\\[-2, -1] is outside array bounds of .char\\\[3]." "bug" { xfail *-*-* } } */
+  T (&p1[-2]);
+  T (&p1[-1]);
+  T (&p1[ 0]);
+  T (&p1[ 1]);
+  T (&p1[ 2]);
+  T (&p1[ 3]);                /* { dg-warning "array subscript \\\[4, 6] is outside array bounds of .char\\\[3]." "bug" { xfail *-*-* } } */
+
+  T (p2[-4]);                 /* { dg-warning "intermediate array offset 4 is outside array bounds of .char\\\[3]." } */
+  T (p2[-3]);
+  T (p2[-2]);
+  T (p2[-1]);
+  T (p2[ 0]);
+
+  /* Even though the lower bound of p3's offsets is in bounds, in order
+     to subtract 4 from p3 and get a dereferenceable pointer its value
+     would have to be out-of-bounds.  */
+  T (p3[-4]);                 /* { dg-warning "intermediate array offset 4 is outside array bounds of .char\\\[3]." } */
+  T (p3[-3]);
+  T (p3[-2]);
+  T (p3[-1]);
+  T (p3[ 0]);                 /* { dg-warning "array subscript \\\[3, 6] is outside array bounds of .char\\\[3]." } */
+
+  T (p4[-4]);                 /* { dg-warning "intermediate array offset 4 is outside array bounds of .char\\\[3]." } */
+  T (p4[-3]);                 /* { dg-warning "intermediate array offset 4 is outside array bounds of .char\\\[3]." } */
+  T (p4[-2]);                 /* { dg-warning "intermediate array offset 4 is outside array bounds of .char\\\[3]." } */
+
+  /* The final subscripts below are invalid.  */
+  T (p4[-1]);                 /* { dg-warning "array subscript \\\[3, 7] is outside array bounds of .char\\\[3]." } */
+  T (p4[ 0]);                 /* { dg-warning "array subscript \\\[4, 8] is outside array bounds of .char\\\[3]." } */
+}
+
+
+void test_narrow_vflow (void)
+{
+  int i = SR (DIFF_MAX - 2, DIFF_MAX);
+  int j = SR (1, DIFF_MAX);
+
+  const char *p0 = "123";
+  const char *p1 = p0 + i;    /* points at '2' or beyond */
+  const char *p2 = p1 + i;    /* points at '\0' or beyond */
+  const char *p3 = p2 + i;    /* points just past the end */
+  const char *p4 = p3 + i;    /* invalid */
+}
+
+
+void test_wide (void)
+{
+  int i = SR (1, 2);
+
+  const wchar_t *p0 = L"123";
+  const wchar_t *p1 = p0 + i;    /* points at L'2' or beyond */
+  const wchar_t *p2 = p1 + i;    /* points at L'3' or beyond */
+  const wchar_t *p3 = p2 + i;    /* points at L'\0' or beyond */
+  const wchar_t *p4 = p3 + i;    /* points just past the end */
+  const wchar_t *p5 = p4 + i;    /* invalid */
+
+  T (p0[0]);
+  T (p0[1]);
+  T (p0[2]);
+  T (p0[3]);
+  T (p0[4]);                  /* { dg-warning "array subscript 4 is outside array bounds of .\[a-z \]+\\\[4]." } */
+
+  T (p1[-1]);
+  T (p1[ 0]);
+  T (p1[ 1]);
+  T (p1[ 2]);
+  T (p1[ 3]);                  /* { dg-warning "array subscript \\\[4, 5] is outside array bounds of .\[a-z \]+\\\[4]." } */
+
+  T (&p1[-1]);
+  T (&p1[ 0]);
+  T (&p1[ 1]);
+  T (&p1[ 2]);
+  T (&p1[ 3]);
+  T (&p1[ 4]);                 /* { dg-warning "array subscript \\\[5, 6] is outside array bounds of .\[a-z \]+\\\[4]." "bug" { xfail *-*-* } } */
+
+  T (p2[-5]);                 /* { dg-warning "array subscript \\\[-3, -1] is outside array bounds of .\[a-z \]+\\\[4]." } */
+  T (p2[-4]);
+  T (p2[-3]);
+  T (p2[-2]);
+  T (p2[-1]);
+  T (p2[ 0]);
+
+  /* Even though the lower bound of p3's offsets is in bounds, in order
+     to subtract 5 from p3 and get a dereferenceable pointer its value
+     would have to be out-of-bounds.  */
+  T (p3[-5]);                 /* { dg-warning "intermediate array offset 5 is outside array bounds of .\[a-z \]+\\\[4]." } */
+  T (p3[-4]);
+  T (p3[-3]);
+  T (p3[-2]);
+  T (p3[-1]);
+  T (p3[ 0]);
+  T (p3[ 1]);                 /* { dg-warning "array subscript \\\[4, 7] is outside array bounds of .\[a-z \]+\\\[4]." } */
+
+  T (p4[-5]);                 /* { dg-warning "intermediate array offset 5 is outside array bounds of .\[a-z \]+\\\[4]." } */
+  T (p4[-4]);
+  T (p4[-3]);
+  T (p4[-2]);
+  T (p4[-1]);
+  T (p4[ 0]);                 /* { dg-warning "array subscript \\\[4, 8] is outside array bounds of .\[a-z \]+\\\[4]." } */
+}
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 3294bde..b2e45c9 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -4763,6 +4763,7 @@  class vrp_prop : public ssa_propagation_engine
   void vrp_finalize (bool);
   void check_all_array_refs (void);
   void check_array_ref (location_t, tree, bool);
+  void check_mem_ref (location_t, tree);
   void search_for_addr_array (tree, location_t);
 
   class vr_values vr_values;
@@ -4781,6 +4782,7 @@  class vrp_prop : public ssa_propagation_engine
   void extract_range_from_phi_node (gphi *phi, value_range *vr)
     { vr_values.extract_range_from_phi_node (phi, vr); }
 };
+
 /* Checks one ARRAY_REF in REF, located at LOCUS. Ignores flexible arrays
    and "struct" hacks. If VRP can determine that the
    array subscript is a constant, check if it is outside valid
@@ -4915,6 +4917,179 @@  vrp_prop::check_array_ref (location_t location, tree ref,
     }
 }
 
+/* Checks one MEM_REF in REF, located at LOCATION, for out-of-bounds
+   references to string constants.  If VRP can determine that the array
+   subscript is a constant, check if it is outside valid range.
+   If the array subscript is a RANGE, warn if it is non-overlapping
+   with valid range.
+   IGNORE_OFF_BY_ONE is true if the MEM_REF is inside an ADDR_EXPR.  */
+
+void
+vrp_prop::check_mem_ref (location_t location, tree ref)
+{
+  if (TREE_NO_WARNING (ref))
+    return;
+
+  tree arg = TREE_OPERAND (ref, 0);
+  tree cstoff = TREE_OPERAND (ref, 1);
+  tree varoff = NULL_TREE;
+
+  const offset_int maxobjsize = tree_to_shwi (max_object_size ());
+
+  /* The string constant bounds in bytes.  Initially set to [0, MAXOBJSIZE]
+     until a tighter bound is determined.  */
+  offset_int strbounds[2];
+  strbounds[1] = maxobjsize;
+  strbounds[0] = -strbounds[1] - 1;
+
+  /* The minimum and maximum intermediate offset.  For a reference
+     to be valid, not only does the final offset/subscript must be
+     in bounds but all intermediate offsets must be as well. */
+  offset_int ioff = wi::to_offset (fold_convert (ptrdiff_type_node, cstoff));
+  offset_int extrema[2] = { 0, wi::abs (ioff) };
+
+  /* The range of the byte offset into the reference.  */
+  offset_int offrange[2] = { 0, 0 };
+
+  value_range *vr = NULL;
+
+  /* Determine the offsets and increment OFFRANGE for the bounds of each.  */
+  while (TREE_CODE (arg) == SSA_NAME)
+    {
+      gimple *def = SSA_NAME_DEF_STMT (arg);
+      if (!is_gimple_assign (def))
+	{
+	  if (tree var = SSA_NAME_VAR (arg))
+	    arg = var;
+	  break;
+	}
+
+      tree_code code = gimple_assign_rhs_code (def);
+      if (code == POINTER_PLUS_EXPR)
+	{
+	  arg = gimple_assign_rhs1 (def);
+	  varoff = gimple_assign_rhs2 (def);
+	}
+      else if (code == ASSERT_EXPR)
+	{
+	  arg = TREE_OPERAND (gimple_assign_rhs1 (def), 0);
+	  continue;
+	}
+      else
+	return;
+
+      if (TREE_CODE (varoff) != SSA_NAME)
+	break;
+
+      vr = get_value_range (varoff);
+      if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
+	break;
+
+      if (TREE_CODE (vr->min) != INTEGER_CST
+          || TREE_CODE (vr->max) != INTEGER_CST)
+        break;
+
+      offset_int ofr[] = {
+	wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
+	wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
+      };
+
+      if (vr->type == VR_RANGE)
+	{
+	  if (ofr[0] < ofr[1])
+	    {
+	      offrange[0] += ofr[0];
+	      offrange[1] += ofr[1];
+	    }
+	  else
+	    {
+	      offrange[0] += strbounds[0];
+	      offrange[1] += strbounds[1];
+	    }
+	}
+      else
+	{
+	  offrange[0] += strbounds[0];
+	  offrange[1] += strbounds[1];
+	}
+
+      /* Keep track of the minimum and maximum offset.  */
+      if (offrange[1] < 0 && offrange[1] < extrema[0])
+	extrema[0] = offrange[1];
+      if (offrange[0] > 0 && offrange[0] > extrema[1])
+	extrema[1] = offrange[0];
+
+      if (offrange[0] < strbounds[0])
+	offrange[0] = strbounds[0];
+
+      if (offrange[1] > strbounds[1])
+	offrange[1] = strbounds[1];
+    }
+
+  /* Handle just string constants for now.  */
+  if (TREE_CODE (arg) == ADDR_EXPR)
+    {
+      arg = TREE_OPERAND (arg, 0);
+      if (TREE_CODE (arg) != STRING_CST)
+	return;
+    }
+  else
+    return;
+
+  tree strtype = TREE_TYPE (arg);
+
+  if (TREE_CODE (strtype) != ARRAY_TYPE)
+    return;
+
+  offrange[0] += ioff;
+  offrange[1] += ioff;
+
+  offset_int eltsize = wi::to_offset (TYPE_SIZE_UNIT (TREE_TYPE (strtype)));
+
+  tree dom = TYPE_DOMAIN (strtype);
+  strbounds[0] = wi::to_offset (TYPE_MIN_VALUE (dom)) * eltsize;
+  strbounds[1] = wi::to_offset (TYPE_MAX_VALUE (dom)) * eltsize;
+
+  if (offrange[0] > strbounds[1]
+      || offrange[1] < strbounds[0])
+    {
+      HOST_WIDE_INT idxrange[] = {
+	offrange[0].to_shwi () / eltsize.to_shwi (),
+	offrange[1].to_shwi () / eltsize.to_shwi ()
+      };
+
+      if (idxrange[0] == idxrange[1])
+	warning_at (location, OPT_Warray_bounds,
+		    "array subscript %wi is outside array bounds "
+		    "of %qT",
+		    idxrange[0], strtype);
+      else
+	warning_at (location, OPT_Warray_bounds,
+		    "array subscript [%wi, %wi] is outside array bounds "
+		    "of %qT",
+		    idxrange[0], idxrange[1], strtype);
+      TREE_NO_WARNING (ref) = 1;
+      return;
+    }
+
+  if (warn_array_bounds < 2)
+    return;
+
+  /* At level 2 check also intermediate offsets.  */
+  int i = 0;
+  if (extrema[i] < -strbounds[1]
+      || extrema[i = 1] > strbounds[1] + eltsize)
+    {
+      HOST_WIDE_INT tmpidx = extrema[i].to_shwi () / eltsize.to_shwi ();
+
+      warning_at (location, OPT_Warray_bounds,
+		  "intermediate array offset %wi is outside array bounds "
+		  "of %qT",
+		  tmpidx,  strtype);
+      TREE_NO_WARNING (ref) = 1;
+    }
+}
+
 /* Searches if the expr T, located at LOCATION computes
    address of an ARRAY_REF, and call check_array_ref on it.  */
 
@@ -5011,7 +5186,8 @@  check_array_bounds (tree *tp, int *walk_subtree, void *data)
   vrp_prop *vrp_prop = (class vrp_prop *)wi->info;
   if (TREE_CODE (t) == ARRAY_REF)
     vrp_prop->check_array_ref (location, t, false /*ignore_off_by_one*/);
-
+  else if (TREE_CODE (t) == MEM_REF)
+    vrp_prop->check_mem_ref (location, t);
   else if (TREE_CODE (t) == ADDR_EXPR)
     {
       vrp_prop->search_for_addr_array (t, location);