diff mbox series

Adjust tree-ssa-strlen.c for irange API.

Message ID 20200804113356.582734-1-aldyh@redhat.com
State New
Headers show
Series Adjust tree-ssa-strlen.c for irange API. | expand

Commit Message

Aldy Hernandez Aug. 4, 2020, 11:33 a.m. UTC
This patch adapts the strlen pass to use the irange API.

I wasn't able to remove the one annoying use of VR_ANTI_RANGE, because
I'm not sure what to do.  Perhaps Martin can shed some light.  The
current code has:

  else if (rng == VR_ANTI_RANGE)
	{
	  wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
	  if (wi::ltu_p (cntrange[1], maxobjsize))
	    {
	      cntrange[0] = cntrange[1] + 1;
	      cntrange[1] = maxobjsize;

Suppose we have ~[10,20], won't the above set cntrange[] to [21,MAX]?  Won't
this ignore the 0..9 that is part of the range?  What should we do here?

Anyways, I've left the anti-range in place, but the rest of the patch still
stands.

OK?

gcc/ChangeLog:

	* tree-ssa-strlen.c (get_range): Adjust for irange API.
	(compare_nonzero_chars): Same.
	(dump_strlen_info): Same.
	(get_range_strlen_dynamic): Same.
	(set_strlen_range): Same.
	(maybe_diag_stxncpy_trunc): Same.
	(get_len_or_size): Same.
	(count_nonzero_bytes_addr): Same.
	(handle_integral_assign): Same.
---
 gcc/tree-ssa-strlen.c | 122 ++++++++++++++++++++----------------------
 1 file changed, 57 insertions(+), 65 deletions(-)

Comments

Martin Sebor Aug. 4, 2020, 7:34 p.m. UTC | #1
On 8/4/20 5:33 AM, Aldy Hernandez via Gcc-patches wrote:
> This patch adapts the strlen pass to use the irange API.
> 
> I wasn't able to remove the one annoying use of VR_ANTI_RANGE, because
> I'm not sure what to do.  Perhaps Martin can shed some light.  The
> current code has:
> 
>    else if (rng == VR_ANTI_RANGE)
> 	{
> 	  wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
> 	  if (wi::ltu_p (cntrange[1], maxobjsize))
> 	    {
> 	      cntrange[0] = cntrange[1] + 1;
> 	      cntrange[1] = maxobjsize;
> 
> Suppose we have ~[10,20], won't the above set cntrange[] to [21,MAX]?  Won't
> this ignore the 0..9 that is part of the range?  What should we do here?

cntrange is the range of the strncpy (and strncat) bound.  It does
ignore the lower subrange but I think that's intentional because
the lower the bound the more likely the truncation, so it serves
to minimize false positives.

I didn't see any tests fail with the anti-range block disabled but
with some effort I was able to come up with one:

   char a[7];

   void f (int n)
   {
     if (n > 3)
       n = 0;

     strncpy (a, "12345678", n);   // -Wstringop-truncation
   }

The warning disappears when the anti-range handling is removed so
unless that's causing headaches for the new API I think we want to
keep it (and add the test case :)

Martin

> 
> Anyways, I've left the anti-range in place, but the rest of the patch still
> stands.
> 
> OK?
> 
> gcc/ChangeLog:
> 
> 	* tree-ssa-strlen.c (get_range): Adjust for irange API.
> 	(compare_nonzero_chars): Same.
> 	(dump_strlen_info): Same.
> 	(get_range_strlen_dynamic): Same.
> 	(set_strlen_range): Same.
> 	(maybe_diag_stxncpy_trunc): Same.
> 	(get_len_or_size): Same.
> 	(count_nonzero_bytes_addr): Same.
> 	(handle_integral_assign): Same.
> ---
>   gcc/tree-ssa-strlen.c | 122 ++++++++++++++++++++----------------------
>   1 file changed, 57 insertions(+), 65 deletions(-)
> 
> diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
> index fbaee745f7d..e6009874ee5 100644
> --- a/gcc/tree-ssa-strlen.c
> +++ b/gcc/tree-ssa-strlen.c
> @@ -220,21 +220,25 @@ get_range (tree val, wide_int minmax[2], const vr_values *rvals /* = NULL */)
>   	 GCC 11).  */
>         const value_range *vr
>   	= (CONST_CAST (class vr_values *, rvals)->get_value_range (val));
> -      value_range_kind rng = vr->kind ();
> -      if (rng != VR_RANGE || !range_int_cst_p (vr))
> +      if (vr->undefined_p () || vr->varying_p ())
>   	return NULL_TREE;
>   
> -      minmax[0] = wi::to_wide (vr->min ());
> -      minmax[1] = wi::to_wide (vr->max ());
> +      minmax[0] = vr->lower_bound ();
> +      minmax[1] = vr->upper_bound ();
>         return val;
>       }
>   
> -  value_range_kind rng = get_range_info (val, minmax, minmax + 1);
> -  if (rng == VR_RANGE)
> -    return val;
> +  value_range vr;
> +  get_range_info (val, vr);
> +  if (!vr.undefined_p () && !vr.varying_p ())
> +    {
> +      minmax[0] = vr.lower_bound ();
> +      minmax[1] = vr.upper_bound ();
> +      return val;
> +    }
>   
> -  /* Do not handle anti-ranges and instead make use of the on-demand
> -     VRP if/when it becomes available (hopefully in GCC 11).  */
> +  /* We should adjust for the on-demand VRP if/when it becomes
> +     available (hopefully in GCC 11).  */
>     return NULL_TREE;
>   }
>   
> @@ -278,16 +282,18 @@ compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off,
>       = (CONST_CAST (class vr_values *, rvals)
>          ->get_value_range (si->nonzero_chars));
>   
> -  value_range_kind rng = vr->kind ();
> -  if (rng != VR_RANGE || !range_int_cst_p (vr))
> +  if (vr->undefined_p () || vr->varying_p ())
>       return -1;
>   
>     /* If the offset is less than the minimum length or if the bounds
>        of the length range are equal return the result of the comparison
>        same as in the constant case.  Otherwise return a conservative
>        result.  */
> -  int cmpmin = compare_tree_int (vr->min (), off);
> -  if (cmpmin > 0 || tree_int_cst_equal (vr->min (), vr->max ()))
> +  tree type = TREE_TYPE (si->nonzero_chars);
> +  tree tmin = wide_int_to_tree (type, vr->lower_bound ());
> +  tree tmax = wide_int_to_tree (type, vr->upper_bound ());
> +  int cmpmin = compare_tree_int (tmin, off);
> +  if (cmpmin > 0 || tree_int_cst_equal (tmin, tmax))
>       return cmpmin;
>   
>     return -1;
> @@ -905,32 +911,14 @@ dump_strlen_info (FILE *fp, gimple *stmt, const vr_values *rvals)
>   		  print_generic_expr (fp, si->nonzero_chars);
>   		  if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
>   		    {
> -		      value_range_kind rng = VR_UNDEFINED;
> -		      wide_int min, max;
> +		      value_range vr;
>   		      if (rvals)
> -			{
> -			  const value_range *vr
> -			    = CONST_CAST (class vr_values *, rvals)
> -			    ->get_value_range (si->nonzero_chars);
> -			  rng = vr->kind ();
> -			  if (range_int_cst_p (vr))
> -			    {
> -			      min = wi::to_wide (vr->min ());
> -			      max = wi::to_wide (vr->max ());
> -			    }
> -			  else
> -			    rng = VR_UNDEFINED;
> -			}
> +			vr = *(CONST_CAST (class vr_values *, rvals)
> +			       ->get_value_range (si->nonzero_chars));
>   		      else
> -			rng = get_range_info (si->nonzero_chars, &min, &max);
> -
> -		      if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
> -			{
> -			  fprintf (fp, " %s[%llu, %llu]",
> -				   rng == VR_RANGE ? "" : "~",
> -				   (long long) min.to_uhwi (),
> -				   (long long) max.to_uhwi ());
> -			}
> +			get_range_info (si->nonzero_chars, vr);
> +		      vr.normalize_symbolics ();
> +		      vr.dump (fp);
>   		    }
>   		}
>   
> @@ -1113,11 +1101,13 @@ get_range_strlen_dynamic (tree src, c_strlen_data *pdata, bitmap *visited,
>   	      const value_range_equiv *vr
>   		= CONST_CAST (class vr_values *, rvals)
>   		->get_value_range (si->nonzero_chars);
> -	      if (vr->kind () == VR_RANGE
> -		  && range_int_cst_p (vr))
> +	      if (!vr->undefined_p () && !vr->varying_p ())
>   		{
> -		  pdata->minlen = vr->min ();
> -		  pdata->maxlen = vr->max ();
> +		  tree type = TREE_TYPE (si->nonzero_chars);
> +		  tree tmin = wide_int_to_tree (type, vr->lower_bound ());
> +		  tree tmax = wide_int_to_tree (type, vr->upper_bound ());
> +		  pdata->minlen = tmin;
> +		  pdata->maxlen = tmax;
>   		}
>   	      else
>   		pdata->minlen = build_zero_cst (size_type_node);
> @@ -1159,11 +1149,11 @@ get_range_strlen_dynamic (tree src, c_strlen_data *pdata, bitmap *visited,
>   	  const value_range_equiv *vr
>   	    = CONST_CAST (class vr_values *, rvals)
>   	    ->get_value_range (si->nonzero_chars);
> -	  if (vr->kind () == VR_RANGE
> -	      && range_int_cst_p (vr))
> +	  if (!vr->undefined_p () && !vr->varying_p ())
>   	    {
> -	      pdata->minlen = vr->min ();
> -	      pdata->maxlen = vr->max ();
> +	      tree type = TREE_TYPE (si->nonzero_chars);
> +	      pdata->minlen = wide_int_to_tree (type, vr->lower_bound ());
> +	      pdata->maxlen = wide_int_to_tree (type, vr->upper_bound ());
>   	      pdata->maxbound = pdata->maxlen;
>   	    }
>   	  else
> @@ -1803,12 +1793,14 @@ set_strlen_range (tree lhs, wide_int min, wide_int max,
>         else if (TREE_CODE (bound) == SSA_NAME)
>   	{
>   	  wide_int minbound, maxbound;
> -	  value_range_kind rng = get_range_info (bound, &minbound, &maxbound);
> -	  if (rng == VR_RANGE)
> +	  value_range vr;
> +	  get_range_info (bound, vr);
> +	  if (!vr.undefined_p () && !vr.varying_p ())
>   	    {
> +	      wide_int minbound = vr.lower_bound ();
> +	      wide_int maxbound = vr.upper_bound ();
>   	      /* For a bound in a known range, adjust the range determined
> -		 above as necessary.  For a bound in some anti-range or
> -		 in an unknown range, use the range determined by callers.  */
> +		 above as necessary.  */
>   	      if (wi::ltu_p (minbound, min))
>   		min = minbound;
>   	      if (wi::ltu_p (maxbound, max))
> @@ -3045,6 +3037,8 @@ maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt)
>   
>   	  if (wi::ltu_p (cntrange[1], maxobjsize))
>   	    {
> +	      /* FIXME: Imagine ~[10,20].  The code here will set
> +		 cntrange[] to [21,MAX].  Why are we ignoring [0,9] ??  */
>   	      cntrange[0] = cntrange[1] + 1;
>   	      cntrange[1] = maxobjsize;
>   	    }
> @@ -4139,10 +4133,12 @@ get_len_or_size (tree arg, int idx, unsigned HOST_WIDE_INT lenrng[2],
>   	}
>         else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
>   	{
> -	  wide_int min, max;
> -	  value_range_kind rng = get_range_info (si->nonzero_chars, &min, &max);
> -	  if (rng == VR_RANGE)
> +	  value_range vr;
> +	  get_range_info (si->nonzero_chars, vr);
> +	  if (!vr.undefined_p () && !vr.varying_p ())
>   	    {
> +	      wide_int min = vr.lower_bound ();
> +	      wide_int max = vr.upper_bound ();
>   	      lenrng[0] = min.to_uhwi ();
>   	      lenrng[1] = max.to_uhwi ();
>   	      *nulterm = si->full_string_p;
> @@ -4847,11 +4843,11 @@ count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset,
>   	{
>   	  vr_values *v = CONST_CAST (vr_values *, rvals);
>   	  const value_range_equiv *vr = v->get_value_range (si->nonzero_chars);
> -	  if (vr->kind () != VR_RANGE || !range_int_cst_p (vr))
> +	  if (vr->undefined_p () || vr->varying_p ())
>   	    return false;
> -
> -	  minlen = tree_to_uhwi (vr->min ());
> -	  maxlen = tree_to_uhwi (vr->max ());
> +	  tree type = TREE_TYPE (si->nonzero_chars);
> +	  minlen = tree_to_uhwi (wide_int_to_tree (type, vr->lower_bound ()));
> +	  maxlen = tree_to_uhwi (wide_int_to_tree (type, vr->upper_bound ()));
>   	}
>         else
>   	return false;
> @@ -5554,16 +5550,12 @@ handle_integral_assign (gimple_stmt_iterator *gsi, bool *cleanup_eh,
>   		  /* Reading a character before the final '\0'
>   		     character.  Just set the value range to ~[0, 0]
>   		     if we don't have anything better.  */
> -		  wide_int min, max;
> -		  signop sign = TYPE_SIGN (lhs_type);
> -		  int prec = TYPE_PRECISION (lhs_type);
> -		  value_range_kind vr = get_range_info (lhs, &min, &max);
> -		  if (vr == VR_VARYING
> -		      || (vr == VR_RANGE
> -			  && min == wi::min_value (prec, sign)
> -			  && max == wi::max_value (prec, sign)))
> +		  value_range vr;
> +		  get_range_info (lhs, vr);
> +		  if (vr.varying_p ())
>   		    set_range_info (lhs, VR_ANTI_RANGE,
> -				    wi::zero (prec), wi::zero (prec));
> +				    wi::zero (TYPE_PRECISION (lhs_type)),
> +				    wi::zero (TYPE_PRECISION (lhs_type)));
>   		}
>   	    }
>   	}
>
Aldy Hernandez Aug. 4, 2020, 9:23 p.m. UTC | #2
On 8/4/20 9:34 PM, Martin Sebor wrote:
> On 8/4/20 5:33 AM, Aldy Hernandez via Gcc-patches wrote:
>> This patch adapts the strlen pass to use the irange API.
>>
>> I wasn't able to remove the one annoying use of VR_ANTI_RANGE, because
>> I'm not sure what to do.  Perhaps Martin can shed some light.  The
>> current code has:
>>
>>    else if (rng == VR_ANTI_RANGE)
>>     {
>>       wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE 
>> (ptrdiff_type_node));
>>       if (wi::ltu_p (cntrange[1], maxobjsize))
>>         {
>>           cntrange[0] = cntrange[1] + 1;
>>           cntrange[1] = maxobjsize;
>>
>> Suppose we have ~[10,20], won't the above set cntrange[] to [21,MAX]?  
>> Won't
>> this ignore the 0..9 that is part of the range?  What should we do here?
> 
> cntrange is the range of the strncpy (and strncat) bound.  It does
> ignore the lower subrange but I think that's intentional because
> the lower the bound the more likely the truncation, so it serves
> to minimize false positives.
> 
> I didn't see any tests fail with the anti-range block disabled but
> with some effort I was able to come up with one:
> 
>    char a[7];
> 
>    void f (int n)
>    {
>      if (n > 3)
>        n = 0;
> 
>      strncpy (a, "12345678", n);   // -Wstringop-truncation
>    }
> 
> The warning disappears when the anti-range handling is removed so
> unless that's causing headaches for the new API I think we want to
> keep it (and add the test case :)

Hi Martin.

Thanks for taking the time to respond.

On the strlen1 dump I see that the 3rd argument to strncpy above is:

   long unsigned int ~[4, 18446744071562067967]

which is a fancy way of saying:

   long unsigned int [0,3][18446744071562067967,+INF]

The second sub-range is basically [INT_MIN,+INF] for the original int N, 
which makes sense because N could be negative on the way in.

I don't understand the warning though:

a.c:8:5: warning: ‘__builtin_strncpy’ output truncated copying between 0 
and 3 bytes from a
  string of length 8 [-Wstringop-truncation]
     8 |     __builtin_strncpy (a, "12345678", n);   // 
-Wstringop-truncation
       |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The range of the bound to strncpy can certainly be [0,3], but it can 
also be [1844...,+INF] which shouldn't warn.

In a world without anti-ranges, we'd see the 2 sub-ranges above.  How 
would you suggest handling it?  We could nuke out the uppermost 
sub-range, but what if the range is [0,3][10,20]?  Perhaps remove from 
some arbitrary number on up?  Say...[0xf.....,+INF]?  This seems like a 
hack, but perhaps is what's needed???

It doesn't seem like the above source should warn.  Am I missing something?

Thanks.
Aldy
Aldy Hernandez Aug. 4, 2020, 9:40 p.m. UTC | #3
On 8/4/20 11:23 PM, Aldy Hernandez wrote:
> 
> 
> On 8/4/20 9:34 PM, Martin Sebor wrote:
>> On 8/4/20 5:33 AM, Aldy Hernandez via Gcc-patches wrote:
>>> This patch adapts the strlen pass to use the irange API.
>>>
>>> I wasn't able to remove the one annoying use of VR_ANTI_RANGE, because
>>> I'm not sure what to do.  Perhaps Martin can shed some light.  The
>>> current code has:
>>>
>>>    else if (rng == VR_ANTI_RANGE)
>>>     {
>>>       wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE 
>>> (ptrdiff_type_node));
>>>       if (wi::ltu_p (cntrange[1], maxobjsize))
>>>         {
>>>           cntrange[0] = cntrange[1] + 1;
>>>           cntrange[1] = maxobjsize;
>>>
>>> Suppose we have ~[10,20], won't the above set cntrange[] to [21,MAX]? 
>>> Won't
>>> this ignore the 0..9 that is part of the range?  What should we do here?
>>
>> cntrange is the range of the strncpy (and strncat) bound.  It does
>> ignore the lower subrange but I think that's intentional because
>> the lower the bound the more likely the truncation, so it serves
>> to minimize false positives.
>>
>> I didn't see any tests fail with the anti-range block disabled but
>> with some effort I was able to come up with one:
>>
>>    char a[7];
>>
>>    void f (int n)
>>    {
>>      if (n > 3)
>>        n = 0;
>>
>>      strncpy (a, "12345678", n);   // -Wstringop-truncation
>>    }
>>
>> The warning disappears when the anti-range handling is removed so
>> unless that's causing headaches for the new API I think we want to
>> keep it (and add the test case :)
> 
> Hi Martin.
> 
> Thanks for taking the time to respond.
> 
> On the strlen1 dump I see that the 3rd argument to strncpy above is:
> 
>    long unsigned int ~[4, 18446744071562067967]
> 
> which is a fancy way of saying:
> 
>    long unsigned int [0,3][18446744071562067967,+INF]
> 
> The second sub-range is basically [INT_MIN,+INF] for the original int N, 
> which makes sense because N could be negative on the way in.
> 
> I don't understand the warning though:
> 
> a.c:8:5: warning: ‘__builtin_strncpy’ output truncated copying between 0 
> and 3 bytes from a
>   string of length 8 [-Wstringop-truncation]
>      8 |     __builtin_strncpy (a, "12345678", n);   // 
> -Wstringop-truncation
>        |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> The range of the bound to strncpy can certainly be [0,3], but it can 
> also be [1844...,+INF] which shouldn't warn.
> 
> In a world without anti-ranges, we'd see the 2 sub-ranges above.  How 
> would you suggest handling it?  We could nuke out the uppermost 
> sub-range, but what if the range is [0,3][10,20]?  Perhaps remove from 
> some arbitrary number on up?  Say...[0xf.....,+INF]?  This seems like a 
> hack, but perhaps is what's needed???
> 
> It doesn't seem like the above source should warn.  Am I missing something?

FWIW, evrp gets a slightly more pessimistic range:

_1: long unsigned int ~[2147483648, 18446744071562067967]

it isn't until VRP1 that the range excludes [0,3]:

_1: long unsigned int ~[4, 18446744071562067967]

Whereas the ranger can get the more refined range at -O1, and without 
dominators.  I would even venture to say it could get it at -O0, with 
just SSA + CFG.

I noticed that the strlen pass only runs at -O2.  Perhaps we could 
explore running the pass for lower or no optimization levels when the 
ranger becomes available.  Just a thought.

Aldy
Martin Sebor Aug. 5, 2020, 12:11 a.m. UTC | #4
On 8/4/20 3:23 PM, Aldy Hernandez wrote:
> 
> 
> On 8/4/20 9:34 PM, Martin Sebor wrote:
>> On 8/4/20 5:33 AM, Aldy Hernandez via Gcc-patches wrote:
>>> This patch adapts the strlen pass to use the irange API.
>>>
>>> I wasn't able to remove the one annoying use of VR_ANTI_RANGE, because
>>> I'm not sure what to do.  Perhaps Martin can shed some light.  The
>>> current code has:
>>>
>>>    else if (rng == VR_ANTI_RANGE)
>>>     {
>>>       wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE 
>>> (ptrdiff_type_node));
>>>       if (wi::ltu_p (cntrange[1], maxobjsize))
>>>         {
>>>           cntrange[0] = cntrange[1] + 1;
>>>           cntrange[1] = maxobjsize;
>>>
>>> Suppose we have ~[10,20], won't the above set cntrange[] to [21,MAX]? 
>>> Won't
>>> this ignore the 0..9 that is part of the range?  What should we do here?
>>
>> cntrange is the range of the strncpy (and strncat) bound.  It does
>> ignore the lower subrange but I think that's intentional because
>> the lower the bound the more likely the truncation, so it serves
>> to minimize false positives.
>>
>> I didn't see any tests fail with the anti-range block disabled but
>> with some effort I was able to come up with one:
>>
>>    char a[7];
>>
>>    void f (int n)
>>    {
>>      if (n > 3)
>>        n = 0;
>>
>>      strncpy (a, "12345678", n);   // -Wstringop-truncation
>>    }
>>
>> The warning disappears when the anti-range handling is removed so
>> unless that's causing headaches for the new API I think we want to
>> keep it (and add the test case :)
> 
> Hi Martin.
> 
> Thanks for taking the time to respond.
> 
> On the strlen1 dump I see that the 3rd argument to strncpy above is:
> 
>    long unsigned int ~[4, 18446744071562067967]
> 
> which is a fancy way of saying:
> 
>    long unsigned int [0,3][18446744071562067967,+INF]
> 
> The second sub-range is basically [INT_MIN,+INF] for the original int N, 
> which makes sense because N could be negative on the way in.
> 
> I don't understand the warning though:
> 
> a.c:8:5: warning: ‘__builtin_strncpy’ output truncated copying between 0 
> and 3 bytes from a
>   string of length 8 [-Wstringop-truncation]
>      8 |     __builtin_strncpy (a, "12345678", n);   // 
> -Wstringop-truncation
>        |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> The range of the bound to strncpy can certainly be [0,3], but it can 
> also be [1844...,+INF] which shouldn't warn.

strncpy(d, s, n) zeroes out the destination after it copies s, up
to n - strlen (s).  It's been a while and -Wstringop-truncation
certainly has its quirks (to put it nicely) but I think this one
is a feature.

> 
> In a world without anti-ranges, we'd see the 2 sub-ranges above.  How 
> would you suggest handling it?  We could nuke out the uppermost 
> sub-range, but what if the range is [0,3][10,20]?  Perhaps remove from 
> some arbitrary number on up?  Say...[0xf.....,+INF]?  This seems like a 
> hack, but perhaps is what's needed???

The second subrange in [0, 3][10, 20] is out of bounds for char[7]
and the first one truncates so either we issue -Wstringop-truncation
or if not, -Wstringop-overflow.

A better example might be [0, 3][5, 7] with "1234" as the source.
Only one of these ranges truncates so a warning would probably not
be called for.  Only warn when there's no range that doesn't lead
to truncation.

It will get more interesting if/when the length of the source string
is also in a discontiguous range.  My head is already starting to hurt.

Martin

> 
> It doesn't seem like the above source should warn.  Am I missing something?
> 
> Thanks.
> Aldy
>
diff mbox series

Patch

diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index fbaee745f7d..e6009874ee5 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -220,21 +220,25 @@  get_range (tree val, wide_int minmax[2], const vr_values *rvals /* = NULL */)
 	 GCC 11).  */
       const value_range *vr
 	= (CONST_CAST (class vr_values *, rvals)->get_value_range (val));
-      value_range_kind rng = vr->kind ();
-      if (rng != VR_RANGE || !range_int_cst_p (vr))
+      if (vr->undefined_p () || vr->varying_p ())
 	return NULL_TREE;
 
-      minmax[0] = wi::to_wide (vr->min ());
-      minmax[1] = wi::to_wide (vr->max ());
+      minmax[0] = vr->lower_bound ();
+      minmax[1] = vr->upper_bound ();
       return val;
     }
 
-  value_range_kind rng = get_range_info (val, minmax, minmax + 1);
-  if (rng == VR_RANGE)
-    return val;
+  value_range vr;
+  get_range_info (val, vr);
+  if (!vr.undefined_p () && !vr.varying_p ())
+    {
+      minmax[0] = vr.lower_bound ();
+      minmax[1] = vr.upper_bound ();
+      return val;
+    }
 
-  /* Do not handle anti-ranges and instead make use of the on-demand
-     VRP if/when it becomes available (hopefully in GCC 11).  */
+  /* We should adjust for the on-demand VRP if/when it becomes
+     available (hopefully in GCC 11).  */
   return NULL_TREE;
 }
 
@@ -278,16 +282,18 @@  compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off,
     = (CONST_CAST (class vr_values *, rvals)
        ->get_value_range (si->nonzero_chars));
 
-  value_range_kind rng = vr->kind ();
-  if (rng != VR_RANGE || !range_int_cst_p (vr))
+  if (vr->undefined_p () || vr->varying_p ())
     return -1;
 
   /* If the offset is less than the minimum length or if the bounds
      of the length range are equal return the result of the comparison
      same as in the constant case.  Otherwise return a conservative
      result.  */
-  int cmpmin = compare_tree_int (vr->min (), off);
-  if (cmpmin > 0 || tree_int_cst_equal (vr->min (), vr->max ()))
+  tree type = TREE_TYPE (si->nonzero_chars);
+  tree tmin = wide_int_to_tree (type, vr->lower_bound ());
+  tree tmax = wide_int_to_tree (type, vr->upper_bound ());
+  int cmpmin = compare_tree_int (tmin, off);
+  if (cmpmin > 0 || tree_int_cst_equal (tmin, tmax))
     return cmpmin;
 
   return -1;
@@ -905,32 +911,14 @@  dump_strlen_info (FILE *fp, gimple *stmt, const vr_values *rvals)
 		  print_generic_expr (fp, si->nonzero_chars);
 		  if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
 		    {
-		      value_range_kind rng = VR_UNDEFINED;
-		      wide_int min, max;
+		      value_range vr;
 		      if (rvals)
-			{
-			  const value_range *vr
-			    = CONST_CAST (class vr_values *, rvals)
-			    ->get_value_range (si->nonzero_chars);
-			  rng = vr->kind ();
-			  if (range_int_cst_p (vr))
-			    {
-			      min = wi::to_wide (vr->min ());
-			      max = wi::to_wide (vr->max ());
-			    }
-			  else
-			    rng = VR_UNDEFINED;
-			}
+			vr = *(CONST_CAST (class vr_values *, rvals)
+			       ->get_value_range (si->nonzero_chars));
 		      else
-			rng = get_range_info (si->nonzero_chars, &min, &max);
-
-		      if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
-			{
-			  fprintf (fp, " %s[%llu, %llu]",
-				   rng == VR_RANGE ? "" : "~",
-				   (long long) min.to_uhwi (),
-				   (long long) max.to_uhwi ());
-			}
+			get_range_info (si->nonzero_chars, vr);
+		      vr.normalize_symbolics ();
+		      vr.dump (fp);
 		    }
 		}
 
@@ -1113,11 +1101,13 @@  get_range_strlen_dynamic (tree src, c_strlen_data *pdata, bitmap *visited,
 	      const value_range_equiv *vr
 		= CONST_CAST (class vr_values *, rvals)
 		->get_value_range (si->nonzero_chars);
-	      if (vr->kind () == VR_RANGE
-		  && range_int_cst_p (vr))
+	      if (!vr->undefined_p () && !vr->varying_p ())
 		{
-		  pdata->minlen = vr->min ();
-		  pdata->maxlen = vr->max ();
+		  tree type = TREE_TYPE (si->nonzero_chars);
+		  tree tmin = wide_int_to_tree (type, vr->lower_bound ());
+		  tree tmax = wide_int_to_tree (type, vr->upper_bound ());
+		  pdata->minlen = tmin;
+		  pdata->maxlen = tmax;
 		}
 	      else
 		pdata->minlen = build_zero_cst (size_type_node);
@@ -1159,11 +1149,11 @@  get_range_strlen_dynamic (tree src, c_strlen_data *pdata, bitmap *visited,
 	  const value_range_equiv *vr
 	    = CONST_CAST (class vr_values *, rvals)
 	    ->get_value_range (si->nonzero_chars);
-	  if (vr->kind () == VR_RANGE
-	      && range_int_cst_p (vr))
+	  if (!vr->undefined_p () && !vr->varying_p ())
 	    {
-	      pdata->minlen = vr->min ();
-	      pdata->maxlen = vr->max ();
+	      tree type = TREE_TYPE (si->nonzero_chars);
+	      pdata->minlen = wide_int_to_tree (type, vr->lower_bound ());
+	      pdata->maxlen = wide_int_to_tree (type, vr->upper_bound ());
 	      pdata->maxbound = pdata->maxlen;
 	    }
 	  else
@@ -1803,12 +1793,14 @@  set_strlen_range (tree lhs, wide_int min, wide_int max,
       else if (TREE_CODE (bound) == SSA_NAME)
 	{
 	  wide_int minbound, maxbound;
-	  value_range_kind rng = get_range_info (bound, &minbound, &maxbound);
-	  if (rng == VR_RANGE)
+	  value_range vr;
+	  get_range_info (bound, vr);
+	  if (!vr.undefined_p () && !vr.varying_p ())
 	    {
+	      wide_int minbound = vr.lower_bound ();
+	      wide_int maxbound = vr.upper_bound ();
 	      /* For a bound in a known range, adjust the range determined
-		 above as necessary.  For a bound in some anti-range or
-		 in an unknown range, use the range determined by callers.  */
+		 above as necessary.  */
 	      if (wi::ltu_p (minbound, min))
 		min = minbound;
 	      if (wi::ltu_p (maxbound, max))
@@ -3045,6 +3037,8 @@  maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt)
 
 	  if (wi::ltu_p (cntrange[1], maxobjsize))
 	    {
+	      /* FIXME: Imagine ~[10,20].  The code here will set
+		 cntrange[] to [21,MAX].  Why are we ignoring [0,9] ??  */
 	      cntrange[0] = cntrange[1] + 1;
 	      cntrange[1] = maxobjsize;
 	    }
@@ -4139,10 +4133,12 @@  get_len_or_size (tree arg, int idx, unsigned HOST_WIDE_INT lenrng[2],
 	}
       else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
 	{
-	  wide_int min, max;
-	  value_range_kind rng = get_range_info (si->nonzero_chars, &min, &max);
-	  if (rng == VR_RANGE)
+	  value_range vr;
+	  get_range_info (si->nonzero_chars, vr);
+	  if (!vr.undefined_p () && !vr.varying_p ())
 	    {
+	      wide_int min = vr.lower_bound ();
+	      wide_int max = vr.upper_bound ();
 	      lenrng[0] = min.to_uhwi ();
 	      lenrng[1] = max.to_uhwi ();
 	      *nulterm = si->full_string_p;
@@ -4847,11 +4843,11 @@  count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset,
 	{
 	  vr_values *v = CONST_CAST (vr_values *, rvals);
 	  const value_range_equiv *vr = v->get_value_range (si->nonzero_chars);
-	  if (vr->kind () != VR_RANGE || !range_int_cst_p (vr))
+	  if (vr->undefined_p () || vr->varying_p ())
 	    return false;
-
-	  minlen = tree_to_uhwi (vr->min ());
-	  maxlen = tree_to_uhwi (vr->max ());
+	  tree type = TREE_TYPE (si->nonzero_chars);
+	  minlen = tree_to_uhwi (wide_int_to_tree (type, vr->lower_bound ()));
+	  maxlen = tree_to_uhwi (wide_int_to_tree (type, vr->upper_bound ()));
 	}
       else
 	return false;
@@ -5554,16 +5550,12 @@  handle_integral_assign (gimple_stmt_iterator *gsi, bool *cleanup_eh,
 		  /* Reading a character before the final '\0'
 		     character.  Just set the value range to ~[0, 0]
 		     if we don't have anything better.  */
-		  wide_int min, max;
-		  signop sign = TYPE_SIGN (lhs_type);
-		  int prec = TYPE_PRECISION (lhs_type);
-		  value_range_kind vr = get_range_info (lhs, &min, &max);
-		  if (vr == VR_VARYING
-		      || (vr == VR_RANGE
-			  && min == wi::min_value (prec, sign)
-			  && max == wi::max_value (prec, sign)))
+		  value_range vr;
+		  get_range_info (lhs, vr);
+		  if (vr.varying_p ())
 		    set_range_info (lhs, VR_ANTI_RANGE,
-				    wi::zero (prec), wi::zero (prec));
+				    wi::zero (TYPE_PRECISION (lhs_type)),
+				    wi::zero (TYPE_PRECISION (lhs_type)));
 		}
 	    }
 	}