diff mbox series

Rewrite get_size_range for irange API.

Message ID 20200806145350.864962-1-aldyh@redhat.com
State New
Headers show
Series Rewrite get_size_range for irange API. | expand

Commit Message

Aldy Hernandez Aug. 6, 2020, 2:53 p.m. UTC
[Martin, does this sound reasonable to you?]

The following patch converts get_size_range to the irange API, thus
removing the use of VR_ANTI_RANGE.

This was a bit tricky because of the gymnastics we do in get_size_range
to ignore negatives and all that.  I didn't convert the function for
multi-ranges.  The code still returns a pair of trees indicating the
massaged range.  But I do believe the code is cleaner and smaller.

I'm not sure the current code (or my adaptation) gets all cases, but
the goal was to keep to the existing functionality, nothing more.

OK?

gcc/ChangeLog:

	* calls.c (range_remove_non_positives): New.
	(set_bounds_from_range): New.
	(get_size_range): Rewrite for irange API.
	* tree-affine.c (expr_to_aff_combination): Call determine_value_range
	with a range.
	* tree-vrp.c (determine_value_range_1): Rename to...
	(determine_value_range): ...this.
	* tree-vrp.h (determine_value_range): Adjust prototype.
---
 gcc/calls.c       | 139 ++++++++++++++++++----------------------------
 gcc/tree-affine.c |   5 +-
 gcc/tree-vrp.c    |  44 ++++++---------
 gcc/tree-vrp.h    |   2 +-
 4 files changed, 73 insertions(+), 117 deletions(-)

Comments

Martin Sebor Aug. 6, 2020, 7:30 p.m. UTC | #1
On 8/6/20 8:53 AM, Aldy Hernandez via Gcc-patches wrote:
> [Martin, does this sound reasonable to you?]

It mostly makes sense to me except one part:

> 
> The following patch converts get_size_range to the irange API, thus
> removing the use of VR_ANTI_RANGE.
> 
> This was a bit tricky because of the gymnastics we do in get_size_range
> to ignore negatives and all that.  I didn't convert the function for
> multi-ranges.  The code still returns a pair of trees indicating the
> massaged range.  But I do believe the code is cleaner and smaller.
> 
> I'm not sure the current code (or my adaptation) gets all cases, but
> the goal was to keep to the existing functionality, nothing more.
> 
> OK?
> 
> gcc/ChangeLog:
> 
> 	* calls.c (range_remove_non_positives): New.
> 	(set_bounds_from_range): New.
> 	(get_size_range): Rewrite for irange API.
> 	* tree-affine.c (expr_to_aff_combination): Call determine_value_range
> 	with a range.
> 	* tree-vrp.c (determine_value_range_1): Rename to...
> 	(determine_value_range): ...this.
> 	* tree-vrp.h (determine_value_range): Adjust prototype.
> ---
>   gcc/calls.c       | 139 ++++++++++++++++++----------------------------
>   gcc/tree-affine.c |   5 +-
>   gcc/tree-vrp.c    |  44 ++++++---------
>   gcc/tree-vrp.h    |   2 +-
>   4 files changed, 73 insertions(+), 117 deletions(-)
> 
> diff --git a/gcc/calls.c b/gcc/calls.c
> index 44401e6350d..4aeeb36a2be 100644
> --- a/gcc/calls.c
> +++ b/gcc/calls.c
> @@ -57,6 +57,7 @@ along with GCC; see the file COPYING3.  If not see
>   #include "attribs.h"
>   #include "builtins.h"
>   #include "gimple-fold.h"
> +#include "range.h"
>   
>   /* Like PREFERRED_STACK_BOUNDARY but in units of bytes, not bits.  */
>   #define STACK_BYTES (PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
> @@ -1237,6 +1238,31 @@ alloc_max_size (void)
>     return alloc_object_size_limit;
>   }
>   
> +// Remove non-positive numbers from a range.  ALLOW_ZERO is TRUE if 0
> +// is considered positive.
> +
> +static void
> +range_remove_non_positives (irange *vr, bool allow_zero)
> +{
> +  tree floor, type = vr->type ();
> +  if (allow_zero)
> +    floor = build_zero_cst (type);
> +  else
> +    floor = build_one_cst (type);
> +  value_range positives (floor, TYPE_MAX_VALUE (type));
> +  vr->intersect (positives);
> +}
> +
> +// Set the extreme bounds of range VR into range[].
> +
> +static bool
> +set_bounds_from_range (const irange *vr, tree range[2])
> +{
> +  range[0] = wide_int_to_tree (vr->type (), vr->lower_bound ());
> +  range[1] = wide_int_to_tree (vr->type (), vr->upper_bound ());
> +  return true;
> +}
> +
>   /* Return true when EXP's range can be determined and set RANGE[] to it
>      after adjusting it if necessary to make EXP a represents a valid size
>      of object, or a valid size argument to an allocation function declared
> @@ -1250,9 +1276,11 @@ alloc_max_size (void)
>   bool
>   get_size_range (tree exp, tree range[2], bool allow_zero /* = false */)
>   {
> -  if (!exp)
> -    return false;
> -
> +  if (!exp || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
> +    {
> +      range[0] = range[1] = NULL_TREE;
> +      return false;
> +    }
>     if (tree_fits_uhwi_p (exp))
>       {
>         /* EXP is a constant.  */
> @@ -1261,91 +1289,30 @@ get_size_range (tree exp, tree range[2], bool allow_zero /* = false */)
>       }
>   
>     tree exptype = TREE_TYPE (exp);
> -  bool integral = INTEGRAL_TYPE_P (exptype);
> -
> -  wide_int min, max;
> -  enum value_range_kind range_type;
> -
> -  if (integral)
> -    range_type = determine_value_range (exp, &min, &max);
> -  else
> -    range_type = VR_VARYING;
> -
> -  if (range_type == VR_VARYING)
> +  value_range vr;
> +  determine_value_range (&vr, exp);
> +  if (vr.num_pairs () == 1)
> +    return set_bounds_from_range (&vr, range);
> +
> +  widest_irange positives (vr);
> +  range_remove_non_positives (&positives, allow_zero);
> +
> +  // If all numbers are negative, let the caller sort it out.
> +  if (positives.undefined_p ())
> +    return set_bounds_from_range (&vr, range);
> +
> +  // Remove the unknown parts of a multi-range.
> +  // This will transform [5,10][20,MAX] into [5,10].

Is this comment correct?  Wouldn't this result in returning smaller
sizes than the actual value allows?  If so, I'd expect that to cause
false positives (and in that case, if none of our tests fail we need
to add some that would).

By my reading of the code below it seems to return the upper range
(i.e., [20, MAX]) but I'm not fully acquainted with the new ranger
APIs yet.

Thanks
Martin


> +  int pairs = positives.num_pairs ();
> +  if (pairs > 1
> +      && positives.upper_bound () == wi::to_wide (TYPE_MAX_VALUE (exptype)))
>       {
> -      if (integral)
> -	{
> -	  /* Use the full range of the type of the expression when
> -	     no value range information is available.  */
> -	  range[0] = TYPE_MIN_VALUE (exptype);
> -	  range[1] = TYPE_MAX_VALUE (exptype);
> -	  return true;
> -	}
> -
> -      range[0] = NULL_TREE;
> -      range[1] = NULL_TREE;
> -      return false;
> +      value_range last_range (exptype,
> +			      positives.lower_bound (pairs - 1),
> +			      positives.upper_bound (pairs - 1), VR_ANTI_RANGE);
> +      positives.intersect (last_range);
>       }
> -
> -  unsigned expprec = TYPE_PRECISION (exptype);
> -
> -  bool signed_p = !TYPE_UNSIGNED (exptype);
> -
> -  if (range_type == VR_ANTI_RANGE)
> -    {
> -      if (signed_p)
> -	{
> -	  if (wi::les_p (max, 0))
> -	    {
> -	      /* EXP is not in a strictly negative range.  That means
> -		 it must be in some (not necessarily strictly) positive
> -		 range which includes zero.  Since in signed to unsigned
> -		 conversions negative values end up converted to large
> -		 positive values, and otherwise they are not valid sizes,
> -		 the resulting range is in both cases [0, TYPE_MAX].  */
> -	      min = wi::zero (expprec);
> -	      max = wi::to_wide (TYPE_MAX_VALUE (exptype));
> -	    }
> -	  else if (wi::les_p (min - 1, 0))
> -	    {
> -	      /* EXP is not in a negative-positive range.  That means EXP
> -		 is either negative, or greater than max.  Since negative
> -		 sizes are invalid make the range [MAX + 1, TYPE_MAX].  */
> -	      min = max + 1;
> -	      max = wi::to_wide (TYPE_MAX_VALUE (exptype));
> -	    }
> -	  else
> -	    {
> -	      max = min - 1;
> -	      min = wi::zero (expprec);
> -	    }
> -	}
> -      else if (wi::eq_p (0, min - 1))
> -	{
> -	  /* EXP is unsigned and not in the range [1, MAX].  That means
> -	     it's either zero or greater than MAX.  Even though 0 would
> -	     normally be detected by -Walloc-zero, unless ALLOW_ZERO
> -	     is true, set the range to [MAX, TYPE_MAX] so that when MAX
> -	     is greater than the limit the whole range is diagnosed.  */
> -	  if (allow_zero)
> -	    min = max = wi::zero (expprec);
> -	  else
> -	    {
> -	      min = max + 1;
> -	      max = wi::to_wide (TYPE_MAX_VALUE (exptype));
> -	    }
> -	}
> -      else
> -	{
> -	  max = min - 1;
> -	  min = wi::zero (expprec);
> -	}
> -    }
> -
> -  range[0] = wide_int_to_tree (exptype, min);
> -  range[1] = wide_int_to_tree (exptype, max);
> -
> -  return true;
> +  return set_bounds_from_range (&positives, range);
>   }
>   
>   /* Diagnose a call EXP to function FN decorated with attribute alloc_size
> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
> index 5620e6bf28f..2a1b1872e27 100644
> --- a/gcc/tree-affine.c
> +++ b/gcc/tree-affine.c
> @@ -340,7 +340,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>   		op1 = fold_convert (otype, op1);
>   		return expr_to_aff_combination (comb, icode, otype, op0, op1);
>   	      }
> -	    wide_int minv, maxv;
> +	    value_range vr;
>   	    /* If inner type has wrapping overflow behavior, fold conversion
>   	       for below case:
>   		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
> @@ -348,10 +348,11 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>   	    if (TYPE_UNSIGNED (itype)
>   		&& TYPE_OVERFLOW_WRAPS (itype)
>   		&& TREE_CODE (op1) == INTEGER_CST
> -		&& determine_value_range (op0, &minv, &maxv) == VR_RANGE)
> +		&& determine_value_range (&vr, op0))
>   	      {
>   		wi::overflow_type overflow = wi::OVF_NONE;
>   		signop sign = UNSIGNED;
> +		wide_int minv = vr.lower_bound (), maxv = vr.upper_bound ();
>   		if (icode == PLUS_EXPR)
>   		  wi::add (maxv, wi::to_wide (op1), sign, &overflow);
>   		else if (icode == MULT_EXPR)
> diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
> index de84c1d505d..2078cd246c6 100644
> --- a/gcc/tree-vrp.c
> +++ b/gcc/tree-vrp.c
> @@ -4565,25 +4565,30 @@ make_pass_vrp (gcc::context *ctxt)
>   }
>   
>   
> -/* Worker for determine_value_range.  */
> +/* Compute a value-range for EXPR and set it in *VR.  Return TRUE if range
> +   found was neither varying nor undefined.  */
>   
> -static void
> -determine_value_range_1 (value_range *vr, tree expr)
> +bool
> +determine_value_range (irange *vr, tree expr)
>   {
>     if (BINARY_CLASS_P (expr))
>       {
> -      value_range vr0, vr1;
> -      determine_value_range_1 (&vr0, TREE_OPERAND (expr, 0));
> -      determine_value_range_1 (&vr1, TREE_OPERAND (expr, 1));
> -      range_fold_binary_expr (vr, TREE_CODE (expr), TREE_TYPE (expr),
> +      value_range res, vr0, vr1;
> +      determine_value_range (&vr0, TREE_OPERAND (expr, 0));
> +      determine_value_range (&vr1, TREE_OPERAND (expr, 1));
> +      range_fold_binary_expr (&res, TREE_CODE (expr), TREE_TYPE (expr),
>   			      &vr0, &vr1);
> +      res.normalize_symbolics ();
> +      *vr = res;
>       }
>     else if (UNARY_CLASS_P (expr))
>       {
> -      value_range vr0;
> -      determine_value_range_1 (&vr0, TREE_OPERAND (expr, 0));
> -      range_fold_unary_expr (vr, TREE_CODE (expr), TREE_TYPE (expr),
> +      value_range res, vr0;
> +      determine_value_range (&vr0, TREE_OPERAND (expr, 0));
> +      range_fold_unary_expr (&res, TREE_CODE (expr), TREE_TYPE (expr),
>   			     &vr0, TREE_TYPE (TREE_OPERAND (expr, 0)));
> +      res.normalize_symbolics ();
> +      *vr = res;
>       }
>     else if (TREE_CODE (expr) == INTEGER_CST)
>       vr->set (expr);
> @@ -4602,22 +4607,5 @@ determine_value_range_1 (value_range *vr, tree expr)
>         else
>   	vr->set_varying (TREE_TYPE (expr));
>       }
> -}
> -
> -/* Compute a value-range for EXPR and set it in *MIN and *MAX.  Return
> -   the determined range type.  */
> -
> -value_range_kind
> -determine_value_range (tree expr, wide_int *min, wide_int *max)
> -{
> -  value_range vr;
> -  determine_value_range_1 (&vr, expr);
> -  if (vr.constant_p ())
> -    {
> -      *min = wi::to_wide (vr.min ());
> -      *max = wi::to_wide (vr.max ());
> -      return vr.kind ();
> -    }
> -
> -  return VR_VARYING;
> +  return !vr->varying_p () && !vr->undefined_p ();
>   }
> diff --git a/gcc/tree-vrp.h b/gcc/tree-vrp.h
> index 371e58aa0ea..54c88c93d48 100644
> --- a/gcc/tree-vrp.h
> +++ b/gcc/tree-vrp.h
> @@ -61,7 +61,7 @@ extern bool find_case_label_index (gswitch *, size_t, tree, size_t *);
>   extern bool overflow_comparison_p (tree_code, tree, tree, bool, tree *);
>   extern tree get_single_symbol (tree, bool *, tree *);
>   extern void maybe_set_nonzero_bits (edge, tree);
> -extern value_range_kind determine_value_range (tree, wide_int *, wide_int *);
> +extern bool determine_value_range (irange *vr, tree);
>   extern wide_int masked_increment (const wide_int &val_in, const wide_int &mask,
>   				  const wide_int &sgnbit, unsigned int prec);
>   
>
Aldy Hernandez Aug. 10, 2020, 11:47 a.m. UTC | #2
On 8/6/20 9:30 PM, Martin Sebor wrote:
> On 8/6/20 8:53 AM, Aldy Hernandez via Gcc-patches wrote:

>> +  // Remove the unknown parts of a multi-range.
>> +  // This will transform [5,10][20,MAX] into [5,10].
> 
> Is this comment correct?  Wouldn't this result in returning smaller
> sizes than the actual value allows?  If so, I'd expect that to cause
> false positives (and in that case, if none of our tests fail we need
> to add some that would).
> 
> By my reading of the code below it seems to return the upper range
> (i.e., [20, MAX]) but I'm not fully acquainted with the new ranger
> APIs yet.

I believe the comment is correct.  What I'm trying to do is remove 
[X,TYPE_MAX_VALUE] from the range

>> +  int pairs = positives.num_pairs ();
>> +  if (pairs > 1
>> +      && positives.upper_bound () == wi::to_wide (TYPE_MAX_VALUE 
>> (exptype)))
>>       {
>> -      if (integral)
>> -    {
>> -      /* Use the full range of the type of the expression when
>> -         no value range information is available.  */
>> -      range[0] = TYPE_MIN_VALUE (exptype);
>> -      range[1] = TYPE_MAX_VALUE (exptype);
>> -      return true;
>> -    }
>> -
>> -      range[0] = NULL_TREE;
>> -      range[1] = NULL_TREE;
>> -      return false;
>> +      value_range last_range (exptype,
>> +                  positives.lower_bound (pairs - 1),
>> +                  positives.upper_bound (pairs - 1), VR_ANTI_RANGE);
>> +      positives.intersect (last_range);
>>       }

Notice that we start with positives == vr (where vr is the range 
returned by determine_value_range).

Then we build last_range which is the *opposite* of [X,TYPE_MAX_VALUE]. 
Notice the VR_ANTI_RANGE in the constructor.

(Note that the nomenclature VR_ANTI_RANGE is being used in the 
constructor to keep with the original API.  It means "create the inverse 
of this range".  Ideally, when the m_kind field disappears along with 
VR_ANTI_RANGEs, the enum could become RANGE_INVERSE or something less 
confusing.)

Finally, last_range is intersected with positives, which will become 
whatever VR had, minus the last sub-range.

Those that make sense, or did I miss something?

If you have a testcase that yields a false positive in my proposed 
patch, but not in the original code, please let me know so I can adjust 
the patch.

Thanks for your feedback.
Aldy
Martin Sebor Aug. 10, 2020, 4:35 p.m. UTC | #3
On 8/10/20 5:47 AM, Aldy Hernandez wrote:
> 
> 
> On 8/6/20 9:30 PM, Martin Sebor wrote:
>> On 8/6/20 8:53 AM, Aldy Hernandez via Gcc-patches wrote:
> 
>>> +  // Remove the unknown parts of a multi-range.
>>> +  // This will transform [5,10][20,MAX] into [5,10].
>>
>> Is this comment correct?  Wouldn't this result in returning smaller
>> sizes than the actual value allows?  If so, I'd expect that to cause
>> false positives (and in that case, if none of our tests fail we need
>> to add some that would).
>>
>> By my reading of the code below it seems to return the upper range
>> (i.e., [20, MAX]) but I'm not fully acquainted with the new ranger
>> APIs yet.
> 
> I believe the comment is correct.  What I'm trying to do is remove 
> [X,TYPE_MAX_VALUE] from the range
> 
>>> +  int pairs = positives.num_pairs ();
>>> +  if (pairs > 1
>>> +      && positives.upper_bound () == wi::to_wide (TYPE_MAX_VALUE 
>>> (exptype)))
>>>       {
>>> -      if (integral)
>>> -    {
>>> -      /* Use the full range of the type of the expression when
>>> -         no value range information is available.  */
>>> -      range[0] = TYPE_MIN_VALUE (exptype);
>>> -      range[1] = TYPE_MAX_VALUE (exptype);
>>> -      return true;
>>> -    }
>>> -
>>> -      range[0] = NULL_TREE;
>>> -      range[1] = NULL_TREE;
>>> -      return false;
>>> +      value_range last_range (exptype,
>>> +                  positives.lower_bound (pairs - 1),
>>> +                  positives.upper_bound (pairs - 1), VR_ANTI_RANGE);
>>> +      positives.intersect (last_range);
>>>       }
> 
> Notice that we start with positives == vr (where vr is the range 
> returned by determine_value_range).
> 
> Then we build last_range which is the *opposite* of [X,TYPE_MAX_VALUE]. 
> Notice the VR_ANTI_RANGE in the constructor.
> 
> (Note that the nomenclature VR_ANTI_RANGE is being used in the 
> constructor to keep with the original API.  It means "create the inverse 
> of this range".  Ideally, when the m_kind field disappears along with 
> VR_ANTI_RANGEs, the enum could become RANGE_INVERSE or something less 
> confusing.)
> 
> Finally, last_range is intersected with positives, which will become 
> whatever VR had, minus the last sub-range.
> 
> Those that make sense, or did I miss something?
> 
> If you have a testcase that yields a false positive in my proposed 
> patch, but not in the original code, please let me know so I can adjust 
> the patch.

Sorry, I misremembered what get_size_range was used for.  It's used
to constrain the maximum size of memory accesses by functions like
memcpy (the third argument).  The use case I was thinking of was
to determine the bounds of the size of variably modified objects
(like VLAs).  It doesn't look like it's used for that.

With the current use taking the lower bound is the conservative
thing to do and using the upper bound would cause false positives.
With the latter it would be the other way around.  So the comment
makes sense to me now.

As an aside, here's a test case I played with.  I'd expect it to
trigger a warning because the upper bound of the range of array's
size is less than the lower bound of the range of the number of
bytes being written to it.

void f (void*);

void g (unsigned n1, unsigned n2)
{
   if (!(n1 >= 2 && n1 <= 3))   // n1 = [2, 3]
     return;

   char a[n1];

   if (!((n2 >= 5 && n2 <= 10)
         || (n2 >= 20)))        // n2 = [5, 10] U [20, UINT_MAX]
     return;

   __builtin_memset (a, 0, n2);
   f (a);
}

But I couldn't get it to either produce a multipart range, or
a union of the two parts (i.e., [5, UINT_MAX]).  It makes me
wonder what I'm missing about how such ranges are supposed to
come into existence.

Martin
Andrew MacLeod Aug. 10, 2020, 5:50 p.m. UTC | #4
On 8/10/20 12:35 PM, Martin Sebor via Gcc-patches wrote:
> On 8/10/20 5:47 AM, Aldy Hernandez wrote:
>>
>>
>> On 8/6/20 9:30 PM, Martin Sebor wrote:
>>> On 8/6/20 8:53 AM, Aldy Hernandez via Gcc-patches wrote:
>>
>>>> +  // Remove the unknown parts of a multi-range.
>>>> +  // This will transform [5,10][20,MAX] into [5,10].
>>>
>>> Is this comment correct?  Wouldn't this result in returning smaller
>>> sizes than the actual value allows?  If so, I'd expect that to cause
>>> false positives (and in that case, if none of our tests fail we need
>>> to add some that would).
>>>
>>> By my reading of the code below it seems to return the upper range
>>> (i.e., [20, MAX]) but I'm not fully acquainted with the new ranger
>>> APIs yet.
>>
>> I believe the comment is correct.  What I'm trying to do is remove 
>> [X,TYPE_MAX_VALUE] from the range
>>
>>>> +  int pairs = positives.num_pairs ();
>>>> +  if (pairs > 1
>>>> +      && positives.upper_bound () == wi::to_wide (TYPE_MAX_VALUE 
>>>> (exptype)))
>>>>       {
>>>> -      if (integral)
>>>> -    {
>>>> -      /* Use the full range of the type of the expression when
>>>> -         no value range information is available.  */
>>>> -      range[0] = TYPE_MIN_VALUE (exptype);
>>>> -      range[1] = TYPE_MAX_VALUE (exptype);
>>>> -      return true;
>>>> -    }
>>>> -
>>>> -      range[0] = NULL_TREE;
>>>> -      range[1] = NULL_TREE;
>>>> -      return false;
>>>> +      value_range last_range (exptype,
>>>> +                  positives.lower_bound (pairs - 1),
>>>> +                  positives.upper_bound (pairs - 1), VR_ANTI_RANGE);
>>>> +      positives.intersect (last_range);
>>>>       }
>>
>> Notice that we start with positives == vr (where vr is the range 
>> returned by determine_value_range).
>>
>> Then we build last_range which is the *opposite* of 
>> [X,TYPE_MAX_VALUE]. Notice the VR_ANTI_RANGE in the constructor.
>>
>> (Note that the nomenclature VR_ANTI_RANGE is being used in the 
>> constructor to keep with the original API.  It means "create the 
>> inverse of this range".  Ideally, when the m_kind field disappears 
>> along with VR_ANTI_RANGEs, the enum could become RANGE_INVERSE or 
>> something less confusing.)
>>
>> Finally, last_range is intersected with positives, which will become 
>> whatever VR had, minus the last sub-range.
>>
>> Those that make sense, or did I miss something?
>>
>> If you have a testcase that yields a false positive in my proposed 
>> patch, but not in the original code, please let me know so I can 
>> adjust the patch.
>
> Sorry, I misremembered what get_size_range was used for.  It's used
> to constrain the maximum size of memory accesses by functions like
> memcpy (the third argument).  The use case I was thinking of was
> to determine the bounds of the size of variably modified objects
> (like VLAs).  It doesn't look like it's used for that.
>
> With the current use taking the lower bound is the conservative
> thing to do and using the upper bound would cause false positives.
> With the latter it would be the other way around.  So the comment
> makes sense to me now.
>
> As an aside, here's a test case I played with.  I'd expect it to
> trigger a warning because the upper bound of the range of array's
> size is less than the lower bound of the range of the number of
> bytes being written to it.
>
> void f (void*);
>
> void g (unsigned n1, unsigned n2)
> {
>   if (!(n1 >= 2 && n1 <= 3))   // n1 = [2, 3]
>     return;
>
>   char a[n1];
>
>   if (!((n2 >= 5 && n2 <= 10)
>         || (n2 >= 20)))        // n2 = [5, 10] U [20, UINT_MAX]
>     return;
>
>   __builtin_memset (a, 0, n2);
>   f (a);
> }
>
> But I couldn't get it to either produce a multipart range, or
> a union of the two parts (i.e., [5, UINT_MAX]).  It makes me
> wonder what I'm missing about how such ranges are supposed to
> come into existence.
>
> Martin
>
There mare a couple of things..

My guess is you are using a value_range?   value_ranges are an 
int_range<1> under the covers, so will never have more than a single 
range, or anti range.

int_range<X> is the type which allows for up to X subranges. 
calculations will be merged to fit within X subranges
widest_irange is the type which allows for "unlimited" subranges... 
which currently happens to be capped at 255.. .  (its typedef'd as 
int_range<255>).

widest_irange is the type used within the range-ops machinery and such, 
and then whatever result is calculated is "toned down" to whatever to 
user provides.

so if union results in [5,10] and [20, MAX]   and you provide a 
value_range for the result (, or int_range<1>), the result you get back 
will be [5, MAX].. so won't look like there are any multi-ranges going on.

and don't forget, EVRP only works with value_range, or int_range<1>.    
You cant currently get these ranges without the Ranger either... which 
is still on our branch.  We're hoping to get moving into trunk in the 
next couple of weeks.
I think Aldy is just trying to get the code functioning the same with 
the new API, even if we cant get the more precise ranges yet. Then when 
the ranger is added in, we switch you over and voila... multi-ranges.

Andrew
Aldy Hernandez Aug. 10, 2020, 5:54 p.m. UTC | #5
Yes, the goal is that anything that may take multi ranges be rewritten to
use an irange * and use the API exclusively.  Then when multi ranges are
passed down eventually, things will work transparently.

Aldy

On Mon, Aug 10, 2020, 19:50 Andrew MacLeod <amacleod@redhat.com> wrote:

> On 8/10/20 12:35 PM, Martin Sebor via Gcc-patches wrote:
> > On 8/10/20 5:47 AM, Aldy Hernandez wrote:
> >>
> >>
> >> On 8/6/20 9:30 PM, Martin Sebor wrote:
> >>> On 8/6/20 8:53 AM, Aldy Hernandez via Gcc-patches wrote:
> >>
> >>>> +  // Remove the unknown parts of a multi-range.
> >>>> +  // This will transform [5,10][20,MAX] into [5,10].
> >>>
> >>> Is this comment correct?  Wouldn't this result in returning smaller
> >>> sizes than the actual value allows?  If so, I'd expect that to cause
> >>> false positives (and in that case, if none of our tests fail we need
> >>> to add some that would).
> >>>
> >>> By my reading of the code below it seems to return the upper range
> >>> (i.e., [20, MAX]) but I'm not fully acquainted with the new ranger
> >>> APIs yet.
> >>
> >> I believe the comment is correct.  What I'm trying to do is remove
> >> [X,TYPE_MAX_VALUE] from the range
> >>
> >>>> +  int pairs = positives.num_pairs ();
> >>>> +  if (pairs > 1
> >>>> +      && positives.upper_bound () == wi::to_wide (TYPE_MAX_VALUE
> >>>> (exptype)))
> >>>>       {
> >>>> -      if (integral)
> >>>> -    {
> >>>> -      /* Use the full range of the type of the expression when
> >>>> -         no value range information is available.  */
> >>>> -      range[0] = TYPE_MIN_VALUE (exptype);
> >>>> -      range[1] = TYPE_MAX_VALUE (exptype);
> >>>> -      return true;
> >>>> -    }
> >>>> -
> >>>> -      range[0] = NULL_TREE;
> >>>> -      range[1] = NULL_TREE;
> >>>> -      return false;
> >>>> +      value_range last_range (exptype,
> >>>> +                  positives.lower_bound (pairs - 1),
> >>>> +                  positives.upper_bound (pairs - 1), VR_ANTI_RANGE);
> >>>> +      positives.intersect (last_range);
> >>>>       }
> >>
> >> Notice that we start with positives == vr (where vr is the range
> >> returned by determine_value_range).
> >>
> >> Then we build last_range which is the *opposite* of
> >> [X,TYPE_MAX_VALUE]. Notice the VR_ANTI_RANGE in the constructor.
> >>
> >> (Note that the nomenclature VR_ANTI_RANGE is being used in the
> >> constructor to keep with the original API.  It means "create the
> >> inverse of this range".  Ideally, when the m_kind field disappears
> >> along with VR_ANTI_RANGEs, the enum could become RANGE_INVERSE or
> >> something less confusing.)
> >>
> >> Finally, last_range is intersected with positives, which will become
> >> whatever VR had, minus the last sub-range.
> >>
> >> Those that make sense, or did I miss something?
> >>
> >> If you have a testcase that yields a false positive in my proposed
> >> patch, but not in the original code, please let me know so I can
> >> adjust the patch.
> >
> > Sorry, I misremembered what get_size_range was used for.  It's used
> > to constrain the maximum size of memory accesses by functions like
> > memcpy (the third argument).  The use case I was thinking of was
> > to determine the bounds of the size of variably modified objects
> > (like VLAs).  It doesn't look like it's used for that.
> >
> > With the current use taking the lower bound is the conservative
> > thing to do and using the upper bound would cause false positives.
> > With the latter it would be the other way around.  So the comment
> > makes sense to me now.
> >
> > As an aside, here's a test case I played with.  I'd expect it to
> > trigger a warning because the upper bound of the range of array's
> > size is less than the lower bound of the range of the number of
> > bytes being written to it.
> >
> > void f (void*);
> >
> > void g (unsigned n1, unsigned n2)
> > {
> >   if (!(n1 >= 2 && n1 <= 3))   // n1 = [2, 3]
> >     return;
> >
> >   char a[n1];
> >
> >   if (!((n2 >= 5 && n2 <= 10)
> >         || (n2 >= 20)))        // n2 = [5, 10] U [20, UINT_MAX]
> >     return;
> >
> >   __builtin_memset (a, 0, n2);
> >   f (a);
> > }
> >
> > But I couldn't get it to either produce a multipart range, or
> > a union of the two parts (i.e., [5, UINT_MAX]).  It makes me
> > wonder what I'm missing about how such ranges are supposed to
> > come into existence.
> >
> > Martin
> >
> There mare a couple of things..
>
> My guess is you are using a value_range?   value_ranges are an
> int_range<1> under the covers, so will never have more than a single
> range, or anti range.
>
> int_range<X> is the type which allows for up to X subranges.
> calculations will be merged to fit within X subranges
> widest_irange is the type which allows for "unlimited" subranges...
> which currently happens to be capped at 255.. .  (its typedef'd as
> int_range<255>).
>
> widest_irange is the type used within the range-ops machinery and such,
> and then whatever result is calculated is "toned down" to whatever to
> user provides.
>
> so if union results in [5,10] and [20, MAX]   and you provide a
> value_range for the result (, or int_range<1>), the result you get back
> will be [5, MAX].. so won't look like there are any multi-ranges going on.
>
> and don't forget, EVRP only works with value_range, or int_range<1>.
> You cant currently get these ranges without the Ranger either... which
> is still on our branch.  We're hoping to get moving into trunk in the
> next couple of weeks.
> I think Aldy is just trying to get the code functioning the same with
> the new API, even if we cant get the more precise ranges yet. Then when
> the ranger is added in, we switch you over and voila... multi-ranges.
>
> Andrew
>
>
Martin Sebor Aug. 10, 2020, 6:46 p.m. UTC | #6
On 8/10/20 11:50 AM, Andrew MacLeod wrote:
> On 8/10/20 12:35 PM, Martin Sebor via Gcc-patches wrote:
>> On 8/10/20 5:47 AM, Aldy Hernandez wrote:
>>>
>>>
>>> On 8/6/20 9:30 PM, Martin Sebor wrote:
>>>> On 8/6/20 8:53 AM, Aldy Hernandez via Gcc-patches wrote:
>>>
>>>>> +  // Remove the unknown parts of a multi-range.
>>>>> +  // This will transform [5,10][20,MAX] into [5,10].
>>>>
>>>> Is this comment correct?  Wouldn't this result in returning smaller
>>>> sizes than the actual value allows?  If so, I'd expect that to cause
>>>> false positives (and in that case, if none of our tests fail we need
>>>> to add some that would).
>>>>
>>>> By my reading of the code below it seems to return the upper range
>>>> (i.e., [20, MAX]) but I'm not fully acquainted with the new ranger
>>>> APIs yet.
>>>
>>> I believe the comment is correct.  What I'm trying to do is remove 
>>> [X,TYPE_MAX_VALUE] from the range
>>>
>>>>> +  int pairs = positives.num_pairs ();
>>>>> +  if (pairs > 1
>>>>> +      && positives.upper_bound () == wi::to_wide (TYPE_MAX_VALUE 
>>>>> (exptype)))
>>>>>       {
>>>>> -      if (integral)
>>>>> -    {
>>>>> -      /* Use the full range of the type of the expression when
>>>>> -         no value range information is available.  */
>>>>> -      range[0] = TYPE_MIN_VALUE (exptype);
>>>>> -      range[1] = TYPE_MAX_VALUE (exptype);
>>>>> -      return true;
>>>>> -    }
>>>>> -
>>>>> -      range[0] = NULL_TREE;
>>>>> -      range[1] = NULL_TREE;
>>>>> -      return false;
>>>>> +      value_range last_range (exptype,
>>>>> +                  positives.lower_bound (pairs - 1),
>>>>> +                  positives.upper_bound (pairs - 1), VR_ANTI_RANGE);
>>>>> +      positives.intersect (last_range);
>>>>>       }
>>>
>>> Notice that we start with positives == vr (where vr is the range 
>>> returned by determine_value_range).
>>>
>>> Then we build last_range which is the *opposite* of 
>>> [X,TYPE_MAX_VALUE]. Notice the VR_ANTI_RANGE in the constructor.
>>>
>>> (Note that the nomenclature VR_ANTI_RANGE is being used in the 
>>> constructor to keep with the original API.  It means "create the 
>>> inverse of this range".  Ideally, when the m_kind field disappears 
>>> along with VR_ANTI_RANGEs, the enum could become RANGE_INVERSE or 
>>> something less confusing.)
>>>
>>> Finally, last_range is intersected with positives, which will become 
>>> whatever VR had, minus the last sub-range.
>>>
>>> Those that make sense, or did I miss something?
>>>
>>> If you have a testcase that yields a false positive in my proposed 
>>> patch, but not in the original code, please let me know so I can 
>>> adjust the patch.
>>
>> Sorry, I misremembered what get_size_range was used for.  It's used
>> to constrain the maximum size of memory accesses by functions like
>> memcpy (the third argument).  The use case I was thinking of was
>> to determine the bounds of the size of variably modified objects
>> (like VLAs).  It doesn't look like it's used for that.
>>
>> With the current use taking the lower bound is the conservative
>> thing to do and using the upper bound would cause false positives.
>> With the latter it would be the other way around.  So the comment
>> makes sense to me now.
>>
>> As an aside, here's a test case I played with.  I'd expect it to
>> trigger a warning because the upper bound of the range of array's
>> size is less than the lower bound of the range of the number of
>> bytes being written to it.
>>
>> void f (void*);
>>
>> void g (unsigned n1, unsigned n2)
>> {
>>   if (!(n1 >= 2 && n1 <= 3))   // n1 = [2, 3]
>>     return;
>>
>>   char a[n1];
>>
>>   if (!((n2 >= 5 && n2 <= 10)
>>         || (n2 >= 20)))        // n2 = [5, 10] U [20, UINT_MAX]
>>     return;
>>
>>   __builtin_memset (a, 0, n2);
>>   f (a);
>> }
>>
>> But I couldn't get it to either produce a multipart range, or
>> a union of the two parts (i.e., [5, UINT_MAX]).  It makes me
>> wonder what I'm missing about how such ranges are supposed to
>> come into existence.
>>
>> Martin
>>
> There mare a couple of things..
> 
> My guess is you are using a value_range?   value_ranges are an 
> int_range<1> under the covers, so will never have more than a single 
> range, or anti range.

I see.  Yes, get_size_range calls determine_value_range which uses
a value_range.

> 
> int_range<X> is the type which allows for up to X subranges. 
> calculations will be merged to fit within X subranges
> widest_irange is the type which allows for "unlimited" subranges... 
> which currently happens to be capped at 255.. .  (its typedef'd as 
> int_range<255>).
> 
> widest_irange is the type used within the range-ops machinery and such, 
> and then whatever result is calculated is "toned down" to whatever to 
> user provides.
> 
> so if union results in [5,10] and [20, MAX]   and you provide a 
> value_range for the result (, or int_range<1>), the result you get back 
> will be [5, MAX].. so won't look like there are any multi-ranges going on.

This is one part of the puzzle (for me).  I don't get [5, MAX] but
[0, MAX], on trunk as well as in GCC 10:

   void f (unsigned n)
   {
     if (!((n >= 5 && n <= 10)
           || (n >= 20)))        // n2 = [5, 10] U [20, UINT_MAX]
       return;

     if (n == 3)                 // not folded
       __builtin_abort ();
   }

I'd expect this to get optimized regardless of Ranger (Clang folds
the whole function body into a return statement).

> and don't forget, EVRP only works with value_range, or int_range<1>. You 
> cant currently get these ranges without the Ranger either... which is 
> still on our branch.  We're hoping to get moving into trunk in the next 
> couple of weeks.
> I think Aldy is just trying to get the code functioning the same with 
> the new API, even if we cant get the more precise ranges yet. Then when 
> the ranger is added in, we switch you over and voila... multi-ranges.

Ah, this is the other part of what I was missing.  I thought it
was already (or mostly) on trunk.  It'll be fun (in a good way)
to start wiring the new muli-range classes into all these places.

Thanks
Martin
Andrew MacLeod Aug. 10, 2020, 7:57 p.m. UTC | #7
On 8/10/20 2:46 PM, Martin Sebor wrote:
> On 8/10/20 11:50 AM, Andrew MacLeod wrote:
>> On 8/10/20 12:35 PM, Martin Sebor via Gcc-patches wrote:
>>> On 8/10/20 5:47 AM, Aldy Hernandez wrote:
>>>>
>>>>
>>>> On 8/6/20 9:30 PM, Martin Sebor wrote:
>>>>> On 8/6/20 8:53 AM, Aldy Hernandez via Gcc-patches wrote:
>>>>
>>>>>> +  // Remove the unknown parts of a multi-range.
>>>>>> +  // This will transform [5,10][20,MAX] into [5,10].
>>>>>
>>>>> Is this comment correct?  Wouldn't this result in returning smaller
>>>>> sizes than the actual value allows?  If so, I'd expect that to cause
>>>>> false positives (and in that case, if none of our tests fail we need
>>>>> to add some that would).
>>>>>
>>>>> By my reading of the code below it seems to return the upper range
>>>>> (i.e., [20, MAX]) but I'm not fully acquainted with the new ranger
>>>>> APIs yet.
>>>>
>>>> I believe the comment is correct.  What I'm trying to do is remove 
>>>> [X,TYPE_MAX_VALUE] from the range
>>>>
>>>>>> +  int pairs = positives.num_pairs ();
>>>>>> +  if (pairs > 1
>>>>>> +      && positives.upper_bound () == wi::to_wide (TYPE_MAX_VALUE 
>>>>>> (exptype)))
>>>>>>       {
>>>>>> -      if (integral)
>>>>>> -    {
>>>>>> -      /* Use the full range of the type of the expression when
>>>>>> -         no value range information is available.  */
>>>>>> -      range[0] = TYPE_MIN_VALUE (exptype);
>>>>>> -      range[1] = TYPE_MAX_VALUE (exptype);
>>>>>> -      return true;
>>>>>> -    }
>>>>>> -
>>>>>> -      range[0] = NULL_TREE;
>>>>>> -      range[1] = NULL_TREE;
>>>>>> -      return false;
>>>>>> +      value_range last_range (exptype,
>>>>>> +                  positives.lower_bound (pairs - 1),
>>>>>> +                  positives.upper_bound (pairs - 1), 
>>>>>> VR_ANTI_RANGE);
>>>>>> +      positives.intersect (last_range);
>>>>>>       }
>>>>
>>>> Notice that we start with positives == vr (where vr is the range 
>>>> returned by determine_value_range).
>>>>
>>>> Then we build last_range which is the *opposite* of 
>>>> [X,TYPE_MAX_VALUE]. Notice the VR_ANTI_RANGE in the constructor.
>>>>
>>>> (Note that the nomenclature VR_ANTI_RANGE is being used in the 
>>>> constructor to keep with the original API.  It means "create the 
>>>> inverse of this range".  Ideally, when the m_kind field disappears 
>>>> along with VR_ANTI_RANGEs, the enum could become RANGE_INVERSE or 
>>>> something less confusing.)
>>>>
>>>> Finally, last_range is intersected with positives, which will 
>>>> become whatever VR had, minus the last sub-range.
>>>>
>>>> Those that make sense, or did I miss something?
>>>>
>>>> If you have a testcase that yields a false positive in my proposed 
>>>> patch, but not in the original code, please let me know so I can 
>>>> adjust the patch.
>>>
>>> Sorry, I misremembered what get_size_range was used for.  It's used
>>> to constrain the maximum size of memory accesses by functions like
>>> memcpy (the third argument).  The use case I was thinking of was
>>> to determine the bounds of the size of variably modified objects
>>> (like VLAs).  It doesn't look like it's used for that.
>>>
>>> With the current use taking the lower bound is the conservative
>>> thing to do and using the upper bound would cause false positives.
>>> With the latter it would be the other way around.  So the comment
>>> makes sense to me now.
>>>
>>> As an aside, here's a test case I played with.  I'd expect it to
>>> trigger a warning because the upper bound of the range of array's
>>> size is less than the lower bound of the range of the number of
>>> bytes being written to it.
>>>
>>> void f (void*);
>>>
>>> void g (unsigned n1, unsigned n2)
>>> {
>>>   if (!(n1 >= 2 && n1 <= 3))   // n1 = [2, 3]
>>>     return;
>>>
>>>   char a[n1];
>>>
>>>   if (!((n2 >= 5 && n2 <= 10)
>>>         || (n2 >= 20)))        // n2 = [5, 10] U [20, UINT_MAX]
>>>     return;
>>>
>>>   __builtin_memset (a, 0, n2);
>>>   f (a);
>>> }
>>>
>>> But I couldn't get it to either produce a multipart range, or
>>> a union of the two parts (i.e., [5, UINT_MAX]).  It makes me
>>> wonder what I'm missing about how such ranges are supposed to
>>> come into existence.
>>>
>>> Martin
>>>
>> There mare a couple of things..
>>
>> My guess is you are using a value_range?   value_ranges are an 
>> int_range<1> under the covers, so will never have more than a single 
>> range, or anti range.
>
> I see.  Yes, get_size_range calls determine_value_range which uses
> a value_range.
>
>>
>> int_range<X> is the type which allows for up to X subranges. 
>> calculations will be merged to fit within X subranges
>> widest_irange is the type which allows for "unlimited" subranges... 
>> which currently happens to be capped at 255.. . (its typedef'd as 
>> int_range<255>).
>>
>> widest_irange is the type used within the range-ops machinery and 
>> such, and then whatever result is calculated is "toned down" to 
>> whatever to user provides.
>>
>> so if union results in [5,10] and [20, MAX]   and you provide a 
>> value_range for the result (, or int_range<1>), the result you get 
>> back will be [5, MAX].. so won't look like there are any multi-ranges 
>> going on.
>
> This is one part of the puzzle (for me).  I don't get [5, MAX] but
> [0, MAX], on trunk as well as in GCC 10:
>
>   void f (unsigned n)
>   {
>     if (!((n >= 5 && n <= 10)
>           || (n >= 20)))        // n2 = [5, 10] U [20, UINT_MAX]
>       return;
>
>     if (n == 3)                 // not folded
>       __builtin_abort ();
>   }
>
> I'd expect this to get optimized regardless of Ranger (Clang folds
> the whole function body into a return statement).
>

well, on our branch, with multirange, the rvrp pass does rewrite the 
condition:

=========== BB 4 ============
n_5(D)  unsigned int [5, 10][20, +INF]
     <bb 4> :
     if (0 != 0)
       goto <bb 5>; [INV]
     else
       goto <bb 6>; [INV]


=========== BB 5 ============
     <bb 5> :
     __builtin_abort ();


=========== BB 6 ============
     <bb 6> :
     return;

EVRP doesn't get it because the code has been contorted into:
<bb 2> :
   _1 = n_5(D) + 4294967291;
   _2 = _1 > 5;
   _3 = n_5(D) <= 19;
   _4 = _2 & _3;
   if (_4 != 0)
     goto <bb 3>; [INV]
   else
     goto <bb 4>; [INV]

so without the multirange support, it sees:

Intersecting
   unsigned int ~[5, 10]
and
   unsigned int [0, 19]
to
   unsigned int [0, 19]

so it cant tell that 3 isn't part of it any more since it has to be 
pessimistic.   I always find things like anti range and regular range 
interactions "difficult".. I like multi range way better, and it sees 
through those contortions just fine :-).


>> and don't forget, EVRP only works with value_range, or int_range<1>. 
>> You cant currently get these ranges without the Ranger either... 
>> which is still on our branch.  We're hoping to get moving into trunk 
>> in the next couple of weeks.
>> I think Aldy is just trying to get the code functioning the same with 
>> the new API, even if we cant get the more precise ranges yet. Then 
>> when the ranger is added in, we switch you over and voila... 
>> multi-ranges.
>
> Ah, this is the other part of what I was missing.  I thought it
> was already (or mostly) on trunk.  It'll be fun (in a good way)
> to start wiring the new muli-range classes into all these places.
>

I look forward to fixing the problems you will inevitably find :-P


> Thanks
> Martin
>
Andrew MacLeod Aug. 10, 2020, 8:08 p.m. UTC | #8
On 8/10/20 2:46 PM, Martin Sebor wrote:
> On 8/10/20 11:50 AM, Andrew MacLeod wrote:
>> On 8/10/20 12:35 PM, Martin Sebor via Gcc-patches wrote:
>>> On 8/10/20 5:47 AM, Aldy Hernandez wrote:
>
>>
>> int_range<X> is the type which allows for up to X subranges. 
>> calculations will be merged to fit within X subranges
>> widest_irange is the type which allows for "unlimited" subranges... 
>> which currently happens to be capped at 255.. . (its typedef'd as 
>> int_range<255>).
>>
>> widest_irange is the type used within the range-ops machinery and 
>> such, and then whatever result is calculated is "toned down" to 
>> whatever to user provides.
>>
>> so if union results in [5,10] and [20, MAX]   and you provide a 
>> value_range for the result (, or int_range<1>), the result you get 
>> back will be [5, MAX].. so won't look like there are any multi-ranges 
>> going on.
>
> This is one part of the puzzle (for me).  I don't get [5, MAX] but
> [0, MAX], on trunk as well as in GCC 10:
>
>   void f (unsigned n)
>   {
>     if (!((n >= 5 && n <= 10)
>           || (n >= 20)))        // n2 = [5, 10] U [20, UINT_MAX]
>       return;
>
>     if (n == 3)                 // not folded
>       __builtin_abort ();
>   }
>
> I'd expect this to get optimized regardless of Ranger (Clang folds
> the whole function body into a return statement).
>
You mean like this? (from our branch.optimized output)  :-)

f (unsigned int n)
{
   <bb 2> [local count: 1073741824]:
   return;
}
Martin Sebor Aug. 10, 2020, 8:27 p.m. UTC | #9
On 8/10/20 2:08 PM, Andrew MacLeod wrote:
> On 8/10/20 2:46 PM, Martin Sebor wrote:
>> On 8/10/20 11:50 AM, Andrew MacLeod wrote:
>>> On 8/10/20 12:35 PM, Martin Sebor via Gcc-patches wrote:
>>>> On 8/10/20 5:47 AM, Aldy Hernandez wrote:
>>
>>>
>>> int_range<X> is the type which allows for up to X subranges. 
>>> calculations will be merged to fit within X subranges
>>> widest_irange is the type which allows for "unlimited" subranges... 
>>> which currently happens to be capped at 255.. . (its typedef'd as 
>>> int_range<255>).
>>>
>>> widest_irange is the type used within the range-ops machinery and 
>>> such, and then whatever result is calculated is "toned down" to 
>>> whatever to user provides.
>>>
>>> so if union results in [5,10] and [20, MAX]   and you provide a 
>>> value_range for the result (, or int_range<1>), the result you get 
>>> back will be [5, MAX].. so won't look like there are any multi-ranges 
>>> going on.
>>
>> This is one part of the puzzle (for me).  I don't get [5, MAX] but
>> [0, MAX], on trunk as well as in GCC 10:
>>
>>   void f (unsigned n)
>>   {
>>     if (!((n >= 5 && n <= 10)
>>           || (n >= 20)))        // n2 = [5, 10] U [20, UINT_MAX]
>>       return;
>>
>>     if (n == 3)                 // not folded
>>       __builtin_abort ();
>>   }
>>
>> I'd expect this to get optimized regardless of Ranger (Clang folds
>> the whole function body into a return statement).
>>
> You mean like this? (from our branch.optimized output)  :-)
> 
> f (unsigned int n)
> {
>    <bb 2> [local count: 1073741824]:
>    return;
> }

Sweet!  I want!  ;-) https://www.youtube.com/watch?v=IrCEhRNgGHY

Martin
diff mbox series

Patch

diff --git a/gcc/calls.c b/gcc/calls.c
index 44401e6350d..4aeeb36a2be 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -57,6 +57,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "attribs.h"
 #include "builtins.h"
 #include "gimple-fold.h"
+#include "range.h"
 
 /* Like PREFERRED_STACK_BOUNDARY but in units of bytes, not bits.  */
 #define STACK_BYTES (PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
@@ -1237,6 +1238,31 @@  alloc_max_size (void)
   return alloc_object_size_limit;
 }
 
+// Remove non-positive numbers from a range.  ALLOW_ZERO is TRUE if 0
+// is considered positive.
+
+static void
+range_remove_non_positives (irange *vr, bool allow_zero)
+{
+  tree floor, type = vr->type ();
+  if (allow_zero)
+    floor = build_zero_cst (type);
+  else
+    floor = build_one_cst (type);
+  value_range positives (floor, TYPE_MAX_VALUE (type));
+  vr->intersect (positives);
+}
+
+// Set the extreme bounds of range VR into range[].
+
+static bool
+set_bounds_from_range (const irange *vr, tree range[2])
+{
+  range[0] = wide_int_to_tree (vr->type (), vr->lower_bound ());
+  range[1] = wide_int_to_tree (vr->type (), vr->upper_bound ());
+  return true;
+}
+
 /* Return true when EXP's range can be determined and set RANGE[] to it
    after adjusting it if necessary to make EXP a represents a valid size
    of object, or a valid size argument to an allocation function declared
@@ -1250,9 +1276,11 @@  alloc_max_size (void)
 bool
 get_size_range (tree exp, tree range[2], bool allow_zero /* = false */)
 {
-  if (!exp)
-    return false;
-
+  if (!exp || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
+    {
+      range[0] = range[1] = NULL_TREE;
+      return false;
+    }
   if (tree_fits_uhwi_p (exp))
     {
       /* EXP is a constant.  */
@@ -1261,91 +1289,30 @@  get_size_range (tree exp, tree range[2], bool allow_zero /* = false */)
     }
 
   tree exptype = TREE_TYPE (exp);
-  bool integral = INTEGRAL_TYPE_P (exptype);
-
-  wide_int min, max;
-  enum value_range_kind range_type;
-
-  if (integral)
-    range_type = determine_value_range (exp, &min, &max);
-  else
-    range_type = VR_VARYING;
-
-  if (range_type == VR_VARYING)
+  value_range vr;
+  determine_value_range (&vr, exp);
+  if (vr.num_pairs () == 1)
+    return set_bounds_from_range (&vr, range);
+
+  widest_irange positives (vr);
+  range_remove_non_positives (&positives, allow_zero);
+
+  // If all numbers are negative, let the caller sort it out.
+  if (positives.undefined_p ())
+    return set_bounds_from_range (&vr, range);
+
+  // Remove the unknown parts of a multi-range.
+  // This will transform [5,10][20,MAX] into [5,10].
+  int pairs = positives.num_pairs ();
+  if (pairs > 1
+      && positives.upper_bound () == wi::to_wide (TYPE_MAX_VALUE (exptype)))
     {
-      if (integral)
-	{
-	  /* Use the full range of the type of the expression when
-	     no value range information is available.  */
-	  range[0] = TYPE_MIN_VALUE (exptype);
-	  range[1] = TYPE_MAX_VALUE (exptype);
-	  return true;
-	}
-
-      range[0] = NULL_TREE;
-      range[1] = NULL_TREE;
-      return false;
+      value_range last_range (exptype,
+			      positives.lower_bound (pairs - 1),
+			      positives.upper_bound (pairs - 1), VR_ANTI_RANGE);
+      positives.intersect (last_range);
     }
-
-  unsigned expprec = TYPE_PRECISION (exptype);
-
-  bool signed_p = !TYPE_UNSIGNED (exptype);
-
-  if (range_type == VR_ANTI_RANGE)
-    {
-      if (signed_p)
-	{
-	  if (wi::les_p (max, 0))
-	    {
-	      /* EXP is not in a strictly negative range.  That means
-		 it must be in some (not necessarily strictly) positive
-		 range which includes zero.  Since in signed to unsigned
-		 conversions negative values end up converted to large
-		 positive values, and otherwise they are not valid sizes,
-		 the resulting range is in both cases [0, TYPE_MAX].  */
-	      min = wi::zero (expprec);
-	      max = wi::to_wide (TYPE_MAX_VALUE (exptype));
-	    }
-	  else if (wi::les_p (min - 1, 0))
-	    {
-	      /* EXP is not in a negative-positive range.  That means EXP
-		 is either negative, or greater than max.  Since negative
-		 sizes are invalid make the range [MAX + 1, TYPE_MAX].  */
-	      min = max + 1;
-	      max = wi::to_wide (TYPE_MAX_VALUE (exptype));
-	    }
-	  else
-	    {
-	      max = min - 1;
-	      min = wi::zero (expprec);
-	    }
-	}
-      else if (wi::eq_p (0, min - 1))
-	{
-	  /* EXP is unsigned and not in the range [1, MAX].  That means
-	     it's either zero or greater than MAX.  Even though 0 would
-	     normally be detected by -Walloc-zero, unless ALLOW_ZERO
-	     is true, set the range to [MAX, TYPE_MAX] so that when MAX
-	     is greater than the limit the whole range is diagnosed.  */
-	  if (allow_zero)
-	    min = max = wi::zero (expprec);
-	  else
-	    {
-	      min = max + 1;
-	      max = wi::to_wide (TYPE_MAX_VALUE (exptype));
-	    }
-	}
-      else
-	{
-	  max = min - 1;
-	  min = wi::zero (expprec);
-	}
-    }
-
-  range[0] = wide_int_to_tree (exptype, min);
-  range[1] = wide_int_to_tree (exptype, max);
-
-  return true;
+  return set_bounds_from_range (&positives, range);
 }
 
 /* Diagnose a call EXP to function FN decorated with attribute alloc_size
diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
index 5620e6bf28f..2a1b1872e27 100644
--- a/gcc/tree-affine.c
+++ b/gcc/tree-affine.c
@@ -340,7 +340,7 @@  expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
 		op1 = fold_convert (otype, op1);
 		return expr_to_aff_combination (comb, icode, otype, op0, op1);
 	      }
-	    wide_int minv, maxv;
+	    value_range vr;
 	    /* If inner type has wrapping overflow behavior, fold conversion
 	       for below case:
 		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
@@ -348,10 +348,11 @@  expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
 	    if (TYPE_UNSIGNED (itype)
 		&& TYPE_OVERFLOW_WRAPS (itype)
 		&& TREE_CODE (op1) == INTEGER_CST
-		&& determine_value_range (op0, &minv, &maxv) == VR_RANGE)
+		&& determine_value_range (&vr, op0))
 	      {
 		wi::overflow_type overflow = wi::OVF_NONE;
 		signop sign = UNSIGNED;
+		wide_int minv = vr.lower_bound (), maxv = vr.upper_bound ();
 		if (icode == PLUS_EXPR)
 		  wi::add (maxv, wi::to_wide (op1), sign, &overflow);
 		else if (icode == MULT_EXPR)
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index de84c1d505d..2078cd246c6 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -4565,25 +4565,30 @@  make_pass_vrp (gcc::context *ctxt)
 }
 
 
-/* Worker for determine_value_range.  */
+/* Compute a value-range for EXPR and set it in *VR.  Return TRUE if range
+   found was neither varying nor undefined.  */
 
-static void
-determine_value_range_1 (value_range *vr, tree expr)
+bool
+determine_value_range (irange *vr, tree expr)
 {
   if (BINARY_CLASS_P (expr))
     {
-      value_range vr0, vr1;
-      determine_value_range_1 (&vr0, TREE_OPERAND (expr, 0));
-      determine_value_range_1 (&vr1, TREE_OPERAND (expr, 1));
-      range_fold_binary_expr (vr, TREE_CODE (expr), TREE_TYPE (expr),
+      value_range res, vr0, vr1;
+      determine_value_range (&vr0, TREE_OPERAND (expr, 0));
+      determine_value_range (&vr1, TREE_OPERAND (expr, 1));
+      range_fold_binary_expr (&res, TREE_CODE (expr), TREE_TYPE (expr),
 			      &vr0, &vr1);
+      res.normalize_symbolics ();
+      *vr = res;
     }
   else if (UNARY_CLASS_P (expr))
     {
-      value_range vr0;
-      determine_value_range_1 (&vr0, TREE_OPERAND (expr, 0));
-      range_fold_unary_expr (vr, TREE_CODE (expr), TREE_TYPE (expr),
+      value_range res, vr0;
+      determine_value_range (&vr0, TREE_OPERAND (expr, 0));
+      range_fold_unary_expr (&res, TREE_CODE (expr), TREE_TYPE (expr),
 			     &vr0, TREE_TYPE (TREE_OPERAND (expr, 0)));
+      res.normalize_symbolics ();
+      *vr = res;
     }
   else if (TREE_CODE (expr) == INTEGER_CST)
     vr->set (expr);
@@ -4602,22 +4607,5 @@  determine_value_range_1 (value_range *vr, tree expr)
       else
 	vr->set_varying (TREE_TYPE (expr));
     }
-}
-
-/* Compute a value-range for EXPR and set it in *MIN and *MAX.  Return
-   the determined range type.  */
-
-value_range_kind
-determine_value_range (tree expr, wide_int *min, wide_int *max)
-{
-  value_range vr;
-  determine_value_range_1 (&vr, expr);
-  if (vr.constant_p ())
-    {
-      *min = wi::to_wide (vr.min ());
-      *max = wi::to_wide (vr.max ());
-      return vr.kind ();
-    }
-
-  return VR_VARYING;
+  return !vr->varying_p () && !vr->undefined_p ();
 }
diff --git a/gcc/tree-vrp.h b/gcc/tree-vrp.h
index 371e58aa0ea..54c88c93d48 100644
--- a/gcc/tree-vrp.h
+++ b/gcc/tree-vrp.h
@@ -61,7 +61,7 @@  extern bool find_case_label_index (gswitch *, size_t, tree, size_t *);
 extern bool overflow_comparison_p (tree_code, tree, tree, bool, tree *);
 extern tree get_single_symbol (tree, bool *, tree *);
 extern void maybe_set_nonzero_bits (edge, tree);
-extern value_range_kind determine_value_range (tree, wide_int *, wide_int *);
+extern bool determine_value_range (irange *vr, tree);
 extern wide_int masked_increment (const wide_int &val_in, const wide_int &mask,
 				  const wide_int &sgnbit, unsigned int prec);