diff mbox

PR 64454: Improve VRP for %

Message ID alpine.DEB.2.02.1505020023190.30644@stedding.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse May 1, 2015, 10:46 p.m. UTC
Hello,

this patch tries to tighten a bit the range estimate for x%y. slp-perm-7.c 
started failing by vectorizing more than expected, I assumed it was a good 
thing and updated the test. I am less conservative than Jakub with 
division by 0, but I still don't really understand how empty ranges are 
supposed to be represented in VRP.

Bootstrap+testsuite on x86_64-linux-gnu.

2015-05-02  Marc Glisse  <marc.glisse@inria.fr>

 	PR tree-optimization/64454
gcc/
 	* tree-vrp.c (extract_range_from_binary_expr_1) <TRUNC_MOD_EXPR>:
 	Rewrite.
gcc/testsuite/
 	* gcc.dg/tree-ssa/vrp97.c: New file.
 	* gcc.dg/vect/slp-perm-7.c: Update.

Comments

Richard Biener May 4, 2015, 8:58 a.m. UTC | #1
On Sat, May 2, 2015 at 12:46 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> this patch tries to tighten a bit the range estimate for x%y. slp-perm-7.c
> started failing by vectorizing more than expected, I assumed it was a good
> thing and updated the test. I am less conservative than Jakub with division
> by 0, but I still don't really understand how empty ranges are supposed to
> be represented in VRP.
>
> Bootstrap+testsuite on x86_64-linux-gnu.

Hmm, so I don't like how you (continute to) use trees for the constant
computations.
wide-ints would be a better fit today.  I also notice that
fold_unary_to_constant can
return NULL_TREE and neither the old nor your code handles that.

"empty" ranges are basically UNDEFINED.

Aren't you pessimizing the case where the old code used
value_range_nonnegative_p()
by just using TYPE_UNSIGNED?

Thanks,
Richard.

> 2015-05-02  Marc Glisse  <marc.glisse@inria.fr>
>
>         PR tree-optimization/64454
> gcc/
>         * tree-vrp.c (extract_range_from_binary_expr_1) <TRUNC_MOD_EXPR>:
>         Rewrite.
> gcc/testsuite/
>         * gcc.dg/tree-ssa/vrp97.c: New file.
>         * gcc.dg/vect/slp-perm-7.c: Update.
>
> --
> Marc Glisse
> Index: gcc/testsuite/gcc.dg/tree-ssa/vrp97.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/vrp97.c       (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/vrp97.c       (working copy)
> @@ -0,0 +1,13 @@
> +/* PR tree-optimization/64454 */
> +/* { dg-options "-O2 -fdump-tree-vrp1" } */
> +
> +int f(int a, int b)
> +{
> +    if (a < -3 || a > 13) __builtin_unreachable();
> +    if (b < -6 || b > 9) __builtin_unreachable();
> +    int c = a % b;
> +    return c >= -3 && c <= 8;
> +}
> +
> +/* { dg-final { scan-tree-dump "return 1;" "vrp1" } } */
> +/* { dg-final { cleanup-tree-dump "vrp1" } } */
> Index: gcc/testsuite/gcc.dg/vect/slp-perm-7.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/slp-perm-7.c      (revision 222708)
> +++ gcc/testsuite/gcc.dg/vect/slp-perm-7.c      (working copy)
> @@ -63,15 +63,15 @@ int main (int argc, const char* argv[])
>
>    foo (input, output, input2, output2);
>
>    for (i = 0; i < N; i++)
>       if (output[i] != check_results[i] || output2[i] != check_results2[i])
>         abort ();
>
>    return 0;
>  }
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  {
> target vect_perm } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"  {
> target vect_perm } } } */
>  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"
> { target vect_perm } } } */
>  /* { dg-final { cleanup-tree-dump "vect" } } */
>
>
> Index: gcc/tree-vrp.c
> ===================================================================
> --- gcc/tree-vrp.c      (revision 222708)
> +++ gcc/tree-vrp.c      (working copy)
> @@ -3189,40 +3189,83 @@ extract_range_from_binary_expr_1 (value_
>             }
>         }
>        else
>         {
>           extract_range_from_multiplicative_op_1 (vr, code, &vr0, &vr1);
>           return;
>         }
>      }
>    else if (code == TRUNC_MOD_EXPR)
>      {
> -      if (vr1.type != VR_RANGE
> -         || range_includes_zero_p (vr1.min, vr1.max) != 0
> -         || vrp_val_is_min (vr1.min))
> +      if (range_is_null (&vr1))
> +       {
> +         set_value_range_to_undefined (vr);
> +         return;
> +       }
> +      // Some propagation of symbolic ranges should be possible
> +      // at least in the unsigned case.
> +      bool has_vr0 = vr0.type == VR_RANGE && !symbolic_range_p (&vr0);
> +      bool has_vr1 = vr1.type == VR_RANGE && !symbolic_range_p (&vr1);
> +      if (!has_vr0 && !has_vr1)
>         {
>           set_value_range_to_varying (vr);
>           return;
>         }
>        type = VR_RANGE;
> -      /* Compute MAX <|vr1.min|, |vr1.max|> - 1.  */
> -      max = fold_unary_to_constant (ABS_EXPR, expr_type, vr1.min);
> -      if (tree_int_cst_lt (max, vr1.max))
> -       max = vr1.max;
> -      max = int_const_binop (MINUS_EXPR, max, build_int_cst (TREE_TYPE
> (max), 1));
> -      /* If the dividend is non-negative the modulus will be
> -        non-negative as well.  */
> -      if (TYPE_UNSIGNED (expr_type)
> -         || value_range_nonnegative_p (&vr0))
> -       min = build_int_cst (TREE_TYPE (max), 0);
> +      if (TYPE_UNSIGNED (expr_type))
> +       {
> +         // A % B is at most A and smaller than B.
> +         min = build_int_cst (expr_type, 0);
> +         if (has_vr0 && (!has_vr1 || tree_int_cst_lt (vr0.max, vr1.max)))
> +           max = vr0.max;
> +         else
> +           max = int_const_binop (MINUS_EXPR, vr1.max,
> +                                  build_int_cst (expr_type, 1));
> +       }
>        else
> -       min = fold_unary_to_constant (NEGATE_EXPR, expr_type, max);
> +       {
> +         tree min1 = NULL_TREE;
> +         tree max1 = NULL_TREE;
> +         if (has_vr1)
> +           {
> +             // ABS (A % B) < ABS (B)
> +             max1 = fold_unary_to_constant (ABS_EXPR, expr_type, vr1.min);
> +             if (tree_int_cst_lt (max1, vr1.max))
> +               max1 = vr1.max;
> +             max1 = int_const_binop (MINUS_EXPR, max1,
> +                                     build_int_cst (expr_type, 1));
> +             min1 = fold_unary_to_constant (NEGATE_EXPR, expr_type, max1);
> +           }
> +         if (has_vr0)
> +           {
> +             // Either 0 <= A % B <= A or A <= A % B <= 0.
> +             max = vr0.max;
> +             min = vr0.min;
> +             tree zero = build_int_cst (expr_type, 0);
> +             if (tree_int_cst_lt (zero, min))
> +               min = zero;
> +             if (tree_int_cst_lt (max, zero))
> +               max = zero;
> +             if (has_vr1)
> +               {
> +                 if (tree_int_cst_lt (min, min1))
> +                   min = min1;
> +                 if (tree_int_cst_lt (max1, max))
> +                   max = max1;
> +               }
> +           }
> +         else
> +           {
> +             min = min1;
> +             max = max1;
> +           }
> +       }
>      }
>    else if (code == BIT_AND_EXPR || code == BIT_IOR_EXPR || code ==
> BIT_XOR_EXPR)
>      {
>        bool int_cst_range0, int_cst_range1;
>        wide_int may_be_nonzero0, may_be_nonzero1;
>        wide_int must_be_nonzero0, must_be_nonzero1;
>
>        int_cst_range0 = zero_nonzero_bits_from_vr (expr_type, &vr0,
>                                                   &may_be_nonzero0,
>                                                   &must_be_nonzero0);
>
Marc Glisse May 4, 2015, 1:57 p.m. UTC | #2
On Mon, 4 May 2015, Richard Biener wrote:

> On Sat, May 2, 2015 at 12:46 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> Hello,
>>
>> this patch tries to tighten a bit the range estimate for x%y. slp-perm-7.c
>> started failing by vectorizing more than expected, I assumed it was a good
>> thing and updated the test. I am less conservative than Jakub with division
>> by 0, but I still don't really understand how empty ranges are supposed to
>> be represented in VRP.
>>
>> Bootstrap+testsuite on x86_64-linux-gnu.
>
> Hmm, so I don't like how you (continute to) use trees for the constant 
> computations. wide-ints would be a better fit today.  I also notice that 
> fold_unary_to_constant can return NULL_TREE and neither the old nor your 
> code handles that.

You are right. I was lazy and tried to keep this part of the old code, I 
shouldn't have...

> "empty" ranges are basically UNDEFINED.

Cool, that's what I did. But I don't see code adding calls to 
__builtin_unreachable() when an empty range is detected. Maybe that almost 
never happens?

> Aren't you pessimizing the case where the old code used 
> value_range_nonnegative_p() by just using TYPE_UNSIGNED?

I don't think so. The old code only handled signed types in the positive 
case, while I have a more complete handling of signed types, which should 
do at least as good as the old one even in the positive case.
Richard Biener May 8, 2015, 9:14 a.m. UTC | #3
On Mon, May 4, 2015 at 3:57 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 4 May 2015, Richard Biener wrote:
>
>> On Sat, May 2, 2015 at 12:46 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
>>>
>>> Hello,
>>>
>>> this patch tries to tighten a bit the range estimate for x%y.
>>> slp-perm-7.c
>>> started failing by vectorizing more than expected, I assumed it was a
>>> good
>>> thing and updated the test. I am less conservative than Jakub with
>>> division
>>> by 0, but I still don't really understand how empty ranges are supposed
>>> to
>>> be represented in VRP.
>>>
>>> Bootstrap+testsuite on x86_64-linux-gnu.
>>
>>
>> Hmm, so I don't like how you (continute to) use trees for the constant
>> computations. wide-ints would be a better fit today.  I also notice that
>> fold_unary_to_constant can return NULL_TREE and neither the old nor your
>> code handles that.
>
>
> You are right. I was lazy and tried to keep this part of the old code, I
> shouldn't have...
>
>> "empty" ranges are basically UNDEFINED.
>
>
> Cool, that's what I did. But I don't see code adding calls to
> __builtin_unreachable() when an empty range is detected. Maybe that almost
> never happens?

No, it's just nobody bothered to implement it.  You also have to be careful
as you can't replace reproducers of UNDEFINED but only uses that still
result in UNDEFINED result (for example 0 * UNDEFINED is defined again).

What I'd like to do at some point is have some common code that you
can query what OP1 tree_code OP2 evaluates to if either op is UNDEFINED.
For example UNDEF + X == UNDEF but UNDEF * X == 0 (as optimistical
result, of course - the only not undefined case is for X == 0).  UNDEF << X == 0
but UNDEF >> X == signed(x) ? UNDEF : 0.

We have multiple passes that duplicate only parts of those "optimizations".

Having a central place implementing this correctly would be nice.

>> Aren't you pessimizing the case where the old code used
>> value_range_nonnegative_p() by just using TYPE_UNSIGNED?
>
>
> I don't think so. The old code only handled signed types in the positive
> case, while I have a more complete handling of signed types, which should do
> at least as good as the old one even in the positive case.

Ok, I see.

Richard.

> --
> Marc Glisse
diff mbox

Patch

Index: gcc/testsuite/gcc.dg/tree-ssa/vrp97.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/vrp97.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/vrp97.c	(working copy)
@@ -0,0 +1,13 @@ 
+/* PR tree-optimization/64454 */
+/* { dg-options "-O2 -fdump-tree-vrp1" } */
+
+int f(int a, int b)
+{
+    if (a < -3 || a > 13) __builtin_unreachable();
+    if (b < -6 || b > 9) __builtin_unreachable();
+    int c = a % b;
+    return c >= -3 && c <= 8;
+}
+
+/* { dg-final { scan-tree-dump "return 1;" "vrp1" } } */
+/* { dg-final { cleanup-tree-dump "vrp1" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-perm-7.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-perm-7.c	(revision 222708)
+++ gcc/testsuite/gcc.dg/vect/slp-perm-7.c	(working copy)
@@ -63,15 +63,15 @@  int main (int argc, const char* argv[])
 
   foo (input, output, input2, output2);
 
   for (i = 0; i < N; i++)
      if (output[i] != check_results[i] || output2[i] != check_results2[i])
        abort ();
 
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"  { target vect_perm } } } */
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
 
Index: gcc/tree-vrp.c
===================================================================
--- gcc/tree-vrp.c	(revision 222708)
+++ gcc/tree-vrp.c	(working copy)
@@ -3189,40 +3189,83 @@  extract_range_from_binary_expr_1 (value_
 	    }
 	}
       else
 	{
 	  extract_range_from_multiplicative_op_1 (vr, code, &vr0, &vr1);
 	  return;
 	}
     }
   else if (code == TRUNC_MOD_EXPR)
     {
-      if (vr1.type != VR_RANGE
-	  || range_includes_zero_p (vr1.min, vr1.max) != 0
-	  || vrp_val_is_min (vr1.min))
+      if (range_is_null (&vr1))
+	{
+	  set_value_range_to_undefined (vr);
+	  return;
+	}
+      // Some propagation of symbolic ranges should be possible
+      // at least in the unsigned case.
+      bool has_vr0 = vr0.type == VR_RANGE && !symbolic_range_p (&vr0);
+      bool has_vr1 = vr1.type == VR_RANGE && !symbolic_range_p (&vr1);
+      if (!has_vr0 && !has_vr1)
 	{
 	  set_value_range_to_varying (vr);
 	  return;
 	}
       type = VR_RANGE;
-      /* Compute MAX <|vr1.min|, |vr1.max|> - 1.  */
-      max = fold_unary_to_constant (ABS_EXPR, expr_type, vr1.min);
-      if (tree_int_cst_lt (max, vr1.max))
-	max = vr1.max;
-      max = int_const_binop (MINUS_EXPR, max, build_int_cst (TREE_TYPE (max), 1));
-      /* If the dividend is non-negative the modulus will be
-	 non-negative as well.  */
-      if (TYPE_UNSIGNED (expr_type)
-	  || value_range_nonnegative_p (&vr0))
-	min = build_int_cst (TREE_TYPE (max), 0);
+      if (TYPE_UNSIGNED (expr_type))
+	{
+	  // A % B is at most A and smaller than B.
+	  min = build_int_cst (expr_type, 0);
+	  if (has_vr0 && (!has_vr1 || tree_int_cst_lt (vr0.max, vr1.max)))
+	    max = vr0.max;
+	  else
+	    max = int_const_binop (MINUS_EXPR, vr1.max,
+				   build_int_cst (expr_type, 1));
+	}
       else
-	min = fold_unary_to_constant (NEGATE_EXPR, expr_type, max);
+	{
+	  tree min1 = NULL_TREE;
+	  tree max1 = NULL_TREE;
+	  if (has_vr1)
+	    {
+	      // ABS (A % B) < ABS (B)
+	      max1 = fold_unary_to_constant (ABS_EXPR, expr_type, vr1.min);
+	      if (tree_int_cst_lt (max1, vr1.max))
+		max1 = vr1.max;
+	      max1 = int_const_binop (MINUS_EXPR, max1,
+				      build_int_cst (expr_type, 1));
+	      min1 = fold_unary_to_constant (NEGATE_EXPR, expr_type, max1);
+	    }
+	  if (has_vr0)
+	    {
+	      // Either 0 <= A % B <= A or A <= A % B <= 0.
+	      max = vr0.max;
+	      min = vr0.min;
+	      tree zero = build_int_cst (expr_type, 0);
+	      if (tree_int_cst_lt (zero, min))
+		min = zero;
+	      if (tree_int_cst_lt (max, zero))
+		max = zero;
+	      if (has_vr1)
+		{
+		  if (tree_int_cst_lt (min, min1))
+		    min = min1;
+		  if (tree_int_cst_lt (max1, max))
+		    max = max1;
+		}
+	    }
+	  else
+	    {
+	      min = min1;
+	      max = max1;
+	    }
+	}
     }
   else if (code == BIT_AND_EXPR || code == BIT_IOR_EXPR || code == BIT_XOR_EXPR)
     {
       bool int_cst_range0, int_cst_range1;
       wide_int may_be_nonzero0, may_be_nonzero1;
       wide_int must_be_nonzero0, must_be_nonzero1;
 
       int_cst_range0 = zero_nonzero_bits_from_vr (expr_type, &vr0,
 						  &may_be_nonzero0,
 						  &must_be_nonzero0);