diff mbox series

[V5,2/2] Optimize '(X - N * M) / N' to 'X / N - M' if valid

Message ID 20230718140544.3497370-2-guojiufu@linux.ibm.com
State New
Headers show
Series [V5,1/2] Add overflow API for plus minus mult on range | expand

Commit Message

Jiufu Guo July 18, 2023, 2:05 p.m. UTC
Hi,

Integer expression "(X - N * M) / N" can be optimized to "X / N - M"
if there is no wrap/overflow/underflow and "X - N * M" has the same
sign with "X".

Compare the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624067.html
- APIs: overflow, nonnegative_p and nonpositive_p are moved close
  to value range.
- Use above APIs in match.pd.

Bootstrap & regtest pass on ppc64{,le} and x86_64.
Is this patch ok for trunk?

BR,
Jeff (Jiufu Guo)

	PR tree-optimization/108757

gcc/ChangeLog:

	* match.pd ((X - N * M) / N): New pattern.
	((X + N * M) / N): New pattern.
	((X + C) div_rshift N): New pattern.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr108757-1.c: New test.
	* gcc.dg/pr108757-2.c: New test.
	* gcc.dg/pr108757.h: New test.

---
 gcc/match.pd                      |  85 +++++++++++
 gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
 gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
 gcc/testsuite/gcc.dg/pr108757.h   | 233 ++++++++++++++++++++++++++++++
 4 files changed, 355 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
 create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
 create mode 100644 gcc/testsuite/gcc.dg/pr108757.h

Comments

Jiufu Guo Aug. 7, 2023, 2:45 a.m. UTC | #1
Hi,

Gentle ping...

On 2023-07-18 22:05, Jiufu Guo wrote:
> Hi,
> 
> Integer expression "(X - N * M) / N" can be optimized to "X / N - M"
> if there is no wrap/overflow/underflow and "X - N * M" has the same
> sign with "X".
> 
> Compare the previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624067.html
> - APIs: overflow, nonnegative_p and nonpositive_p are moved close
>   to value range.
> - Use above APIs in match.pd.
> 
> Bootstrap & regtest pass on ppc64{,le} and x86_64.
> Is this patch ok for trunk?
> 
> BR,
> Jeff (Jiufu Guo)
> 
> 	PR tree-optimization/108757
> 
> gcc/ChangeLog:
> 
> 	* match.pd ((X - N * M) / N): New pattern.
> 	((X + N * M) / N): New pattern.
> 	((X + C) div_rshift N): New pattern.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/pr108757-1.c: New test.
> 	* gcc.dg/pr108757-2.c: New test.
> 	* gcc.dg/pr108757.h: New test.
> 
> ---
>  gcc/match.pd                      |  85 +++++++++++
>  gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
>  gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
>  gcc/testsuite/gcc.dg/pr108757.h   | 233 ++++++++++++++++++++++++++++++
>  4 files changed, 355 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 8543f777a28..39dbb0567dc 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -942,6 +942,91 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  #endif
>     ))))
> 
> +#if GIMPLE
> +(for div (trunc_div exact_div)
> + /* Simplify (t + M*N) / N -> t / N + M.  */
> + (simplify
> +  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
> +  (if (INTEGRAL_TYPE_P (type)
> +       && get_range_query (cfun)->range_of_expr (vr1, @1)
> +       && get_range_query (cfun)->range_of_expr (vr2, @2)
> +       && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
> +       && get_range_query (cfun)->range_of_expr (vr0, @0)
> +       && get_range_query (cfun)->range_of_expr (vr3, @3)
> +       && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
> +       && get_range_query (cfun)->range_of_expr (vr4, @4)
> +       && (TYPE_UNSIGNED (type)
> +	   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
> +	   || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
> +  (plus (div @0 @2) @1))))
> +
> + /* Simplify (t - M*N) / N -> t / N - M.  */
> + (simplify
> +  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
> +  (if (INTEGRAL_TYPE_P (type)
> +       && get_range_query (cfun)->range_of_expr (vr1, @1)
> +       && get_range_query (cfun)->range_of_expr (vr2, @2)
> +       && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
> +       && get_range_query (cfun)->range_of_expr (vr0, @0)
> +       && get_range_query (cfun)->range_of_expr (vr3, @3)
> +       && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
> +       && get_range_query (cfun)->range_of_expr (vr4, @4)
> +       && (TYPE_UNSIGNED (type)
> +	   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
> +	   || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
> +  (minus (div @0 @2) @1)))))
> +
> +/* Simplify
> +   (t + C) / N -> t / N + C / N where C is multiple of N.
> +   (t + C) >> N -> t >> N + C>>N if low N bits of C is 0.  */
> +(for op (trunc_div exact_div rshift)
> + (simplify
> +  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
> +   (with
> +    {
> +      wide_int c = wi::to_wide (@1);
> +      wide_int n = wi::to_wide (@2);
> +      bool is_rshift = op == RSHIFT_EXPR;
> +      bool neg_c = false;
> +      bool ok = false;
> +      value_range vr0;
> +      if (INTEGRAL_TYPE_P (type)
> +	  && get_range_query (cfun)->range_of_expr (vr0, @0))
> +        {
> +	  ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
> +			 : wi::multiple_of_p (c, n, TYPE_SIGN (type));
> +	  value_range vr1, vr3;
> +	  ok = ok && get_range_query (cfun)->range_of_expr (vr1, @1)
> +	       && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
> +	       && get_range_query (cfun)->range_of_expr (vr3, @3)
> +	       && (TYPE_UNSIGNED (type)
> +		   || (vr0.nonnegative_p () && vr3.nonnegative_p ())
> +		   || (vr0.nonpositive_p () && vr3.nonpositive_p ()));
> +
> +	  /* Try check 'X + C' as 'X - -C' for unsigned.  */
> +	  if (!ok && TYPE_UNSIGNED (type) && c.sign_mask () < 0)
> +	    {
> +	      neg_c = true;
> +	      c = -c;
> +	      ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
> +			     : wi::multiple_of_p (c, n, UNSIGNED);
> +	      ok = ok && wi::geu_p (vr0.lower_bound (), c);
> +	    }
> +	}
> +    }
> +   (if (ok)
> +   (with
> +    {
> +      wide_int m;
> +      m = is_rshift ? wi::rshift (c, n, TYPE_SIGN (type))
> +		    : wi::div_trunc (c, n, TYPE_SIGN (type));
> +      m = neg_c ? -m : m;
> +    }
> +   (plus (op @0 @2) { wide_int_to_tree(type, m); }))))))
> +#endif
> +
>  (for op (negate abs)
>   /* Simplify cos(-x) and cos(|x|) -> cos(x).  Similarly for cosh.  */
>   (for coss (COS COSH)
> diff --git a/gcc/testsuite/gcc.dg/pr108757-1.c
> b/gcc/testsuite/gcc.dg/pr108757-1.c
> new file mode 100644
> index 00000000000..7908f4bdcb8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr108757-1.c
> @@ -0,0 +1,18 @@
> +/* PR tree-optimization/108757 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +#include <limits.h>
> +#define N 5
> +#define M 3
> +#define GAP 0
> +typedef unsigned int UINT;
> +typedef int INT;
> +#define UMAX UINT_MAX
> +#define IMAX INT_MAX
> +#define IMIN INT_MIN
> +#include "pr108757.h"
> +
> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\+ "
> "optimized" } } *
> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\- "
> "optimized" } } */
> +/* { dg-final { scan-tree-dump-not " = b_\[0-9\]+ \\+ " "optimized" } 
> } */
> diff --git a/gcc/testsuite/gcc.dg/pr108757-2.c
> b/gcc/testsuite/gcc.dg/pr108757-2.c
> new file mode 100644
> index 00000000000..82bebd09944
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr108757-2.c
> @@ -0,0 +1,19 @@
> +/* PR tree-optimization/108757 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized -fwrapv" } */
> +
> +#include <limits.h>
> +#define N 4
> +#define M 3
> +#define GAP 2
> +typedef unsigned int UINT;
> +typedef int INT;
> +#define UMAX UINT_MAX
> +#define IMAX INT_MAX
> +#define IMIN INT_MIN
> +#include "pr108757.h"
> +
> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\+ " 16
> "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\- " 3
> "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " \\+ x_\[0-9\]+\\(D\\)" 3
> "optimized" } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/pr108757.h 
> b/gcc/testsuite/gcc.dg/pr108757.h
> new file mode 100644
> index 00000000000..5550199c1ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr108757.h
> @@ -0,0 +1,233 @@
> +#define NOINLINE __attribute__ ((noinline))
> +UINT NOINLINE
> +opt_u1 (UINT x)
> +{
> +  if (x < (M * N) - GAP)
> +    return 0;
> +  UINT a = x - (M * N);
> +  UINT b = a / N;
> +  return b + M;
> +}
> +
> +UINT NOINLINE
> +opt_u2 (UINT x)
> +{
> +  if (x > (UMAX - (M * N) + GAP))
> +    return 0;
> +  UINT a = x + (M * N);
> +  UINT b = a / N;
> +  return b - M;
> +}
> +
> +INT NOINLINE
> +opt_s1 (INT x)
> +{
> +  if (x < (M * N) - GAP)
> +    return 0;
> +  INT a = x - (M * N);
> +  INT b = a / N;
> +  return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s2 (INT x)
> +{
> +  if (x < IMIN + (M * N) - GAP || x > 0)
> +    return 0;
> +  INT a = x - (M * N);
> +  INT b = a / N;
> +  return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s3 (INT x)
> +{
> +  if (x < (M * N) - GAP)
> +    return 0;
> +  INT a = x - (M * N);
> +  INT b = a / -N;
> +  return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s4 (INT x)
> +{
> +  if (x < IMIN + (M * N) - GAP || x > 0)
> +    return 0;
> +  INT a = x - (M * N);
> +  INT b = a / -N;
> +  return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s5 (INT x)
> +{
> +  if (x > (-M * N) + GAP)
> +    return 0;
> +  INT a = x - (-M * N);
> +  INT b = a / N;
> +  return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s6 (INT x)
> +{
> +  if (x > IMAX - (M * N) + GAP || x < 0)
> +    return 0;
> +  INT a = x - (-M * N);
> +  INT b = a / N;
> +  return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s7 (INT x)
> +{
> +  if (x > (M * -N) + GAP)
> +    return 0;
> +  INT a = x - (M * -N);
> +  INT b = a / -N;
> +  return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s8 (INT x)
> +{
> +  if (x > IMAX - (M * N) + GAP || x < 0)
> +    return 0;
> +  INT a = x - (M * -N);
> +  INT b = a / -N;
> +  return b + M;
> +}
> +
> +UINT NOINLINE
> +opt_u3 (UINT x)
> +{
> +  if (x < (M << N) - GAP)
> +    return 0;
> +  UINT a = x - (M << N);
> +  UINT b = a >> N;
> +  return b + M;
> +}
> +
> +UINT NOINLINE
> +opt_u4 (UINT x)
> +{
> +  if (x > (UMAX - (M << N)) + GAP)
> +    return 0;
> +  UINT a = x + (M << N);
> +  UINT b = a >> N;
> +  return b - M;
> +}
> +
> +INT NOINLINE
> +opt_s9 (INT x)
> +{
> +  if (x < (M << N) - GAP)
> +    return 0;
> +  INT a = x - (M << N);
> +  INT b = a >> N;
> +  return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s10 (INT x)
> +{
> +  if (x < IMIN + (M << N) - GAP || x > 0)
> +    return 0;
> +  INT a = x - (M << N);
> +  INT b = a >> N;
> +  return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s11 (INT x)
> +{
> +  if (x > (-M << N) + GAP)
> +    return 0;
> +  INT a = x - (-M << N);
> +  INT b = a >> N;
> +  return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s12 (INT x)
> +{
> +  if (x > IMAX - (M << N) + GAP || x < 0)
> +    return 0;
> +  INT a = x - (-M << N);
> +  INT b = a >> N;
> +  return b + -M;
> +}
> +
> +UINT NOINLINE
> +opt_u5 (UINT x, UINT n, UINT m)
> +{
> +  if (n > N || m > M)
> +    return 0;
> +  if (x < (M*N) - GAP)
> +    return 0;
> +  UINT a = x - (m * n);
> +  UINT b = a / n;
> +  return b + m;
> +}
> +
> +UINT NOINLINE
> +opt_u6 (UINT x, UINT n, UINT m)
> +{
> +  if (n > N || m > M)
> +    return 0;
> +  if (x > (UMAX - M*N) + GAP)
> +    return 0;
> +  UINT a = x + (m * n);
> +  UINT b = a / n;
> +  return b - m;
> +}
> +
> +INT NOINLINE
> +opt_s13 (INT x, INT n, INT m)
> +{
> +  if (n > N || m > M || n < 0 || m < 0)
> +    return 0;
> +  if (x < (M*N) - GAP)
> +    return 0;
> +  INT a = x - (m * n);
> +  INT b = a / n;
> +  return b + m;
> +}
> +
> +INT NOINLINE
> +opt_s14 (INT x, INT n, INT m)
> +{
> +  if (n > N || m > M || n < 0 || m < 0)
> +    return 0;
> +  if (x > -M*N + GAP)
> +    return 0;
> +  INT a = x + (m * n);
> +  INT b = a / n;
> +  return b - m;
> +}
> +
> +INT
> +opt_s15 (INT x, INT n, INT m)
> +{
> +  if (n > 0 || m > 0 || n < -N || m < -M)
> +    return 0;
> +  if (x < (M*N) - GAP)
> +    return 0;
> +  INT a = x - (m * n);
> +  INT b = a / n;
> +  return b + m;
> +}
> +
> +INT NOINLINE
> +opt_s16 (INT x, INT n, INT m)
> +{
> +  if (n > 0 || m > 0 || n < -N || m < -M)
> +    return 0;
> +  if (x < 0 || x > (IMAX - M*N) + GAP)
> +    return 0;
> +  INT a = x + (m * n);
> +  INT b = a / n;
> +  return b - m;
> +}
> +
Jiufu Guo Aug. 23, 2023, 2:04 a.m. UTC | #2
Hi,

I would like to have a gentle ping...

BR,
Jeff (Jiufu Guo)

On 2023-08-07 10:45, guojiufu via Gcc-patches wrote:
> Hi,
> 
> Gentle ping...
> 
> On 2023-07-18 22:05, Jiufu Guo wrote:
>> Hi,
>> 
>> Integer expression "(X - N * M) / N" can be optimized to "X / N - M"
>> if there is no wrap/overflow/underflow and "X - N * M" has the same
>> sign with "X".
>> 
>> Compare the previous version:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624067.html
>> - APIs: overflow, nonnegative_p and nonpositive_p are moved close
>>   to value range.
>> - Use above APIs in match.pd.
>> 
>> Bootstrap & regtest pass on ppc64{,le} and x86_64.
>> Is this patch ok for trunk?
>> 
>> BR,
>> Jeff (Jiufu Guo)
>> 
>> 	PR tree-optimization/108757
>> 
>> gcc/ChangeLog:
>> 
>> 	* match.pd ((X - N * M) / N): New pattern.
>> 	((X + N * M) / N): New pattern.
>> 	((X + C) div_rshift N): New pattern.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> 	* gcc.dg/pr108757-1.c: New test.
>> 	* gcc.dg/pr108757-2.c: New test.
>> 	* gcc.dg/pr108757.h: New test.
>> 
>> ---
>>  gcc/match.pd                      |  85 +++++++++++
>>  gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
>>  gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
>>  gcc/testsuite/gcc.dg/pr108757.h   | 233 
>> ++++++++++++++++++++++++++++++
>>  4 files changed, 355 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
>>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
>>  create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
>> 
>> diff --git a/gcc/match.pd b/gcc/match.pd
>> index 8543f777a28..39dbb0567dc 100644
>> --- a/gcc/match.pd
>> +++ b/gcc/match.pd
>> @@ -942,6 +942,91 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>  #endif
>>     ))))
>> 
>> +#if GIMPLE
>> +(for div (trunc_div exact_div)
>> + /* Simplify (t + M*N) / N -> t / N + M.  */
>> + (simplify
>> +  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
>> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
>> +  (if (INTEGRAL_TYPE_P (type)
>> +       && get_range_query (cfun)->range_of_expr (vr1, @1)
>> +       && get_range_query (cfun)->range_of_expr (vr2, @2)
>> +       && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
>> +       && get_range_query (cfun)->range_of_expr (vr0, @0)
>> +       && get_range_query (cfun)->range_of_expr (vr3, @3)
>> +       && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
>> +       && get_range_query (cfun)->range_of_expr (vr4, @4)
>> +       && (TYPE_UNSIGNED (type)
>> +	   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
>> +	   || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
>> +  (plus (div @0 @2) @1))))
>> +
>> + /* Simplify (t - M*N) / N -> t / N - M.  */
>> + (simplify
>> +  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
>> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
>> +  (if (INTEGRAL_TYPE_P (type)
>> +       && get_range_query (cfun)->range_of_expr (vr1, @1)
>> +       && get_range_query (cfun)->range_of_expr (vr2, @2)
>> +       && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
>> +       && get_range_query (cfun)->range_of_expr (vr0, @0)
>> +       && get_range_query (cfun)->range_of_expr (vr3, @3)
>> +       && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
>> +       && get_range_query (cfun)->range_of_expr (vr4, @4)
>> +       && (TYPE_UNSIGNED (type)
>> +	   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
>> +	   || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
>> +  (minus (div @0 @2) @1)))))
>> +
>> +/* Simplify
>> +   (t + C) / N -> t / N + C / N where C is multiple of N.
>> +   (t + C) >> N -> t >> N + C>>N if low N bits of C is 0.  */
>> +(for op (trunc_div exact_div rshift)
>> + (simplify
>> +  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
>> +   (with
>> +    {
>> +      wide_int c = wi::to_wide (@1);
>> +      wide_int n = wi::to_wide (@2);
>> +      bool is_rshift = op == RSHIFT_EXPR;
>> +      bool neg_c = false;
>> +      bool ok = false;
>> +      value_range vr0;
>> +      if (INTEGRAL_TYPE_P (type)
>> +	  && get_range_query (cfun)->range_of_expr (vr0, @0))
>> +        {
>> +	  ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
>> +			 : wi::multiple_of_p (c, n, TYPE_SIGN (type));
>> +	  value_range vr1, vr3;
>> +	  ok = ok && get_range_query (cfun)->range_of_expr (vr1, @1)
>> +	       && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
>> +	       && get_range_query (cfun)->range_of_expr (vr3, @3)
>> +	       && (TYPE_UNSIGNED (type)
>> +		   || (vr0.nonnegative_p () && vr3.nonnegative_p ())
>> +		   || (vr0.nonpositive_p () && vr3.nonpositive_p ()));
>> +
>> +	  /* Try check 'X + C' as 'X - -C' for unsigned.  */
>> +	  if (!ok && TYPE_UNSIGNED (type) && c.sign_mask () < 0)
>> +	    {
>> +	      neg_c = true;
>> +	      c = -c;
>> +	      ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
>> +			     : wi::multiple_of_p (c, n, UNSIGNED);
>> +	      ok = ok && wi::geu_p (vr0.lower_bound (), c);
>> +	    }
>> +	}
>> +    }
>> +   (if (ok)
>> +   (with
>> +    {
>> +      wide_int m;
>> +      m = is_rshift ? wi::rshift (c, n, TYPE_SIGN (type))
>> +		    : wi::div_trunc (c, n, TYPE_SIGN (type));
>> +      m = neg_c ? -m : m;
>> +    }
>> +   (plus (op @0 @2) { wide_int_to_tree(type, m); }))))))
>> +#endif
>> +
>>  (for op (negate abs)
>>   /* Simplify cos(-x) and cos(|x|) -> cos(x).  Similarly for cosh.  */
>>   (for coss (COS COSH)
>> diff --git a/gcc/testsuite/gcc.dg/pr108757-1.c
>> b/gcc/testsuite/gcc.dg/pr108757-1.c
>> new file mode 100644
>> index 00000000000..7908f4bdcb8
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/pr108757-1.c
>> @@ -0,0 +1,18 @@
>> +/* PR tree-optimization/108757 */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-optimized" } */
>> +
>> +#include <limits.h>
>> +#define N 5
>> +#define M 3
>> +#define GAP 0
>> +typedef unsigned int UINT;
>> +typedef int INT;
>> +#define UMAX UINT_MAX
>> +#define IMAX INT_MAX
>> +#define IMIN INT_MIN
>> +#include "pr108757.h"
>> +
>> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\+ "
>> "optimized" } } *
>> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\- "
>> "optimized" } } */
>> +/* { dg-final { scan-tree-dump-not " = b_\[0-9\]+ \\+ " "optimized" } 
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/pr108757-2.c
>> b/gcc/testsuite/gcc.dg/pr108757-2.c
>> new file mode 100644
>> index 00000000000..82bebd09944
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/pr108757-2.c
>> @@ -0,0 +1,19 @@
>> +/* PR tree-optimization/108757 */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-optimized -fwrapv" } */
>> +
>> +#include <limits.h>
>> +#define N 4
>> +#define M 3
>> +#define GAP 2
>> +typedef unsigned int UINT;
>> +typedef int INT;
>> +#define UMAX UINT_MAX
>> +#define IMAX INT_MAX
>> +#define IMIN INT_MIN
>> +#include "pr108757.h"
>> +
>> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\+ " 16
>> "optimized" } } */
>> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\- " 3
>> "optimized" } } */
>> +/* { dg-final { scan-tree-dump-times " \\+ x_\[0-9\]+\\(D\\)" 3
>> "optimized" } } */
>> +
>> diff --git a/gcc/testsuite/gcc.dg/pr108757.h 
>> b/gcc/testsuite/gcc.dg/pr108757.h
>> new file mode 100644
>> index 00000000000..5550199c1ef
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/pr108757.h
>> @@ -0,0 +1,233 @@
>> +#define NOINLINE __attribute__ ((noinline))
>> +UINT NOINLINE
>> +opt_u1 (UINT x)
>> +{
>> +  if (x < (M * N) - GAP)
>> +    return 0;
>> +  UINT a = x - (M * N);
>> +  UINT b = a / N;
>> +  return b + M;
>> +}
>> +
>> +UINT NOINLINE
>> +opt_u2 (UINT x)
>> +{
>> +  if (x > (UMAX - (M * N) + GAP))
>> +    return 0;
>> +  UINT a = x + (M * N);
>> +  UINT b = a / N;
>> +  return b - M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s1 (INT x)
>> +{
>> +  if (x < (M * N) - GAP)
>> +    return 0;
>> +  INT a = x - (M * N);
>> +  INT b = a / N;
>> +  return b + M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s2 (INT x)
>> +{
>> +  if (x < IMIN + (M * N) - GAP || x > 0)
>> +    return 0;
>> +  INT a = x - (M * N);
>> +  INT b = a / N;
>> +  return b + M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s3 (INT x)
>> +{
>> +  if (x < (M * N) - GAP)
>> +    return 0;
>> +  INT a = x - (M * N);
>> +  INT b = a / -N;
>> +  return b + -M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s4 (INT x)
>> +{
>> +  if (x < IMIN + (M * N) - GAP || x > 0)
>> +    return 0;
>> +  INT a = x - (M * N);
>> +  INT b = a / -N;
>> +  return b + -M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s5 (INT x)
>> +{
>> +  if (x > (-M * N) + GAP)
>> +    return 0;
>> +  INT a = x - (-M * N);
>> +  INT b = a / N;
>> +  return b + -M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s6 (INT x)
>> +{
>> +  if (x > IMAX - (M * N) + GAP || x < 0)
>> +    return 0;
>> +  INT a = x - (-M * N);
>> +  INT b = a / N;
>> +  return b + -M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s7 (INT x)
>> +{
>> +  if (x > (M * -N) + GAP)
>> +    return 0;
>> +  INT a = x - (M * -N);
>> +  INT b = a / -N;
>> +  return b + M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s8 (INT x)
>> +{
>> +  if (x > IMAX - (M * N) + GAP || x < 0)
>> +    return 0;
>> +  INT a = x - (M * -N);
>> +  INT b = a / -N;
>> +  return b + M;
>> +}
>> +
>> +UINT NOINLINE
>> +opt_u3 (UINT x)
>> +{
>> +  if (x < (M << N) - GAP)
>> +    return 0;
>> +  UINT a = x - (M << N);
>> +  UINT b = a >> N;
>> +  return b + M;
>> +}
>> +
>> +UINT NOINLINE
>> +opt_u4 (UINT x)
>> +{
>> +  if (x > (UMAX - (M << N)) + GAP)
>> +    return 0;
>> +  UINT a = x + (M << N);
>> +  UINT b = a >> N;
>> +  return b - M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s9 (INT x)
>> +{
>> +  if (x < (M << N) - GAP)
>> +    return 0;
>> +  INT a = x - (M << N);
>> +  INT b = a >> N;
>> +  return b + M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s10 (INT x)
>> +{
>> +  if (x < IMIN + (M << N) - GAP || x > 0)
>> +    return 0;
>> +  INT a = x - (M << N);
>> +  INT b = a >> N;
>> +  return b + M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s11 (INT x)
>> +{
>> +  if (x > (-M << N) + GAP)
>> +    return 0;
>> +  INT a = x - (-M << N);
>> +  INT b = a >> N;
>> +  return b + -M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s12 (INT x)
>> +{
>> +  if (x > IMAX - (M << N) + GAP || x < 0)
>> +    return 0;
>> +  INT a = x - (-M << N);
>> +  INT b = a >> N;
>> +  return b + -M;
>> +}
>> +
>> +UINT NOINLINE
>> +opt_u5 (UINT x, UINT n, UINT m)
>> +{
>> +  if (n > N || m > M)
>> +    return 0;
>> +  if (x < (M*N) - GAP)
>> +    return 0;
>> +  UINT a = x - (m * n);
>> +  UINT b = a / n;
>> +  return b + m;
>> +}
>> +
>> +UINT NOINLINE
>> +opt_u6 (UINT x, UINT n, UINT m)
>> +{
>> +  if (n > N || m > M)
>> +    return 0;
>> +  if (x > (UMAX - M*N) + GAP)
>> +    return 0;
>> +  UINT a = x + (m * n);
>> +  UINT b = a / n;
>> +  return b - m;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s13 (INT x, INT n, INT m)
>> +{
>> +  if (n > N || m > M || n < 0 || m < 0)
>> +    return 0;
>> +  if (x < (M*N) - GAP)
>> +    return 0;
>> +  INT a = x - (m * n);
>> +  INT b = a / n;
>> +  return b + m;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s14 (INT x, INT n, INT m)
>> +{
>> +  if (n > N || m > M || n < 0 || m < 0)
>> +    return 0;
>> +  if (x > -M*N + GAP)
>> +    return 0;
>> +  INT a = x + (m * n);
>> +  INT b = a / n;
>> +  return b - m;
>> +}
>> +
>> +INT
>> +opt_s15 (INT x, INT n, INT m)
>> +{
>> +  if (n > 0 || m > 0 || n < -N || m < -M)
>> +    return 0;
>> +  if (x < (M*N) - GAP)
>> +    return 0;
>> +  INT a = x - (m * n);
>> +  INT b = a / n;
>> +  return b + m;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s16 (INT x, INT n, INT m)
>> +{
>> +  if (n > 0 || m > 0 || n < -N || m < -M)
>> +    return 0;
>> +  if (x < 0 || x > (IMAX - M*N) + GAP)
>> +    return 0;
>> +  INT a = x + (m * n);
>> +  INT b = a / n;
>> +  return b - m;
>> +}
>> +
Richard Biener Aug. 28, 2023, 1:14 p.m. UTC | #3
On Wed, 23 Aug 2023, guojiufu wrote:

> Hi,
> 
> I would like to have a gentle ping...
> 
> BR,
> Jeff (Jiufu Guo)
> 
> On 2023-08-07 10:45, guojiufu via Gcc-patches wrote:
> > Hi,
> > 
> > Gentle ping...
> > 
> > On 2023-07-18 22:05, Jiufu Guo wrote:
> >> Hi,
> >> 
> >> Integer expression "(X - N * M) / N" can be optimized to "X / N - M"
> >> if there is no wrap/overflow/underflow and "X - N * M" has the same
> >> sign with "X".
> >> 
> >> Compare the previous version:
> >> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624067.html
> >> - APIs: overflow, nonnegative_p and nonpositive_p are moved close
> >>   to value range.
> >> - Use above APIs in match.pd.
> >> 
> >> Bootstrap & regtest pass on ppc64{,le} and x86_64.
> >> Is this patch ok for trunk?
> >> 
> >> BR,
> >> Jeff (Jiufu Guo)
> >> 
> >>  PR tree-optimization/108757
> >> 
> >> gcc/ChangeLog:
> >> 
> >>  * match.pd ((X - N * M) / N): New pattern.
> >>  ((X + N * M) / N): New pattern.
> >>  ((X + C) div_rshift N): New pattern.
> >> 
> >> gcc/testsuite/ChangeLog:
> >> 
> >>  * gcc.dg/pr108757-1.c: New test.
> >>  * gcc.dg/pr108757-2.c: New test.
> >>  * gcc.dg/pr108757.h: New test.
> >> 
> >> ---
> >>  gcc/match.pd                      |  85 +++++++++++
> >>  gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
> >>  gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
> >>  gcc/testsuite/gcc.dg/pr108757.h   | 233 
> >> ++++++++++++++++++++++++++++++
> >>  4 files changed, 355 insertions(+)
> >>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
> >> 
> >> diff --git a/gcc/match.pd b/gcc/match.pd
> >> index 8543f777a28..39dbb0567dc 100644
> >> --- a/gcc/match.pd
> >> +++ b/gcc/match.pd
> >> @@ -942,6 +942,91 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >>  #endif
> >>     ))))
> >> 
> >> +#if GIMPLE
> >> +(for div (trunc_div exact_div)
> >> + /* Simplify (t + M*N) / N -> t / N + M.  */
> >> + (simplify
> >> +  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)

The :c on the plus isn't necessary?

> >> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
> >> +  (if (INTEGRAL_TYPE_P (type)
> >> +       && get_range_query (cfun)->range_of_expr (vr1, @1)
> >> +       && get_range_query (cfun)->range_of_expr (vr2, @2)
> >> +       && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)

the multiplication doesn't overflow

> >> +       && get_range_query (cfun)->range_of_expr (vr0, @0)
> >> +       && get_range_query (cfun)->range_of_expr (vr3, @3)
> >> +       && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)

the add doesn't overflow

> >> +       && get_range_query (cfun)->range_of_expr (vr4, @4)
> >> +       && (TYPE_UNSIGNED (type)
> >> +	   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
> >> +	   || (vr0.nonpositive_p () && vr4.nonpositive_p ())))

I don't know what this checks - the add result and the add first
argument are not of opposite sign.  Huh.  At least this part
needs an explaining comment.

Sorry if we hashed this out before, but you can see I forgot
and it's not obvious.

> >> +  (plus (div @0 @2) @1))))
> >> +
> >> + /* Simplify (t - M*N) / N -> t / N - M.  */
> >> + (simplify
> >> +  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
> >> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
> >> +  (if (INTEGRAL_TYPE_P (type)
> >> +       && get_range_query (cfun)->range_of_expr (vr1, @1)
> >> +       && get_range_query (cfun)->range_of_expr (vr2, @2)
> >> +       && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
> >> +       && get_range_query (cfun)->range_of_expr (vr0, @0)
> >> +       && get_range_query (cfun)->range_of_expr (vr3, @3)
> >> +       && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
> >> +       && get_range_query (cfun)->range_of_expr (vr4, @4)
> >> +       && (TYPE_UNSIGNED (type)
> >> +	   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
> >> +	   || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
> >> +  (minus (div @0 @2) @1)))))

looks like exactly the same - if you use a

 (for addsub (plus minus)

you should be able to do range_op_handler (addsub).

> >> +
> >> +/* Simplify
> >> +   (t + C) / N -> t / N + C / N where C is multiple of N.
> >> +   (t + C) >> N -> t >> N + C>>N if low N bits of C is 0.  */
> >> +(for op (trunc_div exact_div rshift)
> >> + (simplify
> >> +  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
> >> +   (with
> >> +    {
> >> +      wide_int c = wi::to_wide (@1);
> >> +      wide_int n = wi::to_wide (@2);
> >> +      bool is_rshift = op == RSHIFT_EXPR;
> >> +      bool neg_c = false;
> >> +      bool ok = false;
> >> +      value_range vr0;
> >> +      if (INTEGRAL_TYPE_P (type)
> >> +	  && get_range_query (cfun)->range_of_expr (vr0, @0))
> >> +        {
> >> +	  ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
> >> +			 : wi::multiple_of_p (c, n, TYPE_SIGN (type));
> >> +	  value_range vr1, vr3;
> >> +	  ok = ok && get_range_query (cfun)->range_of_expr (vr1, @1)
> >> +	       && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
> >> +	       && get_range_query (cfun)->range_of_expr (vr3, @3)
> >> +	       && (TYPE_UNSIGNED (type)
> >> +		   || (vr0.nonnegative_p () && vr3.nonnegative_p ())
> >> +		   || (vr0.nonpositive_p () && vr3.nonpositive_p ()));
> >> +
> >> +	  /* Try check 'X + C' as 'X - -C' for unsigned.  */
> >> +	  if (!ok && TYPE_UNSIGNED (type) && c.sign_mask () < 0)
> >> +	    {
> >> +	      neg_c = true;
> >> +	      c = -c;
> >> +	      ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
> >> +			     : wi::multiple_of_p (c, n, UNSIGNED);
> >> +	      ok = ok && wi::geu_p (vr0.lower_bound (), c);
> >> +	    }
> >> +	}
> >> +    }
> >> +   (if (ok)
> >> +   (with
> >> +    {
> >> +      wide_int m;
> >> +      m = is_rshift ? wi::rshift (c, n, TYPE_SIGN (type))
> >> +		    : wi::div_trunc (c, n, TYPE_SIGN (type));
> >> +      m = neg_c ? -m : m;
> >> +    }

why doesn't this look as nice as the other pattern?  I'd put

 (if (wi::multiple_of_p ( ....))

for @1 and @2 outside, then the rest should follow the pattern
of the above patterns?

Thanks,
Richard.

> >> +   (plus (op @0 @2) { wide_int_to_tree(type, m); }))))))
> >> +#endif
> >> +
> >>  (for op (negate abs)
> >>   /* Simplify cos(-x) and cos(|x|) -> cos(x).  Similarly for cosh.  */
> >>   (for coss (COS COSH)
> >> diff --git a/gcc/testsuite/gcc.dg/pr108757-1.c
> >> b/gcc/testsuite/gcc.dg/pr108757-1.c
> >> new file mode 100644
> >> index 00000000000..7908f4bdcb8
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/pr108757-1.c
> >> @@ -0,0 +1,18 @@
> >> +/* PR tree-optimization/108757 */
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> >> +
> >> +#include <limits.h>
> >> +#define N 5
> >> +#define M 3
> >> +#define GAP 0
> >> +typedef unsigned int UINT;
> >> +typedef int INT;
> >> +#define UMAX UINT_MAX
> >> +#define IMAX INT_MAX
> >> +#define IMIN INT_MIN
> >> +#include "pr108757.h"
> >> +
> >> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\+ "
> >> "optimized" } } *
> >> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\- "
> >> "optimized" } } */
> >> +/* { dg-final { scan-tree-dump-not " = b_\[0-9\]+ \\+ " "optimized" } } */
> >> diff --git a/gcc/testsuite/gcc.dg/pr108757-2.c
> >> b/gcc/testsuite/gcc.dg/pr108757-2.c
> >> new file mode 100644
> >> index 00000000000..82bebd09944
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/pr108757-2.c
> >> @@ -0,0 +1,19 @@
> >> +/* PR tree-optimization/108757 */
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O2 -fdump-tree-optimized -fwrapv" } */
> >> +
> >> +#include <limits.h>
> >> +#define N 4
> >> +#define M 3
> >> +#define GAP 2
> >> +typedef unsigned int UINT;
> >> +typedef int INT;
> >> +#define UMAX UINT_MAX
> >> +#define IMAX INT_MAX
> >> +#define IMIN INT_MIN
> >> +#include "pr108757.h"
> >> +
> >> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\+ " 16
> >> "optimized" } } */
> >> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\- " 3
> >> "optimized" } } */
> >> +/* { dg-final { scan-tree-dump-times " \\+ x_\[0-9\]+\\(D\\)" 3
> >> "optimized" } } */
> >> +
> >> diff --git a/gcc/testsuite/gcc.dg/pr108757.h
> >> b/gcc/testsuite/gcc.dg/pr108757.h
> >> new file mode 100644
> >> index 00000000000..5550199c1ef
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/pr108757.h
> >> @@ -0,0 +1,233 @@
> >> +#define NOINLINE __attribute__ ((noinline))
> >> +UINT NOINLINE
> >> +opt_u1 (UINT x)
> >> +{
> >> +  if (x < (M * N) - GAP)
> >> +    return 0;
> >> +  UINT a = x - (M * N);
> >> +  UINT b = a / N;
> >> +  return b + M;
> >> +}
> >> +
> >> +UINT NOINLINE
> >> +opt_u2 (UINT x)
> >> +{
> >> +  if (x > (UMAX - (M * N) + GAP))
> >> +    return 0;
> >> +  UINT a = x + (M * N);
> >> +  UINT b = a / N;
> >> +  return b - M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s1 (INT x)
> >> +{
> >> +  if (x < (M * N) - GAP)
> >> +    return 0;
> >> +  INT a = x - (M * N);
> >> +  INT b = a / N;
> >> +  return b + M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s2 (INT x)
> >> +{
> >> +  if (x < IMIN + (M * N) - GAP || x > 0)
> >> +    return 0;
> >> +  INT a = x - (M * N);
> >> +  INT b = a / N;
> >> +  return b + M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s3 (INT x)
> >> +{
> >> +  if (x < (M * N) - GAP)
> >> +    return 0;
> >> +  INT a = x - (M * N);
> >> +  INT b = a / -N;
> >> +  return b + -M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s4 (INT x)
> >> +{
> >> +  if (x < IMIN + (M * N) - GAP || x > 0)
> >> +    return 0;
> >> +  INT a = x - (M * N);
> >> +  INT b = a / -N;
> >> +  return b + -M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s5 (INT x)
> >> +{
> >> +  if (x > (-M * N) + GAP)
> >> +    return 0;
> >> +  INT a = x - (-M * N);
> >> +  INT b = a / N;
> >> +  return b + -M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s6 (INT x)
> >> +{
> >> +  if (x > IMAX - (M * N) + GAP || x < 0)
> >> +    return 0;
> >> +  INT a = x - (-M * N);
> >> +  INT b = a / N;
> >> +  return b + -M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s7 (INT x)
> >> +{
> >> +  if (x > (M * -N) + GAP)
> >> +    return 0;
> >> +  INT a = x - (M * -N);
> >> +  INT b = a / -N;
> >> +  return b + M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s8 (INT x)
> >> +{
> >> +  if (x > IMAX - (M * N) + GAP || x < 0)
> >> +    return 0;
> >> +  INT a = x - (M * -N);
> >> +  INT b = a / -N;
> >> +  return b + M;
> >> +}
> >> +
> >> +UINT NOINLINE
> >> +opt_u3 (UINT x)
> >> +{
> >> +  if (x < (M << N) - GAP)
> >> +    return 0;
> >> +  UINT a = x - (M << N);
> >> +  UINT b = a >> N;
> >> +  return b + M;
> >> +}
> >> +
> >> +UINT NOINLINE
> >> +opt_u4 (UINT x)
> >> +{
> >> +  if (x > (UMAX - (M << N)) + GAP)
> >> +    return 0;
> >> +  UINT a = x + (M << N);
> >> +  UINT b = a >> N;
> >> +  return b - M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s9 (INT x)
> >> +{
> >> +  if (x < (M << N) - GAP)
> >> +    return 0;
> >> +  INT a = x - (M << N);
> >> +  INT b = a >> N;
> >> +  return b + M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s10 (INT x)
> >> +{
> >> +  if (x < IMIN + (M << N) - GAP || x > 0)
> >> +    return 0;
> >> +  INT a = x - (M << N);
> >> +  INT b = a >> N;
> >> +  return b + M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s11 (INT x)
> >> +{
> >> +  if (x > (-M << N) + GAP)
> >> +    return 0;
> >> +  INT a = x - (-M << N);
> >> +  INT b = a >> N;
> >> +  return b + -M;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s12 (INT x)
> >> +{
> >> +  if (x > IMAX - (M << N) + GAP || x < 0)
> >> +    return 0;
> >> +  INT a = x - (-M << N);
> >> +  INT b = a >> N;
> >> +  return b + -M;
> >> +}
> >> +
> >> +UINT NOINLINE
> >> +opt_u5 (UINT x, UINT n, UINT m)
> >> +{
> >> +  if (n > N || m > M)
> >> +    return 0;
> >> +  if (x < (M*N) - GAP)
> >> +    return 0;
> >> +  UINT a = x - (m * n);
> >> +  UINT b = a / n;
> >> +  return b + m;
> >> +}
> >> +
> >> +UINT NOINLINE
> >> +opt_u6 (UINT x, UINT n, UINT m)
> >> +{
> >> +  if (n > N || m > M)
> >> +    return 0;
> >> +  if (x > (UMAX - M*N) + GAP)
> >> +    return 0;
> >> +  UINT a = x + (m * n);
> >> +  UINT b = a / n;
> >> +  return b - m;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s13 (INT x, INT n, INT m)
> >> +{
> >> +  if (n > N || m > M || n < 0 || m < 0)
> >> +    return 0;
> >> +  if (x < (M*N) - GAP)
> >> +    return 0;
> >> +  INT a = x - (m * n);
> >> +  INT b = a / n;
> >> +  return b + m;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s14 (INT x, INT n, INT m)
> >> +{
> >> +  if (n > N || m > M || n < 0 || m < 0)
> >> +    return 0;
> >> +  if (x > -M*N + GAP)
> >> +    return 0;
> >> +  INT a = x + (m * n);
> >> +  INT b = a / n;
> >> +  return b - m;
> >> +}
> >> +
> >> +INT
> >> +opt_s15 (INT x, INT n, INT m)
> >> +{
> >> +  if (n > 0 || m > 0 || n < -N || m < -M)
> >> +    return 0;
> >> +  if (x < (M*N) - GAP)
> >> +    return 0;
> >> +  INT a = x - (m * n);
> >> +  INT b = a / n;
> >> +  return b + m;
> >> +}
> >> +
> >> +INT NOINLINE
> >> +opt_s16 (INT x, INT n, INT m)
> >> +{
> >> +  if (n > 0 || m > 0 || n < -N || m < -M)
> >> +    return 0;
> >> +  if (x < 0 || x > (IMAX - M*N) + GAP)
> >> +    return 0;
> >> +  INT a = x + (m * n);
> >> +  INT b = a / n;
> >> +  return b - m;
> >> +}
> >> +
>
Jiufu Guo Aug. 29, 2023, 7:25 a.m. UTC | #4
Hi Richard,

Thanks a lot for your review!

Richard Biener <rguenther@suse.de> writes:

> On Wed, 23 Aug 2023, guojiufu wrote:
>
>> Hi,
>> 
>> I would like to have a gentle ping...
>> 
>> BR,
>> Jeff (Jiufu Guo)
>> 
>> On 2023-08-07 10:45, guojiufu via Gcc-patches wrote:
>> > Hi,
>> > 
>> > Gentle ping...
>> > 
>> > On 2023-07-18 22:05, Jiufu Guo wrote:
>> >> Hi,
>> >> 
>> >> Integer expression "(X - N * M) / N" can be optimized to "X / N - M"
>> >> if there is no wrap/overflow/underflow and "X - N * M" has the same
>> >> sign with "X".
>> >> 
>> >> Compare the previous version:
>> >> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624067.html
>> >> - APIs: overflow, nonnegative_p and nonpositive_p are moved close
>> >>   to value range.
>> >> - Use above APIs in match.pd.
>> >> 
>> >> Bootstrap & regtest pass on ppc64{,le} and x86_64.
>> >> Is this patch ok for trunk?
>> >> 
>> >> BR,
>> >> Jeff (Jiufu Guo)
>> >> 
>> >>  PR tree-optimization/108757
>> >> 
>> >> gcc/ChangeLog:
>> >> 
>> >>  * match.pd ((X - N * M) / N): New pattern.
>> >>  ((X + N * M) / N): New pattern.
>> >>  ((X + C) div_rshift N): New pattern.
>> >> 
>> >> gcc/testsuite/ChangeLog:
>> >> 
>> >>  * gcc.dg/pr108757-1.c: New test.
>> >>  * gcc.dg/pr108757-2.c: New test.
>> >>  * gcc.dg/pr108757.h: New test.
>> >> 
>> >> ---
>> >>  gcc/match.pd                      |  85 +++++++++++
>> >>  gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
>> >>  gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
>> >>  gcc/testsuite/gcc.dg/pr108757.h   | 233 
>> >> ++++++++++++++++++++++++++++++
>> >>  4 files changed, 355 insertions(+)
>> >>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
>> >>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
>> >>  create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
>> >> 
>> >> diff --git a/gcc/match.pd b/gcc/match.pd
>> >> index 8543f777a28..39dbb0567dc 100644
>> >> --- a/gcc/match.pd
>> >> +++ b/gcc/match.pd
>> >> @@ -942,6 +942,91 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>> >>  #endif
>> >>     ))))
>> >> 
>> >> +#if GIMPLE
>> >> +(for div (trunc_div exact_div)
>> >> + /* Simplify (t + M*N) / N -> t / N + M.  */
>> >> + (simplify
>> >> +  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
>
> The :c on the plus isn't necessary?

":c" would be needed.  Because when the pattern is matched
in gimple passes(e.g. vrp), the insn sequences would looks
like: 
"%_6 = N * M; %_7 = %_6 + t":  "%_6" is leading "t".

Without ":c", the pattern may need write as:
(plus@4 (mult:c@3 @1 @2) $0).

>
>> >> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
>> >> +  (if (INTEGRAL_TYPE_P (type)
>> >> +       && get_range_query (cfun)->range_of_expr (vr1, @1)
>> >> +       && get_range_query (cfun)->range_of_expr (vr2, @2)
>> >> +       && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
>
> the multiplication doesn't overflow
Yes, this is checking no overflow on mult.
>
>> >> +       && get_range_query (cfun)->range_of_expr (vr0, @0)
>> >> +       && get_range_query (cfun)->range_of_expr (vr3, @3)
>> >> +       && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
>
> the add doesn't overflow
Yes, this is checking no overflow on add.
>
>> >> +       && get_range_query (cfun)->range_of_expr (vr4, @4)
>> >> +       && (TYPE_UNSIGNED (type)
>> >> +	   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
>> >> +	   || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
>
> I don't know what this checks - the add result and the add first
> argument are not of opposite sign.  Huh.  At least this part
> needs an explaining comment.

Right, "X-N*M" is not with opposite sign of "X".

Because it is trunc_div in this pattern.  Which cutting towards
zero, if "X-N*M" changes the sign of "X", then "(X-N*M)/N" and
"X/N" cut mod to different direction.

A comment is needed, I will add.

>
> Sorry if we hashed this out before, but you can see I forgot
> and it's not obvious.
>
>> >> +  (plus (div @0 @2) @1))))
>> >> +
>> >> + /* Simplify (t - M*N) / N -> t / N - M.  */
>> >> + (simplify
>> >> +  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
>> >> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
>> >> +  (if (INTEGRAL_TYPE_P (type)
>> >> +       && get_range_query (cfun)->range_of_expr (vr1, @1)
>> >> +       && get_range_query (cfun)->range_of_expr (vr2, @2)
>> >> +       && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
>> >> +       && get_range_query (cfun)->range_of_expr (vr0, @0)
>> >> +       && get_range_query (cfun)->range_of_expr (vr3, @3)
>> >> +       && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
>> >> +       && get_range_query (cfun)->range_of_expr (vr4, @4)
>> >> +       && (TYPE_UNSIGNED (type)
>> >> +	   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
>> >> +	   || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
>> >> +  (minus (div @0 @2) @1)))))
>
> looks like exactly the same - if you use a
>
>  (for addsub (plus minus)

I also tried to use this.  But fail, the reason is similar
with adding ":c" for "plus".
For "plus", the insn sequences would like:
"%_6 = N * M; %_7 = %_6 + t":  "%_6" is leading "t".
(plus@4 (mult:c@3 @1 @2) $0).

But for "sub", the insn sequence would be:
"%_6 = N * M; %_7 = t - %_6":  "t" is leading "%6".
(sub@4 $0 (mult:c@3 @1 @2)).

So, I fail to combine these two patterns into one.

>
> you should be able to do range_op_handler (addsub).
>
>> >> +
>> >> +/* Simplify
>> >> +   (t + C) / N -> t / N + C / N where C is multiple of N.
>> >> +   (t + C) >> N -> t >> N + C>>N if low N bits of C is 0.  */
>> >> +(for op (trunc_div exact_div rshift)
>> >> + (simplify
>> >> +  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
>> >> +   (with
>> >> +    {
>> >> +      wide_int c = wi::to_wide (@1);
>> >> +      wide_int n = wi::to_wide (@2);
>> >> +      bool is_rshift = op == RSHIFT_EXPR;
>> >> +      bool neg_c = false;
>> >> +      bool ok = false;
>> >> +      value_range vr0;
>> >> +      if (INTEGRAL_TYPE_P (type)
>> >> +	  && get_range_query (cfun)->range_of_expr (vr0, @0))
>> >> +        {
>> >> +	  ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
>> >> +			 : wi::multiple_of_p (c, n, TYPE_SIGN (type));
>> >> +	  value_range vr1, vr3;
>> >> +	  ok = ok && get_range_query (cfun)->range_of_expr (vr1, @1)
>> >> +	       && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
>> >> +	       && get_range_query (cfun)->range_of_expr (vr3, @3)
>> >> +	       && (TYPE_UNSIGNED (type)
>> >> +		   || (vr0.nonnegative_p () && vr3.nonnegative_p ())
>> >> +		   || (vr0.nonpositive_p () && vr3.nonpositive_p ()));
>> >> +
>> >> +	  /* Try check 'X + C' as 'X - -C' for unsigned.  */
>> >> +	  if (!ok && TYPE_UNSIGNED (type) && c.sign_mask () < 0)
>> >> +	    {
>> >> +	      neg_c = true;
>> >> +	      c = -c;
>> >> +	      ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
>> >> +			     : wi::multiple_of_p (c, n, UNSIGNED);
>> >> +	      ok = ok && wi::geu_p (vr0.lower_bound (), c);
>> >> +	    }
>> >> +	}
>> >> +    }
>> >> +   (if (ok)
>> >> +   (with
>> >> +    {
>> >> +      wide_int m;
>> >> +      m = is_rshift ? wi::rshift (c, n, TYPE_SIGN (type))
>> >> +		    : wi::div_trunc (c, n, TYPE_SIGN (type));
>> >> +      m = neg_c ? -m : m;
>> >> +    }
>
> why doesn't this look as nice as the other pattern?  I'd put
>
>  (if (wi::multiple_of_p ( ....))
>
> for @1 and @2 outside, then the rest should follow the pattern
> of the above patterns?

Thanks, I would try to refactor the pattern.

BR,
Jeff (Jiufu Guo)

>
> Thanks,
> Richard.
>
>> >> +   (plus (op @0 @2) { wide_int_to_tree(type, m); }))))))
>> >> +#endif
>> >> +
>> >>  (for op (negate abs)
>> >>   /* Simplify cos(-x) and cos(|x|) -> cos(x).  Similarly for cosh.  */
>> >>   (for coss (COS COSH)
>> >> diff --git a/gcc/testsuite/gcc.dg/pr108757-1.c
>> >> b/gcc/testsuite/gcc.dg/pr108757-1.c
>> >> new file mode 100644
>> >> index 00000000000..7908f4bdcb8
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/pr108757-1.c
>> >> @@ -0,0 +1,18 @@
>> >> +/* PR tree-optimization/108757 */
>> >> +/* { dg-do compile } */
>> >> +/* { dg-options "-O2 -fdump-tree-optimized" } */
>> >> +
>> >> +#include <limits.h>
>> >> +#define N 5
>> >> +#define M 3
>> >> +#define GAP 0
>> >> +typedef unsigned int UINT;
>> >> +typedef int INT;
>> >> +#define UMAX UINT_MAX
>> >> +#define IMAX INT_MAX
>> >> +#define IMIN INT_MIN
>> >> +#include "pr108757.h"
>> >> +
>> >> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\+ "
>> >> "optimized" } } *
>> >> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\- "
>> >> "optimized" } } */
>> >> +/* { dg-final { scan-tree-dump-not " = b_\[0-9\]+ \\+ " "optimized" } } */
>> >> diff --git a/gcc/testsuite/gcc.dg/pr108757-2.c
>> >> b/gcc/testsuite/gcc.dg/pr108757-2.c
>> >> new file mode 100644
>> >> index 00000000000..82bebd09944
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/pr108757-2.c
>> >> @@ -0,0 +1,19 @@
>> >> +/* PR tree-optimization/108757 */
>> >> +/* { dg-do compile } */
>> >> +/* { dg-options "-O2 -fdump-tree-optimized -fwrapv" } */
>> >> +
>> >> +#include <limits.h>
>> >> +#define N 4
>> >> +#define M 3
>> >> +#define GAP 2
>> >> +typedef unsigned int UINT;
>> >> +typedef int INT;
>> >> +#define UMAX UINT_MAX
>> >> +#define IMAX INT_MAX
>> >> +#define IMIN INT_MIN
>> >> +#include "pr108757.h"
>> >> +
>> >> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\+ " 16
>> >> "optimized" } } */
>> >> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\- " 3
>> >> "optimized" } } */
>> >> +/* { dg-final { scan-tree-dump-times " \\+ x_\[0-9\]+\\(D\\)" 3
>> >> "optimized" } } */
>> >> +
>> >> diff --git a/gcc/testsuite/gcc.dg/pr108757.h
>> >> b/gcc/testsuite/gcc.dg/pr108757.h
>> >> new file mode 100644
>> >> index 00000000000..5550199c1ef
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/pr108757.h
>> >> @@ -0,0 +1,233 @@
>> >> +#define NOINLINE __attribute__ ((noinline))
>> >> +UINT NOINLINE
>> >> +opt_u1 (UINT x)
>> >> +{
>> >> +  if (x < (M * N) - GAP)
>> >> +    return 0;
>> >> +  UINT a = x - (M * N);
>> >> +  UINT b = a / N;
>> >> +  return b + M;
>> >> +}
>> >> +
>> >> +UINT NOINLINE
>> >> +opt_u2 (UINT x)
>> >> +{
>> >> +  if (x > (UMAX - (M * N) + GAP))
>> >> +    return 0;
>> >> +  UINT a = x + (M * N);
>> >> +  UINT b = a / N;
>> >> +  return b - M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s1 (INT x)
>> >> +{
>> >> +  if (x < (M * N) - GAP)
>> >> +    return 0;
>> >> +  INT a = x - (M * N);
>> >> +  INT b = a / N;
>> >> +  return b + M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s2 (INT x)
>> >> +{
>> >> +  if (x < IMIN + (M * N) - GAP || x > 0)
>> >> +    return 0;
>> >> +  INT a = x - (M * N);
>> >> +  INT b = a / N;
>> >> +  return b + M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s3 (INT x)
>> >> +{
>> >> +  if (x < (M * N) - GAP)
>> >> +    return 0;
>> >> +  INT a = x - (M * N);
>> >> +  INT b = a / -N;
>> >> +  return b + -M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s4 (INT x)
>> >> +{
>> >> +  if (x < IMIN + (M * N) - GAP || x > 0)
>> >> +    return 0;
>> >> +  INT a = x - (M * N);
>> >> +  INT b = a / -N;
>> >> +  return b + -M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s5 (INT x)
>> >> +{
>> >> +  if (x > (-M * N) + GAP)
>> >> +    return 0;
>> >> +  INT a = x - (-M * N);
>> >> +  INT b = a / N;
>> >> +  return b + -M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s6 (INT x)
>> >> +{
>> >> +  if (x > IMAX - (M * N) + GAP || x < 0)
>> >> +    return 0;
>> >> +  INT a = x - (-M * N);
>> >> +  INT b = a / N;
>> >> +  return b + -M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s7 (INT x)
>> >> +{
>> >> +  if (x > (M * -N) + GAP)
>> >> +    return 0;
>> >> +  INT a = x - (M * -N);
>> >> +  INT b = a / -N;
>> >> +  return b + M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s8 (INT x)
>> >> +{
>> >> +  if (x > IMAX - (M * N) + GAP || x < 0)
>> >> +    return 0;
>> >> +  INT a = x - (M * -N);
>> >> +  INT b = a / -N;
>> >> +  return b + M;
>> >> +}
>> >> +
>> >> +UINT NOINLINE
>> >> +opt_u3 (UINT x)
>> >> +{
>> >> +  if (x < (M << N) - GAP)
>> >> +    return 0;
>> >> +  UINT a = x - (M << N);
>> >> +  UINT b = a >> N;
>> >> +  return b + M;
>> >> +}
>> >> +
>> >> +UINT NOINLINE
>> >> +opt_u4 (UINT x)
>> >> +{
>> >> +  if (x > (UMAX - (M << N)) + GAP)
>> >> +    return 0;
>> >> +  UINT a = x + (M << N);
>> >> +  UINT b = a >> N;
>> >> +  return b - M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s9 (INT x)
>> >> +{
>> >> +  if (x < (M << N) - GAP)
>> >> +    return 0;
>> >> +  INT a = x - (M << N);
>> >> +  INT b = a >> N;
>> >> +  return b + M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s10 (INT x)
>> >> +{
>> >> +  if (x < IMIN + (M << N) - GAP || x > 0)
>> >> +    return 0;
>> >> +  INT a = x - (M << N);
>> >> +  INT b = a >> N;
>> >> +  return b + M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s11 (INT x)
>> >> +{
>> >> +  if (x > (-M << N) + GAP)
>> >> +    return 0;
>> >> +  INT a = x - (-M << N);
>> >> +  INT b = a >> N;
>> >> +  return b + -M;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s12 (INT x)
>> >> +{
>> >> +  if (x > IMAX - (M << N) + GAP || x < 0)
>> >> +    return 0;
>> >> +  INT a = x - (-M << N);
>> >> +  INT b = a >> N;
>> >> +  return b + -M;
>> >> +}
>> >> +
>> >> +UINT NOINLINE
>> >> +opt_u5 (UINT x, UINT n, UINT m)
>> >> +{
>> >> +  if (n > N || m > M)
>> >> +    return 0;
>> >> +  if (x < (M*N) - GAP)
>> >> +    return 0;
>> >> +  UINT a = x - (m * n);
>> >> +  UINT b = a / n;
>> >> +  return b + m;
>> >> +}
>> >> +
>> >> +UINT NOINLINE
>> >> +opt_u6 (UINT x, UINT n, UINT m)
>> >> +{
>> >> +  if (n > N || m > M)
>> >> +    return 0;
>> >> +  if (x > (UMAX - M*N) + GAP)
>> >> +    return 0;
>> >> +  UINT a = x + (m * n);
>> >> +  UINT b = a / n;
>> >> +  return b - m;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s13 (INT x, INT n, INT m)
>> >> +{
>> >> +  if (n > N || m > M || n < 0 || m < 0)
>> >> +    return 0;
>> >> +  if (x < (M*N) - GAP)
>> >> +    return 0;
>> >> +  INT a = x - (m * n);
>> >> +  INT b = a / n;
>> >> +  return b + m;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s14 (INT x, INT n, INT m)
>> >> +{
>> >> +  if (n > N || m > M || n < 0 || m < 0)
>> >> +    return 0;
>> >> +  if (x > -M*N + GAP)
>> >> +    return 0;
>> >> +  INT a = x + (m * n);
>> >> +  INT b = a / n;
>> >> +  return b - m;
>> >> +}
>> >> +
>> >> +INT
>> >> +opt_s15 (INT x, INT n, INT m)
>> >> +{
>> >> +  if (n > 0 || m > 0 || n < -N || m < -M)
>> >> +    return 0;
>> >> +  if (x < (M*N) - GAP)
>> >> +    return 0;
>> >> +  INT a = x - (m * n);
>> >> +  INT b = a / n;
>> >> +  return b + m;
>> >> +}
>> >> +
>> >> +INT NOINLINE
>> >> +opt_s16 (INT x, INT n, INT m)
>> >> +{
>> >> +  if (n > 0 || m > 0 || n < -N || m < -M)
>> >> +    return 0;
>> >> +  if (x < 0 || x > (IMAX - M*N) + GAP)
>> >> +    return 0;
>> >> +  INT a = x + (m * n);
>> >> +  INT b = a / n;
>> >> +  return b - m;
>> >> +}
>> >> +
>>
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 8543f777a28..39dbb0567dc 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -942,6 +942,91 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 #endif
    ))))
 
+#if GIMPLE
+(for div (trunc_div exact_div)
+ /* Simplify (t + M*N) / N -> t / N + M.  */
+ (simplify
+  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
+  (with {value_range vr0, vr1, vr2, vr3, vr4;}
+  (if (INTEGRAL_TYPE_P (type)
+       && get_range_query (cfun)->range_of_expr (vr1, @1)
+       && get_range_query (cfun)->range_of_expr (vr2, @2)
+       && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
+       && get_range_query (cfun)->range_of_expr (vr0, @0)
+       && get_range_query (cfun)->range_of_expr (vr3, @3)
+       && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
+       && get_range_query (cfun)->range_of_expr (vr4, @4)
+       && (TYPE_UNSIGNED (type)
+	   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
+	   || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
+  (plus (div @0 @2) @1))))
+
+ /* Simplify (t - M*N) / N -> t / N - M.  */
+ (simplify
+  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
+  (with {value_range vr0, vr1, vr2, vr3, vr4;}
+  (if (INTEGRAL_TYPE_P (type)
+       && get_range_query (cfun)->range_of_expr (vr1, @1)
+       && get_range_query (cfun)->range_of_expr (vr2, @2)
+       && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
+       && get_range_query (cfun)->range_of_expr (vr0, @0)
+       && get_range_query (cfun)->range_of_expr (vr3, @3)
+       && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
+       && get_range_query (cfun)->range_of_expr (vr4, @4)
+       && (TYPE_UNSIGNED (type)
+	   || (vr0.nonnegative_p () && vr4.nonnegative_p ())
+	   || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
+  (minus (div @0 @2) @1)))))
+
+/* Simplify
+   (t + C) / N -> t / N + C / N where C is multiple of N.
+   (t + C) >> N -> t >> N + C>>N if low N bits of C is 0.  */
+(for op (trunc_div exact_div rshift)
+ (simplify
+  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
+   (with
+    {
+      wide_int c = wi::to_wide (@1);
+      wide_int n = wi::to_wide (@2);
+      bool is_rshift = op == RSHIFT_EXPR;
+      bool neg_c = false;
+      bool ok = false;
+      value_range vr0;
+      if (INTEGRAL_TYPE_P (type)
+	  && get_range_query (cfun)->range_of_expr (vr0, @0))
+        {
+	  ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
+			 : wi::multiple_of_p (c, n, TYPE_SIGN (type));
+	  value_range vr1, vr3;
+	  ok = ok && get_range_query (cfun)->range_of_expr (vr1, @1)
+	       && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
+	       && get_range_query (cfun)->range_of_expr (vr3, @3)
+	       && (TYPE_UNSIGNED (type)
+		   || (vr0.nonnegative_p () && vr3.nonnegative_p ())
+		   || (vr0.nonpositive_p () && vr3.nonpositive_p ()));
+
+	  /* Try check 'X + C' as 'X - -C' for unsigned.  */
+	  if (!ok && TYPE_UNSIGNED (type) && c.sign_mask () < 0)
+	    {
+	      neg_c = true;
+	      c = -c;
+	      ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
+			     : wi::multiple_of_p (c, n, UNSIGNED);
+	      ok = ok && wi::geu_p (vr0.lower_bound (), c);
+	    }
+	}
+    }
+   (if (ok)
+   (with
+    {
+      wide_int m;
+      m = is_rshift ? wi::rshift (c, n, TYPE_SIGN (type))
+		    : wi::div_trunc (c, n, TYPE_SIGN (type));
+      m = neg_c ? -m : m;
+    }
+   (plus (op @0 @2) { wide_int_to_tree(type, m); }))))))
+#endif
+
 (for op (negate abs)
  /* Simplify cos(-x) and cos(|x|) -> cos(x).  Similarly for cosh.  */
  (for coss (COS COSH)
diff --git a/gcc/testsuite/gcc.dg/pr108757-1.c b/gcc/testsuite/gcc.dg/pr108757-1.c
new file mode 100644
index 00000000000..7908f4bdcb8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr108757-1.c
@@ -0,0 +1,18 @@ 
+/* PR tree-optimization/108757 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include <limits.h>
+#define N 5
+#define M 3
+#define GAP 0
+typedef unsigned int UINT;
+typedef int INT;
+#define UMAX UINT_MAX
+#define IMAX INT_MAX
+#define IMIN INT_MIN
+#include "pr108757.h"
+
+/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\+ " "optimized" } } *
+/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\- " "optimized" } } */
+/* { dg-final { scan-tree-dump-not " = b_\[0-9\]+ \\+ " "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/pr108757-2.c b/gcc/testsuite/gcc.dg/pr108757-2.c
new file mode 100644
index 00000000000..82bebd09944
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr108757-2.c
@@ -0,0 +1,19 @@ 
+/* PR tree-optimization/108757 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -fwrapv" } */
+
+#include <limits.h>
+#define N 4
+#define M 3
+#define GAP 2
+typedef unsigned int UINT;
+typedef int INT;
+#define UMAX UINT_MAX
+#define IMAX INT_MAX
+#define IMIN INT_MIN
+#include "pr108757.h"
+
+/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\+ " 16 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\- " 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\+ x_\[0-9\]+\\(D\\)" 3 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/pr108757.h b/gcc/testsuite/gcc.dg/pr108757.h
new file mode 100644
index 00000000000..5550199c1ef
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr108757.h
@@ -0,0 +1,233 @@ 
+#define NOINLINE __attribute__ ((noinline))
+UINT NOINLINE
+opt_u1 (UINT x)
+{
+  if (x < (M * N) - GAP)
+    return 0;
+  UINT a = x - (M * N);
+  UINT b = a / N;
+  return b + M;
+}
+
+UINT NOINLINE
+opt_u2 (UINT x)
+{
+  if (x > (UMAX - (M * N) + GAP))
+    return 0;
+  UINT a = x + (M * N);
+  UINT b = a / N;
+  return b - M;
+}
+
+INT NOINLINE
+opt_s1 (INT x)
+{
+  if (x < (M * N) - GAP)
+    return 0;
+  INT a = x - (M * N);
+  INT b = a / N;
+  return b + M;
+}
+
+INT NOINLINE
+opt_s2 (INT x)
+{
+  if (x < IMIN + (M * N) - GAP || x > 0)
+    return 0;
+  INT a = x - (M * N);
+  INT b = a / N;
+  return b + M;
+}
+
+INT NOINLINE
+opt_s3 (INT x)
+{
+  if (x < (M * N) - GAP)
+    return 0;
+  INT a = x - (M * N);
+  INT b = a / -N;
+  return b + -M;
+}
+
+INT NOINLINE
+opt_s4 (INT x)
+{
+  if (x < IMIN + (M * N) - GAP || x > 0)
+    return 0;
+  INT a = x - (M * N);
+  INT b = a / -N;
+  return b + -M;
+}
+
+INT NOINLINE
+opt_s5 (INT x)
+{
+  if (x > (-M * N) + GAP)
+    return 0;
+  INT a = x - (-M * N);
+  INT b = a / N;
+  return b + -M;
+}
+
+INT NOINLINE
+opt_s6 (INT x)
+{
+  if (x > IMAX - (M * N) + GAP || x < 0)
+    return 0;
+  INT a = x - (-M * N);
+  INT b = a / N;
+  return b + -M;
+}
+
+INT NOINLINE
+opt_s7 (INT x)
+{
+  if (x > (M * -N) + GAP)
+    return 0;
+  INT a = x - (M * -N);
+  INT b = a / -N;
+  return b + M;
+}
+
+INT NOINLINE
+opt_s8 (INT x)
+{
+  if (x > IMAX - (M * N) + GAP || x < 0)
+    return 0;
+  INT a = x - (M * -N);
+  INT b = a / -N;
+  return b + M;
+}
+
+UINT NOINLINE
+opt_u3 (UINT x)
+{
+  if (x < (M << N) - GAP)
+    return 0;
+  UINT a = x - (M << N);
+  UINT b = a >> N;
+  return b + M;
+}
+
+UINT NOINLINE
+opt_u4 (UINT x)
+{
+  if (x > (UMAX - (M << N)) + GAP)
+    return 0;
+  UINT a = x + (M << N);
+  UINT b = a >> N;
+  return b - M;
+}
+
+INT NOINLINE
+opt_s9 (INT x)
+{
+  if (x < (M << N) - GAP)
+    return 0;
+  INT a = x - (M << N);
+  INT b = a >> N;
+  return b + M;
+}
+
+INT NOINLINE
+opt_s10 (INT x)
+{
+  if (x < IMIN + (M << N) - GAP || x > 0)
+    return 0;
+  INT a = x - (M << N);
+  INT b = a >> N;
+  return b + M;
+}
+
+INT NOINLINE
+opt_s11 (INT x)
+{
+  if (x > (-M << N) + GAP)
+    return 0;
+  INT a = x - (-M << N);
+  INT b = a >> N;
+  return b + -M;
+}
+
+INT NOINLINE
+opt_s12 (INT x)
+{
+  if (x > IMAX - (M << N) + GAP || x < 0)
+    return 0;
+  INT a = x - (-M << N);
+  INT b = a >> N;
+  return b + -M;
+}
+
+UINT NOINLINE
+opt_u5 (UINT x, UINT n, UINT m)
+{
+  if (n > N || m > M)
+    return 0;
+  if (x < (M*N) - GAP)
+    return 0;
+  UINT a = x - (m * n);
+  UINT b = a / n;
+  return b + m;
+}
+
+UINT NOINLINE
+opt_u6 (UINT x, UINT n, UINT m)
+{
+  if (n > N || m > M)
+    return 0;
+  if (x > (UMAX - M*N) + GAP)
+    return 0;
+  UINT a = x + (m * n);
+  UINT b = a / n;
+  return b - m;
+}
+
+INT NOINLINE
+opt_s13 (INT x, INT n, INT m)
+{
+  if (n > N || m > M || n < 0 || m < 0)
+    return 0;
+  if (x < (M*N) - GAP)
+    return 0;
+  INT a = x - (m * n);
+  INT b = a / n;
+  return b + m;
+}
+
+INT NOINLINE
+opt_s14 (INT x, INT n, INT m)
+{
+  if (n > N || m > M || n < 0 || m < 0)
+    return 0;
+  if (x > -M*N + GAP)
+    return 0;
+  INT a = x + (m * n);
+  INT b = a / n;
+  return b - m;
+}
+
+INT
+opt_s15 (INT x, INT n, INT m)
+{
+  if (n > 0 || m > 0 || n < -N || m < -M)
+    return 0;
+  if (x < (M*N) - GAP)
+    return 0;
+  INT a = x - (m * n);
+  INT b = a / n;
+  return b + m;
+}
+
+INT NOINLINE
+opt_s16 (INT x, INT n, INT m)
+{
+  if (n > 0 || m > 0 || n < -N || m < -M)
+    return 0;
+  if (x < 0 || x > (IMAX - M*N) + GAP)
+    return 0;
+  INT a = x + (m * n);
+  INT b = a / n;
+  return b - m;
+}
+