
Simplify X * C1 == C2 with wrapping overflow

Message ID: alpine.DEB.2.23.453.2008092251290.17577@stedding.saclay.inria.fr
State: New
Series: Simplify X * C1 == C2 with wrapping overflow

Commit Message

Marc Glisse Aug. 9, 2020, 9:24 p.m. UTC
Odd numbers are invertible in Z / 2^n Z, so X * C1 == C2 can be rewritten 
as X == C2 * inv(C1) when overflow wraps.
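
As a standalone illustration (not part of the patch): in 32-bit wrapping 
arithmetic, inv(3) mod 2^32 is 2863311531, so 3*x == 5 becomes 
x == 5 * 2863311531 mod 2^32 == 1431655767, which is the constant the new 
testcase scans for. A minimal C++ check of that arithmetic:

    #include <cstdint>
    #include <cassert>

    int main ()
    {
      /* 3 * inv3 wraps to 1 (mod 2^32), so inv3 is the inverse of 3.  */
      uint32_t inv3 = 2863311531u;
      assert ((uint32_t) (3u * inv3) == 1u);
      /* Hence 3*x == 5 holds exactly when x == 5 * inv3 (mod 2^32).  */
      uint32_t c = (uint32_t) (5u * inv3);
      assert (c == 1431655767u);
      assert ((uint32_t) (3u * c) == 5u);
      return 0;
    }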

mod_inv should probably be updated to better match the other wide_int 
functions, but that's a separate issue.

Bootstrap+regtest on x86_64-pc-linux-gnu.

2020-08-10  Marc Glisse  <marc.glisse@inria.fr>

 	PR tree-optimization/95433
 	* match.pd (X * C1 == C2): Handle wrapping overflow.
 	* expr.c (maybe_optimize_mod_cmp): Qualify call to mod_inv.
 	(mod_inv): Move...
 	* wide-int.cc (mod_inv): ... here.
 	* wide-int.h (mod_inv): Declare it.

 	* gcc.dg/tree-ssa/pr95433-2.c: New file.

Comments

Jakub Jelinek Aug. 10, 2020, 8:39 a.m. UTC | #1
On Sun, Aug 09, 2020 at 11:24:54PM +0200, Marc Glisse wrote:
> Odd numbers are invertible in Z / 2^n Z, so X * C1 == C2 can be rewritten as
> X == C2 * inv(C1) when overflow wraps.
> 
> mod_inv should probably be updated to better match the other wide_int
> functions, but that's a separate issue.
> 
> Bootstrap+regtest on x86_64-pc-linux-gnu.
> 
> 2020-08-10  Marc Glisse  <marc.glisse@inria.fr>
> 
> 	PR tree-optimization/95433
> 	* match.pd (X * C1 == C2): Handle wrapping overflow.
> 	* expr.c (maybe_optimize_mod_cmp): Qualify call to mod_inv.
> 	(mod_inv): Move...
> 	* wide-int.cc (mod_inv): ... here.
> 	* wide-int.h (mod_inv): Declare it.
> 
> 	* gcc.dg/tree-ssa/pr95433-2.c: New file.

LGTM, thanks.

	Jakub

Patch

diff --git a/gcc/expr.c b/gcc/expr.c
index a150fa0d3b5..ebf0c9e4797 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -11859,38 +11859,6 @@  string_constant (tree arg, tree *ptr_offset, tree *mem_size, tree *decl)
   return init;
 }
 
-/* Compute the modular multiplicative inverse of A modulo M
-   using extended Euclid's algorithm.  Assumes A and M are coprime.  */
-static wide_int
-mod_inv (const wide_int &a, const wide_int &b)
-{
-  /* Verify the assumption.  */
-  gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
-
-  unsigned int p = a.get_precision () + 1;
-  gcc_checking_assert (b.get_precision () + 1 == p);
-  wide_int c = wide_int::from (a, p, UNSIGNED);
-  wide_int d = wide_int::from (b, p, UNSIGNED);
-  wide_int x0 = wide_int::from (0, p, UNSIGNED);
-  wide_int x1 = wide_int::from (1, p, UNSIGNED);
-
-  if (wi::eq_p (b, 1))
-    return wide_int::from (1, p, UNSIGNED);
-
-  while (wi::gt_p (c, 1, UNSIGNED))
-    {
-      wide_int t = d;
-      wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
-      c = t;
-      wide_int s = x0;
-      x0 = wi::sub (x1, wi::mul (q, x0));
-      x1 = s;
-    }
-  if (wi::lt_p (x1, 0, SIGNED))
-    x1 += d;
-  return x1;
-}
-
 /* Optimize x % C1 == C2 for signed modulo if C1 is a power of two and C2
    is non-zero and C3 ((1<<(prec-1)) | (C1 - 1)):
    for C2 > 0 to x & C3 == C2
@@ -12101,7 +12069,7 @@  maybe_optimize_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
   w = wi::lrshift (w, shift);
   wide_int a = wide_int::from (w, prec + 1, UNSIGNED);
   wide_int b = wi::shifted_mask (prec, 1, false, prec + 1);
-  wide_int m = wide_int::from (mod_inv (a, b), prec, UNSIGNED);
+  wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED);
   tree c3 = wide_int_to_tree (type, m);
   tree c5 = NULL_TREE;
   wide_int d, e;
diff --git a/gcc/match.pd b/gcc/match.pd
index 7e5c5a6eae6..c3b88168ac4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3828,7 +3828,9 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      (cmp @0 @2))))))
 
 /* For integral types with undefined overflow fold
-   x * C1 == C2 into x == C2 / C1 or false.  */
+   x * C1 == C2 into x == C2 / C1 or false.
+   If overflow wraps and C1 is odd, simplify to x == C2 / C1 in the ring
+   Z / 2^n Z.  */
 (for cmp (eq ne)
  (simplify
   (cmp (mult @0 INTEGER_CST@1) INTEGER_CST@2)
@@ -3839,7 +3841,20 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (if (wi::multiple_of_p (wi::to_widest (@2), wi::to_widest (@1),
 			    TYPE_SIGN (TREE_TYPE (@0)), &quot))
      (cmp @0 { wide_int_to_tree (TREE_TYPE (@0), quot); })
-     { constant_boolean_node (cmp == NE_EXPR, type); })))))
+     { constant_boolean_node (cmp == NE_EXPR, type); }))
+   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	&& TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
+	&& (wi::bit_and (wi::to_wide (@1), 1) == 1))
+    (cmp @0
+     {
+       tree itype = TREE_TYPE (@0);
+       int p = TYPE_PRECISION (itype);
+       wide_int m = wi::one (p + 1) << p;
+       wide_int a = wide_int::from (wi::to_wide (@1), p + 1, UNSIGNED);
+       wide_int i = wide_int::from (wi::mod_inv (a, m),
+				    p, TYPE_SIGN (itype));
+       wide_int_to_tree (itype, wi::mul (i, wi::to_wide (@2)));
+     })))))
 
 /* Simplify comparison of something with itself.  For IEEE
    floating-point, we can only do some of these simplifications.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c
new file mode 100644
index 00000000000..c830a3d195f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fwrapv -fdump-tree-gimple" } */
+
+typedef __INT32_TYPE__ int32_t;
+typedef unsigned __INT32_TYPE__ uint32_t;
+
+int e(int32_t x){return 3*x==5;}
+int f(int32_t x){return 3*x==-5;}
+int g(int32_t x){return -3*x==5;}
+int h(int32_t x){return 7*x==3;}
+int i(uint32_t x){return 7*x==3;}
+
+/* { dg-final { scan-tree-dump-times "== 1431655767" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "== -1431655767" 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "== 613566757" 2 "gimple" } } */
diff --git a/gcc/wide-int.cc b/gcc/wide-int.cc
index 03be0074816..f4d949c38a0 100644
--- a/gcc/wide-int.cc
+++ b/gcc/wide-int.cc
@@ -2223,6 +2223,39 @@  wi::round_up_for_mask (const wide_int &val, const wide_int &mask)
   return (val | tmp) & -tmp;
 }
 
+/* Compute the modular multiplicative inverse of A modulo B
+   using extended Euclid's algorithm.  Assumes A and B are coprime,
+   and that A and B have the same precision.  */
+wide_int
+wi::mod_inv (const wide_int &a, const wide_int &b)
+{
+  /* Verify the assumption.  */
+  gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
+
+  unsigned int p = a.get_precision () + 1;
+  gcc_checking_assert (b.get_precision () + 1 == p);
+  wide_int c = wide_int::from (a, p, UNSIGNED);
+  wide_int d = wide_int::from (b, p, UNSIGNED);
+  wide_int x0 = wide_int::from (0, p, UNSIGNED);
+  wide_int x1 = wide_int::from (1, p, UNSIGNED);
+
+  if (wi::eq_p (b, 1))
+    return wide_int::from (1, p, UNSIGNED);
+
+  while (wi::gt_p (c, 1, UNSIGNED))
+    {
+      wide_int t = d;
+      wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
+      c = t;
+      wide_int s = x0;
+      x0 = wi::sub (x1, wi::mul (q, x0));
+      x1 = s;
+    }
+  if (wi::lt_p (x1, 0, SIGNED))
+    x1 += d;
+  return x1;
+}
+
 /*
  * Private utilities.
  */
diff --git a/gcc/wide-int.h b/gcc/wide-int.h
index bb0d16b99b1..39cd5b9bd17 100644
--- a/gcc/wide-int.h
+++ b/gcc/wide-int.h
@@ -3389,6 +3389,8 @@  namespace wi
   wide_int round_down_for_mask (const wide_int &, const wide_int &);
   wide_int round_up_for_mask (const wide_int &, const wide_int &);
 
+  wide_int mod_inv (const wide_int &a, const wide_int &b);
+
   template <typename T>
   T mask (unsigned int, bool);
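
Not part of the patch, but for reference, a minimal usage sketch of the 
new wi::mod_inv entry point, mirroring the match.pd hunk above for an 
assumed 32-bit type (both arguments carry p+1 bits so that the modulus 
2^p is representable; mod_inv requires equal precisions):

    int p = 32;                                /* TYPE_PRECISION (itype) */
    wide_int m = wi::one (p + 1) << p;         /* 2^32 as a 33-bit value */
    wide_int a = wide_int::from (3, p + 1, UNSIGNED);
    wide_int i = wide_int::from (wi::mod_inv (a, m), p, UNSIGNED);
    /* i is now 2863311531, i.e. inv(3) mod 2^32.  */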