diff mbox

PR67351 Implement << N & >> N optimizers

Message ID SN2PR0701MB10245B423874D1D4C9B93F9F8E6A0@SN2PR0701MB1024.namprd07.prod.outlook.com
State New
Headers show

Commit Message

Hurugalawadi, Naveen Sept. 1, 2015, 8:57 a.m. UTC
Hi,

Please find attached the patch "pr67351.patch" that implements the 
pattern << N  & >> N optimizers.

Please review and let me know if it's okay.

Regression tested on AArch64 and x86_64.

Thanks,
Naveen 

2015-09-01  Naveen H.S  <Naveen.Hurugalawadi@caviumnetworks.com>

	gcc/ChangeLog: 

	PR middle-end/67351
	* fold-const.c (fold_binary_loc): Move the transforms of
	(x >> c) << c into x & (-1<<c) and of (x << c) >> c into
	x & ((unsigned)-1 >> c) for unsigned types to match.pd, using
	simplify and match.
	* match.pd (lshift (rshift @0 INTEGER_CST@1) @1): New simplifier.
	(rshift (lshift @0 INTEGER_CST@1) @1): New simplifier.

	gcc/testsuite/ChangeLog: 

	PR middle-end/67351
	* g++.dg/pr67351.C: New test.

Comments

Richard Biener Sept. 1, 2015, 9:15 a.m. UTC | #1
On Tue, Sep 1, 2015 at 10:57 AM, Hurugalawadi, Naveen
<Naveen.Hurugalawadi@caviumnetworks.com> wrote:
> Hi,
>
> Please find attached the patch "pr67351.patch" that implements the
> pattern << N  & >> N optimizers.

+  (bit_and @0 (lshift { build_minus_one_cst (type); } @1))))

please use

  (bit_and @0 { wide_int_to_tree (type, wi::lshift (-1, @1)); })

and wi::arshift for the other pattern.  It should then be possible
to drop the tree_fits_uhwi_p tests and replace the precision test
with wi::ltu_p (@1, TYPE_PRECISION (type)).

Ok with these changes.

Thanks,
Richard.

>
> Please review and let me know if its okay.
>
> Regression tested on AARH64 and x86_64.
>
> Thanks,
> Naveen
>
> 2015-09-01  Naveen H.S  <Naveen.Hurugalawadi@caviumnetworks.com>
>
>         gcc/ChangeLog:
>
>         PR middle-end/67351
>         * fold-const.c (fold_binary_loc) : Move
>         Transform (x >> c) << c into x & (-1<<c) or
>         transform (x << c) >> c into x & ((unsigned)-1 >> c) for unsigned
>         types using simplify and match.
>         * match.pd (lshift (rshift @0 INTEGER_CST@1) @1) : New simplifier.
>         (rshift (lshift @0 INTEGER_CST@1) @1) : New Simplifier.
>
>         gcc/testsuite/ChangeLog:
>
>         PR middle-end/67351
>         * g++.dg/pr66752-2.C: New test.
Marc Glisse Sept. 2, 2015, 11:18 a.m. UTC | #2
+/* Optimize (x >> c) << c into x & (-1<<c).  */
+(simplify
+ (lshift (rshift @0 INTEGER_CST@1) @1)
+ (if (tree_fits_uhwi_p (@1)
+      && tree_to_uhwi (@1) < TYPE_PRECISION (type))
+  (bit_and @0 (lshift { build_minus_one_cst (type); } @1))))

It looks like vectors might match, so please use element_precision instead 
of TYPE_PRECISION, as in the fold-const.c code you are converting from.
diff mbox

Patch

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index c826e67..4746836 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -10502,32 +10502,6 @@  fold_binary_loc (location_t loc,
 
       prec = element_precision (type);
 
-      /* Transform (x >> c) << c into x & (-1<<c), or transform (x << c) >> c
-         into x & ((unsigned)-1 >> c) for unsigned types.  */
-      if (((code == LSHIFT_EXPR && TREE_CODE (arg0) == RSHIFT_EXPR)
-           || (TYPE_UNSIGNED (type)
-	       && code == RSHIFT_EXPR && TREE_CODE (arg0) == LSHIFT_EXPR))
-	  && tree_fits_uhwi_p (arg1)
-	  && tree_to_uhwi (arg1) < prec
-	  && tree_fits_uhwi_p (TREE_OPERAND (arg0, 1))
-	  && tree_to_uhwi (TREE_OPERAND (arg0, 1)) < prec)
-	{
-	  HOST_WIDE_INT low0 = tree_to_uhwi (TREE_OPERAND (arg0, 1));
-	  HOST_WIDE_INT low1 = tree_to_uhwi (arg1);
-	  tree lshift;
-	  tree arg00;
-
-	  if (low0 == low1)
-	    {
-	      arg00 = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0));
-
-	      lshift = build_minus_one_cst (type);
-	      lshift = const_binop (code, lshift, arg1);
-
-	      return fold_build2_loc (loc, BIT_AND_EXPR, type, arg00, lshift);
-	    }
-	}
-
       /* If we have a rotate of a bit operation with the rotate count and
 	 the second operand of the bit operation both constant,
 	 permute the two operations.  */
diff --git a/gcc/match.pd b/gcc/match.pd
index 289bc5c..9b9f09d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -929,6 +929,22 @@  along with GCC; see the file COPYING3.  If not see
       && tree_expr_nonnegative_p (@1))
   @0))
 
+/* Optimize (x >> c) << c into x & (-1<<c).  The count is checked against
+   element_precision so that vector operands use their element width.  */
+(simplify
+ (lshift (rshift @0 INTEGER_CST@1) @1)
+ (if (wi::ltu_p (@1, element_precision (type)))
+  (bit_and @0 (lshift { build_minus_one_cst (type); } @1))))
+
+/* Optimize (x << c) >> c into x & ((unsigned)-1 >> c) for unsigned
+   types.  The count is checked against element_precision so that
+   vector operands use their element width.  */
+(simplify
+ (rshift (lshift @0 INTEGER_CST@1) @1)
+ (if (TYPE_UNSIGNED (type)
+      && wi::ltu_p (@1, element_precision (type)))
+  (bit_and @0 (rshift { build_minus_one_cst (type); } @1))))
+
 (for shiftrotate (lrotate rrotate lshift rshift)
  (simplify
   (shiftrotate @0 integer_zerop)
diff --git a/gcc/testsuite/g++.dg/pr67351.C b/gcc/testsuite/g++.dg/pr67351.C
new file mode 100644
index 0000000..c86c920
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr67351.C
@@ -0,0 +1,106 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long long uint64;
+
+class MyRgba  /* Four uchar channels packed into a single uint.  */
+{
+  uint rgba;  /* Layout per fromRgba: r at bit 24, g at 16, b at 8, a at 0.  */
+
+public:
+    explicit MyRgba (uint c):rgba (c)
+  {
+  };
+
+  static MyRgba fromRgba (uchar r, uchar g, uchar b, uchar a)
+  {
+    return MyRgba (uint (r) << 24  /* Pack the four channels into one uint.  */
+		   | uint (g) << 16 | uint (b) << 8 | uint (a));
+  }
+
+  uchar r ()
+  {
+    return rgba >> 24;  /* Result is converted to uchar on return.  */
+  }
+  uchar g ()
+  {
+    return rgba >> 16;  /* Result is converted to uchar on return.  */
+  }
+  uchar b ()
+  {
+    return rgba >> 8;  /* Result is converted to uchar on return.  */
+  }
+  uchar a ()
+  {
+    return rgba;  /* Converted to uchar on return; no shift needed.  */
+  }
+
+  void setG (uchar _g)
+  {
+    *this = fromRgba (r (), _g, b (), a ());  /* Unpack r/b/a, repack with _g.  */
+  }
+};
+
+extern MyRgba giveMe ();
+
+MyRgba
+test ()  /* Replaces the g channel of an externally supplied MyRgba.  */
+{
+  MyRgba a = giveMe ();
+  a.setG (0xf0);  /* setG unpacks and repacks every channel with shifts.  */
+  return a;
+}
+
+class MyRgba64  /* Four ushort channels packed into a single uint64.  */
+{
+  uint64 rgba;  /* Layout per fromRgba64: r at bit 48, g at 32, b at 16, a at 0.  */
+
+public:
+    explicit MyRgba64 (uint64 c):rgba (c)
+  {
+  };
+
+  static MyRgba64 fromRgba64 (ushort r, ushort g, ushort b, ushort a)
+  {
+    return MyRgba64 (uint64 (r) << 48  /* Pack the four channels into one uint64.  */
+		     | uint64 (g) << 32 | uint64 (b) << 16 | uint64 (a));
+  }
+
+  ushort r ()
+  {
+    return rgba >> 48;  /* Result is converted to ushort on return.  */
+  }
+  ushort g ()
+  {
+    return rgba >> 32;  /* Result is converted to ushort on return.  */
+  }
+  ushort b ()
+  {
+    return rgba >> 16;  /* Result is converted to ushort on return.  */
+  }
+  ushort a ()
+  {
+    return rgba;  /* Converted to ushort on return; no shift needed.  */
+  }
+
+  void setG (ushort _g)
+  {
+    *this = fromRgba64 (r (), _g, b (), a ());  /* Unpack r/b/a, repack with _g.  */
+  }
+};
+
+extern MyRgba64 giveMe64 ();
+
+MyRgba64
+test64 ()  /* Replaces the g channel of an externally supplied MyRgba64.  */
+{
+  MyRgba64 a = giveMe64 ();
+  a.setG (0xf0f0);  /* setG unpacks and repacks every channel with shifts.  */
+  return a;
+}
+
+/* { dg-final { scan-tree-dump-not "<<" "optimized" } } */
+/* { dg-final { scan-tree-dump-not ">>" "optimized" } } */