diff mbox series

tree-optimization/103345: Improved load merging

Message ID 001601d7df7c$7f739920$7e5acb60$@nextmovesoftware.com
State New
Headers show
Series tree-optimization/103345: Improved load merging | expand

Commit Message

Roger Sayle Nov. 22, 2021, 8:39 a.m. UTC
This patch implements PR tree-optimization/103345 to merge adjacent
loads when combined with addition or bitwise xor.  The current code
in gimple-ssa-store-merging.c's find_bswap_or_nop already handles ior,
so all that's required is to treat PLUS_EXPR and BIT_XOR_EXPR in
the same way as BIT_IOR_EXPR.  Many thanks to Andrew Pinski for
pointing out that this also resolves PR target/98953.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check with no new failures.  The new testcases should also
pass (but haven't been tested) on other endian targets.

Ok for mainline?


2021-11-22  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	PR tree-optimization/98953
	PR tree-optimization/103345
	* gimple-ssa-store-merging.c (find_bswap_or_nop_1): Handle
	BIT_XOR_EXPR and PLUS_EXPR the same as BIT_IOR_EXPR.
	(pass_optimize_bswap::execute): Likewise.

gcc/testsuite/ChangeLog
	PR tree-optimization/98953
	PR tree-optimization/103345
	* gcc.dg/tree-ssa/pr98953.c: New test case.
	* gcc.dg/tree-ssa/pr103345.c: New test case.


Thanks in advance,
Roger
--

Comments

Richard Biener Nov. 22, 2021, 12:28 p.m. UTC | #1
On Mon, Nov 22, 2021 at 9:40 AM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> This patch implements PR tree-optimization/103345 to merge adjacent
> loads when combined with addition or bitwise xor.  The current code
> in gimple-ssa-store-merging.c's find_bswap_or_nop already handles ior,
> so all that's required is to treat PLUS_EXPR and BIT_XOR_EXPR in
> the same way as BIT_IOR_EXPR.  Many thanks to Andrew Pinski for
> pointing out that this also resolves PR target/98953.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check with no new failures.  The new testcases should also
> pass (but haven't been tested) on other endian targets.
>
> Ok for mainline?

OK.

Thanks,
Richard.

>
> 2021-11-22  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
>         PR tree-optimization/98953
>         PR tree-optimization/103345
>         * gimple-ssa-store-merging.c (find_bswap_or_nop_1): Handle
>         BIT_XOR_EXPR and PLUS_EXPR the same as BIT_IOR_EXPR.
>         (pass_optimize_bswap::execute): Likewise.
>
> gcc/testsuite/ChangeLog
>         PR tree-optimization/98953
>         PR tree-optimization/103345
>         * gcc.dg/tree-ssa/pr98953.c: New test case.
>         * gcc.dg/tree-ssa/pr103345.c: New test case.
>
>
> Thanks in advance,
> Roger
> --
>
diff mbox series

Patch

diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
index 4efa200..1740c9e 100644
--- a/gcc/gimple-ssa-store-merging.c
+++ b/gcc/gimple-ssa-store-merging.c
@@ -742,10 +742,7 @@  find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
       struct symbolic_number n1, n2;
       gimple *source_stmt, *source_stmt2;
 
-      if (code != BIT_IOR_EXPR)
-	return NULL;
-
-      if (TREE_CODE (rhs2) != SSA_NAME)
+      if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
 	return NULL;
 
       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
@@ -753,6 +750,8 @@  find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
       switch (code)
 	{
 	case BIT_IOR_EXPR:
+	case BIT_XOR_EXPR:
+	case PLUS_EXPR:
 	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
 
 	  if (!source_stmt1)
@@ -1495,6 +1494,8 @@  pass_optimize_bswap::execute (function *fun)
 		continue;
 	      /* Fall through.  */
 	    case BIT_IOR_EXPR:
+	    case BIT_XOR_EXPR:
+	    case PLUS_EXPR:
 	      break;
 	    case CONSTRUCTOR:
 	      {
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c b/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c
new file mode 100644
index 0000000..94388b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c
@@ -0,0 +1,53 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-bswap-details" } */
+
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+
+uint32_t load_le_32_or(const uint8_t *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  return ((uint32_t)ptr[0]) |
+         ((uint32_t)ptr[1] << 8) |
+         ((uint32_t)ptr[2] << 16) |
+         ((uint32_t)ptr[3] << 24);
+#else /* otherwise ptr[0] becomes the most significant byte, so the dump still reports a target-endianness load */
+  return ((uint32_t)ptr[3]) |
+         ((uint32_t)ptr[2] << 8) |
+         ((uint32_t)ptr[1] << 16) |
+         ((uint32_t)ptr[0] << 24);
+#endif
+}
+
+uint32_t load_le_32_add(const uint8_t *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  return ((uint32_t)ptr[0]) +
+         ((uint32_t)ptr[1] << 8) +
+         ((uint32_t)ptr[2] << 16) +
+         ((uint32_t)ptr[3] << 24);
+#else /* otherwise ptr[0] becomes the most significant byte, so the dump still reports a target-endianness load */
+  return ((uint32_t)ptr[3]) +
+         ((uint32_t)ptr[2] << 8) +
+         ((uint32_t)ptr[1] << 16) +
+         ((uint32_t)ptr[0] << 24);
+#endif
+}
+
+uint32_t load_le_32_xor(const uint8_t *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  return ((uint32_t)ptr[0]) ^
+         ((uint32_t)ptr[1] << 8) ^
+         ((uint32_t)ptr[2] << 16) ^
+         ((uint32_t)ptr[3] << 24);
+#else /* otherwise reverse the byte indices so this is still a target-endianness load, as the dump scan expects */
+  return ((uint32_t)ptr[3]) ^
+         ((uint32_t)ptr[2] << 8) ^
+         ((uint32_t)ptr[1] << 16) ^
+         ((uint32_t)ptr[0] << 24);
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times "32 bit load in target endianness found" 3 "bswap" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c b/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c
new file mode 100644
index 0000000..7687dc2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-bswap-details" } */
+
+int foo(unsigned char *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    return ptr[0] + (ptr[1] << 8);
+#else /* otherwise ptr[0] becomes the high byte of the 16-bit value */
+    return ptr[1] + (ptr[0] << 8);
+#endif
+}
+
+/* { dg-final { scan-tree-dump "16 bit load in target endianness found" "bswap" } } */
+