Patchwork Fix PR45199: do not aggregate memory accesses to the same array for -ftree-loop-distribute-patterns

login
register
mail settings
Submitter Sebastian Pop
Date Dec. 1, 2010, 12:27 a.m.
Message ID <AANLkTinS62P3rXboku+sSg4+-dpWaWfpRFG=797+C4wm@mail.gmail.com>
Download mbox | patch
Permalink /patch/73688/
State New
Headers show

Comments

Sebastian Pop - Dec. 1, 2010, 12:27 a.m.
Hi,

This patch disables for -ftree-loop-distribute-patterns a heuristic
that is used by the loop distribution pass to avoid the split of loops
with reads and writes to the same array, like this loop:

loop
  A[2*i] = 0
  A[2*i+1] = 1
end_loop

would be transformed into these two loops:

loop
  A[2*i] = 0
end_loop

loop
  A[2*i+1] = 1
end_loop

and that would hurt data locality.  Now, because we don't want these
two loops to be distributed when -ftree-loop-distribute-patterns is
enabled, we have to select the seeds of the memset (0) partitions to
be only those statements "A[i] = 0" that also satisfy the test in
generate_memset_zero, that is dataref_stride_has_unit_type_stride.

Bootstrap and test in progress on amd64-linux.  Ok for trunk when that
passes?

Thanks,
Sebastian

Patch

From dc42b8c6001cafaba2b38beb8760af609f68fde8 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Thu, 5 Aug 2010 15:36:33 -0500
Subject: [PATCH] Fix PR45199: do not aggregate memory accesses to the same array for -ftree-loop-distribute-patterns

2010-11-30  Sebastian Pop  <sebastian.pop@amd.com>

	PR tree-optimization/45199
	* tree-data-ref.c (dataref_stride_has_unit_type_stride): New.
	(stores_zero_from_loop): Call dataref_stride_has_unit_type_stride.
	* tree-loop-distribution.c (build_rdg_partition_for_component): Do
	not call rdg_flag_similar_memory_accesses when
	flag_tree_loop_distribute_patterns is set.

	* gcc.dg/tree-ssa/ldist-15.c: New.
	* gcc.dg/tree-ssa/ldist-16.c: New.
	* gfortran.dg/ldist-pr45199.f: New.
---
 gcc/ChangeLog                             |    9 +++++++++
 gcc/testsuite/ChangeLog                   |    7 +++++++
 gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c  |   23 +++++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c  |   21 +++++++++++++++++++++
 gcc/testsuite/gfortran.dg/ldist-pr45199.f |   27 +++++++++++++++++++++++++++
 gcc/tree-data-ref.c                       |   27 ++++++++++++++++++++++++++-
 gcc/tree-loop-distribution.c              |    5 +++--
 7 files changed, 116 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
 create mode 100644 gcc/testsuite/gfortran.dg/ldist-pr45199.f

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5706ef8..b73aeae 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@ 
+2010-11-30  Sebastian Pop  <sebastian.pop@amd.com>
+
+	PR tree-optimization/45199
+	* tree-data-ref.c (dataref_stride_has_unit_type_stride): New.
+	(stores_zero_from_loop): Call dataref_stride_has_unit_type_stride.
+	* tree-loop-distribution.c (build_rdg_partition_for_component): Do
+	not call rdg_flag_similar_memory_accesses when
+	flag_tree_loop_distribute_patterns is set.
+
 2010-11-29  H.J. Lu  <hongjiu.lu@intel.com>
 
 	PR driver/46712
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5792e66..20aaf04 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@ 
+2010-11-30  Sebastian Pop  <sebastian.pop@amd.com>
+
+	PR tree-optimization/45199
+	* gcc.dg/tree-ssa/ldist-15.c: New.
+	* gcc.dg/tree-ssa/ldist-16.c: New.
+	* gfortran.dg/ldist-pr45199.f: New.
+
 2010-11-29  Nicola Pero  <nicola.pero@meta-innovation.com>
 
 	* objc.dg/duplicate-class-1.m: New.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c
new file mode 100644
index 0000000..7ce3b95
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+int x[1000];
+
+void foo (int n)
+{
+  int i;
+
+  for (i = 0; i < n; ++i)
+    {
+      x[2*i] = 0;
+      x[2*i + 1] = 1;
+    }
+}
+
+/* We should not apply loop distribution as it is not beneficial from
+   a data locality point of view.  Also it is not possible to generate
+   a memset (0) as the write has a stride of 2.  */
+
+/* { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_memset" "ldist" } } */
+/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
new file mode 100644
index 0000000..61e8e56
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+int x[1000];
+
+void foo (int n)
+{
+  int i;
+
+  for (i = 0; i < n; ++i)
+    {
+      x[i] = 0;
+      x[2*i + 1] = 1;
+    }
+}
+
+/* We should apply loop distribution and generate a memset (0).  */
+
+/* { dg-final { scan-tree-dump "distributed: split to 2" "ldist" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_memset" 2 "ldist" } } */
+/* { dg-final { cleanup-tree-dump "ldist" } } */
diff --git a/gcc/testsuite/gfortran.dg/ldist-pr45199.f b/gcc/testsuite/gfortran.dg/ldist-pr45199.f
new file mode 100644
index 0000000..2fab43a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/ldist-pr45199.f
@@ -0,0 +1,27 @@ 
+! { dg-do compile }
+! { dg-options "-O3 -fdump-tree-ldist-details" }
+
+      parameter(numlev=3,numoblev=1000)
+      integer i_otyp(numoblev,numlev), i_styp(numoblev,numlev)
+      logical l_numob(numoblev,numlev)
+      do ixe=1,numoblev
+         do iye=1,numlev
+            i_otyp(ixe,iye)=0
+            i_styp(ixe,iye)=0
+            l_numob(ixe,iye)=.false.
+         enddo
+      enddo
+      do i=1,m
+         do j=1,n
+            if (l_numob(i,j)) then
+               write(20,'(7I4,F12.2,4F16.10)') i_otyp(i,j),i_styp(i,j)
+            endif 
+         enddo
+      enddo
+      end
+
+! GCC should apply memset zero loop distribution and it should not ICE.
+
+! { dg-final { scan-tree-dump "distributed: split to 9 loops" "ldist" } }
+! { dg-final { scan-tree-dump-times "__builtin_memset" 18 "ldist" } }
+! { dg-final { cleanup-tree-dump "ldist" } }
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 3cee320..17f34de 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -4974,6 +4974,30 @@  stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
   free (bbs);
 }
 
+/* Returns true when STMT is an assignment that contains a data
+   reference on its LHS with a stride of the same size as its unit
+   type.  This is the same test as in generate_memset_zero.  */
+
+static bool
+dataref_stride_has_unit_type_stride (gimple stmt)
+{
+  struct data_reference *dr = XCNEW (struct data_reference);
+  tree op0 = gimple_assign_lhs (stmt);
+
+  DR_STMT (dr) = stmt;
+  DR_REF (dr) = op0;
+  if (!dr_analyze_innermost (dr))
+    return false;
+
+  return dr_analyze_innermost (dr) &&
+    (integer_zerop (size_binop (MINUS_EXPR,
+				fold_convert (sizetype, DR_STEP (dr)),
+				TYPE_SIZE_UNIT (TREE_TYPE (op0))))
+     || integer_zerop (size_binop (PLUS_EXPR,
+				   TYPE_SIZE_UNIT (TREE_TYPE (op0)),
+				   fold_convert (sizetype, DR_STEP (dr)))));
+}
+
 /* Initialize STMTS with all the statements of LOOP that contain a
    store to memory of the form "A[i] = 0".  */
 
@@ -4994,7 +5018,8 @@  stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
 	  && is_gimple_assign (stmt)
 	  && gimple_assign_rhs_code (stmt) == INTEGER_CST
 	  && (op = gimple_assign_rhs1 (stmt))
-	  && (integer_zerop (op) || real_zerop (op)))
+	  && (integer_zerop (op) || real_zerop (op))
+	  && dataref_stride_has_unit_type_stride (stmt))
 	VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
 
   free (bbs);
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 007c4f3..2c2af2c 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -781,8 +781,9 @@  build_rdg_partition_for_component (struct graph *rdg, rdgc c,
      and determine those vertices that have some memory affinity with
      the current nodes in the component: these are stores to the same
      arrays, i.e. we're taking care of cache locality.  */
-  rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
-				    other_stores);
+  if (!flag_tree_loop_distribute_patterns)
+    rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
+				      other_stores);
 
   rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
 
-- 
1.7.1