[RFC] Improve tree DSE

Message ID CAELXzTOSDWemBGrLbJqu+1fUoWWJ9bUJCvzT8P8HPGci67HQEg@mail.gmail.com
State New
Headers show
Series
  • [RFC] Improve tree DSE
Related show

Commit Message

Kugan Vivekanandarajah April 10, 2018, 12:52 a.m.
I would like to queue this patch for stage1 review.

In DSE, while in dse_classify_store, as soon as we see a PHI use
statement that is part of the loop, we are immediately giving up.

As far as I understand, this can be improved. Attached patch is trying
to walk the uses of the PHI statement (by recursively calling
dse_classify_store) and then making sure the obtained store is indeed
redundant.

This is partly as reported in one of the testcase from PR44612. But
this PR is about other issues that is not handled in this patch.

Bootstrapped and regression tested on aarch64-linux-gnu with no new regressions.

Is this OK for next stage1?

Thanks,
Kugan


gcc/ChangeLog:

2018-04-10  Kugan Vivekanandarajah  <kuganv@linaro.org>

    * tree-ssa-dse.c (dse_classify_store): Handle recursive PHI.
    (dse_dom_walker::dse_optimize_stmt): Update call dse_classify_store.

gcc/testsuite/ChangeLog:

2018-04-10  Kugan Vivekanandarajah  <kuganv@linaro.org>

    * gcc.dg/tree-ssa/ssa-dse-31.c: New test.
    * gcc.dg/tree-ssa/ssa-dse-32.c: New test.

Patch

From 5751eaff3d1c263e8631d5a07e43fecaaa0e9d26 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
Date: Tue, 10 Apr 2018 09:49:10 +1000
Subject: [PATCH] improve dse

---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-31.c | 16 ++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-32.c | 23 ++++++++++++++
 gcc/tree-ssa-dse.c                         | 51 ++++++++++++++++++++++++------
 3 files changed, 81 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-31.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-32.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-31.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-31.c
new file mode 100644
index 0000000..e4d71b2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-31.c
@@ -0,0 +1,16 @@ 
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse-details" } */
+#define SIZE 4
+
+int main ()
+{
+  static float a[SIZE];
+  int i;
+  for (i = 0; i < SIZE; i++)
+   __builtin_memset ((void *) a, 0, sizeof(float)*3);
+   __builtin_memset ((void *) a, 0, sizeof(float)*SIZE);
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Deleted dead calls" 1 "dse1"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-32.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-32.c
new file mode 100644
index 0000000..3d8fd5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-32.c
@@ -0,0 +1,23 @@ 
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse-details" } */
+#define SIZE 4
+
+void s4 (float *restrict a)
+{
+  (void) __builtin_memset ((void *) a, 0, sizeof(float)*SIZE);
+}
+
+
+int main ()
+{
+  int i;
+  float a[10];
+  printf("Start\n");
+  for (i = 0; i < SIZE; i++)
+    s4 (a);
+  printf("Done\n");
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Deleted dead calls" 1 "dse1"} } */
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 9220fea..3513fda 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -521,11 +521,11 @@  live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
    Return TRUE if the above conditions are met, otherwise FALSE.  */
 
 static dse_store_status
-dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
-		    bool byte_tracking_enabled, sbitmap live_bytes)
+dse_classify_store (ao_ref *ref, gimple *stmt_outer, gimple *stmt,
+		    gimple **use_stmt, bool byte_tracking_enabled,
+		    sbitmap live_bytes, unsigned cnt = 0)
 {
   gimple *temp;
-  unsigned cnt = 0;
 
   *use_stmt = NULL;
 
@@ -556,9 +556,11 @@  dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
 	{
 	  cnt++;
 
+	  if (use_stmt == stmt_outer)
+	    continue;
 	  /* If we ever reach our DSE candidate stmt again fail.  We
 	     cannot handle dead stores in loops.  */
-	  if (use_stmt == stmt)
+	  else if (use_stmt == stmt)
 	    {
 	      fail = true;
 	      BREAK_FROM_IMM_USE_STMT (ui);
@@ -572,8 +574,6 @@  dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
 	      if (temp
 		  /* Make sure we are not in a loop latch block.  */
 		  || gimple_bb (stmt) == gimple_bb (use_stmt)
-		  || dominated_by_p (CDI_DOMINATORS,
-				     gimple_bb (stmt), gimple_bb (use_stmt))
 		  /* We can look through PHIs to regions post-dominating
 		     the DSE candidate stmt.  */
 		  || !dominated_by_p (CDI_POST_DOMINATORS,
@@ -582,8 +582,41 @@  dse_classify_store (ao_ref *ref, gimple *stmt, gimple **use_stmt,
 		  fail = true;
 		  BREAK_FROM_IMM_USE_STMT (ui);
 		}
+	      else if (dominated_by_p (CDI_DOMINATORS,
+				       gimple_bb (stmt), gimple_bb (use_stmt)))
+		{
+		  gphi *phi = as_a <gphi *> (use_stmt);
+		  gimple *def_stmt = SSA_NAME_DEF_STMT (PHI_RESULT (phi));
+		  enum dse_store_status status = DSE_STORE_LIVE;
+		  ao_ref use_ref;
+		  gimple *inner_use_stmt;
+
+		  /* If stmt dominates PHI stmt, follow the PHI stmt.  */
+		  if (!temp)
+		    status = dse_classify_store (ref, stmt, def_stmt, &inner_use_stmt,
+						 byte_tracking_enabled,
+						 live_bytes, cnt);
+
+		  if (status == DSE_STORE_DEAD
+		      /* Makesure the use stmt found is post dominated.  */
+		      && dominated_by_p (CDI_POST_DOMINATORS,
+					 gimple_bb (stmt_outer), gimple_bb (inner_use_stmt))
+		      /* Also check that the store is redundant.  */
+		      && initialize_ao_ref_for_dse (inner_use_stmt, &use_ref)
+		      && valid_ao_ref_for_dse (&use_ref)
+		      && use_ref.base == ref->base
+		      && known_eq (use_ref.size, use_ref.max_size)
+		      && known_ge (use_ref.size, ref->size)
+		      && known_eq (use_ref.offset, ref->offset))
+		    temp = inner_use_stmt;
+		  else
+		    {
+		      fail = true;
+		      BREAK_FROM_IMM_USE_STMT (ui);
+		    }
+		}
 	      /* Do not consider the PHI as use if it dominates the
-	         stmt defining the virtual operand we are processing,
+		 stmt defining the virtual operand we are processing,
 		 we have processed it already in this case.  */
 	      if (gimple_bb (defvar_def) != gimple_bb (use_stmt)
 		  && !dominated_by_p (CDI_DOMINATORS,
@@ -799,7 +832,7 @@  dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
 	      enum dse_store_status store_status;
 	      m_byte_tracking_enabled
 		= setup_live_bytes_from_ref (&ref, m_live_bytes);
-	      store_status = dse_classify_store (&ref, stmt, &use_stmt,
+	      store_status = dse_classify_store (&ref, stmt, stmt, &use_stmt,
 						 m_byte_tracking_enabled,
 						 m_live_bytes);
 	      if (store_status == DSE_STORE_LIVE)
@@ -834,7 +867,7 @@  dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
 	  m_byte_tracking_enabled
 	    = setup_live_bytes_from_ref (&ref, m_live_bytes);
 	  enum dse_store_status store_status;
-	  store_status = dse_classify_store (&ref, stmt, &use_stmt,
+	  store_status = dse_classify_store (&ref, stmt, stmt, &use_stmt,
 					     m_byte_tracking_enabled,
 					     m_live_bytes);
 	  if (store_status == DSE_STORE_LIVE)
-- 
2.7.4