diff mbox

PR tree-optimization/58380

Message ID 522FD5D7.6010306@redhat.com
State New
Headers show

Commit Message

Jeff Law Sept. 11, 2013, 2:30 a.m. UTC
58380 is another instance of trying to thread through a buried loop 
header that was missed by the pruning code.  The change to 
tree-ssa-threadupdate.c catches this case.

While looking at this I remembered why I initially didn't allow 
threading through blocks with a single successor after threading through 
a joiner -- doing so isn't always profitable and results in unnecessary 
block duplication and churn.

Thankfully the restructured code makes it easy to detect when those jump 
threads are profitable and indicate that to the caller.  Specifically, 
threading through a block with a single successor is profitable when we 
later reach and thread through a block with > 1 successor (as is the 
case with the FSA optimization).  This patch implements that check.

I verified each patch independently fixes 58380.  The latter change also 
fixes the ia64 bootstrap issue.

Bootstrapped and regression tested on x86_64-unknown-linux-gnu. 
Installed onto the trunk.
diff mbox

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d4d32e1..21c8538 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@ 
+2013-09-10  Jeff Law  <law@redhat.com>
+
+	PR tree-optimization/58380
+	* tree-ssa-threadupdate.c (thread_block): Recognize another case
+	of threading through a buried loop header.
+
+	* tree-ssa-threadedge.c (thread_around_empty_blocks): Correct
+	return value for single successor case.
+
 2013-09-10  Jan Hubicka  <jh@suse.cz>
 
 	* ipa-devirt.c (ipa_devirt): Enable with LTO.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8d58e2d..ae9e6e2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@ 
+2013-09-10  Jeff Law  <law@redhat.com>
+
+	* g++.dg/torture/pr58380.C: New test.
+
 2013-09-10  Jan Hubicka  <jh@suse.cz>
 	    Paolo Carlini  <paolo.carlini@oracle.com>
 
diff --git a/gcc/testsuite/g++.dg/torture/pr58380.C b/gcc/testsuite/g++.dg/torture/pr58380.C
new file mode 100644
index 0000000..b2c185d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr58380.C
@@ -0,0 +1,171 @@ 
+// { dg-do compile }
+class iplugin_factory;
+class idocument_plugin_factory {
+  virtual idocument_plugin_factory *create_plugin(iplugin_factory &, int &);
+};
+template <typename _Iterator, typename> class __normal_iterator {
+  _Iterator _M_current;
+
+public:
+  _Iterator iterator_type;
+  __normal_iterator(const _Iterator &p1) : _M_current(p1) {}
+  void operator++();
+  _Iterator &base() { return _M_current; }
+};
+
+template <typename _IteratorL, typename _IteratorR, typename _Container>
+int operator!=(__normal_iterator<_IteratorL, _Container> &p1,
+               __normal_iterator<_IteratorR, _Container> &p2) {
+  return p1.base() != p2.base();
+}
+
+class new_allocator {
+public:
+  typedef int *const_pointer;
+  int *allocate();
+};
+template <typename> class allocator : public new_allocator {};
+
+class basic_string {
+public:
+  basic_string(char *);
+};
+struct __uninitialized_copy {
+  template <typename _InputIterator, typename _ForwardIterator>
+  static _ForwardIterator __uninit_copy(_InputIterator p1, _InputIterator p2,
+                                        _ForwardIterator p3) try {
+    for (; p1 != p2; ++p1, ++p3)
+      ;
+    return p3;
+  }
+  catch (...) {
+  }
+};
+
+template <typename _InputIterator, typename _ForwardIterator>
+_ForwardIterator uninitialized_copy(_InputIterator p1, _InputIterator p2,
+                                    _ForwardIterator p3) {
+  return __uninitialized_copy::__uninit_copy(p1, p2, p3);
+}
+
+template <typename _InputIterator, typename _ForwardIterator, typename _Tp>
+_ForwardIterator __uninitialized_copy_a(_InputIterator p1, _InputIterator p2,
+                                        _ForwardIterator p3, allocator<_Tp> &) {
+  return uninitialized_copy(p1, p2, p3);
+}
+
+struct _Vector_base {
+  struct _Vector_impl : allocator<int> {
+    int *_M_start;
+    int *_M_finish;
+  };
+  allocator<int> &_M_get_Tp_allocator() {}
+  _Vector_base() {}
+  _Vector_base(int p1) { _M_create_storage(p1); }
+  _Vector_impl _M_impl;
+  int *_M_allocate(int p1) { p1 ? _M_impl.allocate() : 0; }
+  void _M_create_storage(int p1) {
+    this->_M_impl._M_start = this->_M_allocate(p1);
+  }
+};
+
+class vector : _Vector_base {
+  _Vector_base _Base;
+
+public:
+  vector() {}
+  vector(const vector &p1) : _Base(p1.size()) {
+    this->_M_impl._M_finish = __uninitialized_copy_a(
+        p1.begin(), p1.end(), this->_M_impl._M_start, _M_get_Tp_allocator());
+  }
+  ~vector();
+  __normal_iterator<typename allocator<int>::const_pointer, int> begin() const {
+    return this->_M_impl._M_start;
+  }
+  __normal_iterator<typename allocator<int>::const_pointer, int> end() const {
+    return this->_M_impl._M_finish;
+  }
+  int size() const { return this->_M_impl._M_finish - this->_M_impl._M_start; }
+};
+class iplugin_factory {
+public:
+  typedef enum {
+    STABLE,
+    EXPERIMENTAL
+  } quality_t;
+};
+class plugin_factory : public iplugin_factory {
+public:
+  plugin_factory(const int &, const basic_string &, const basic_string &,
+                 const basic_string &, quality_t);
+};
+template <typename plugin_t>
+class document_plugin_factory : plugin_factory, idocument_plugin_factory {
+public:
+  document_plugin_factory(const int &p1, const basic_string &,
+                          const basic_string &, const basic_string &, quality_t)
+      : plugin_factory(0, 0, 0, 0, STABLE) {}
+  idocument_plugin_factory *create_plugin(iplugin_factory &p1, int &p2) {
+    plugin_t(p1, p2);
+  }
+};
+
+class container {
+public:
+  template <typename init_t> container(init_t &);
+};
+template <class init_t> class initializer_t : init_t {
+public:
+  initializer_t(const init_t &p1) : init_t(p1) {}
+};
+
+class composition_t {};
+template <typename lhs_t, typename rhs_t>
+const initializer_t<composition_t> operator+(const initializer_t<lhs_t> &,
+                                             const initializer_t<rhs_t> &);
+template <typename value_t> class value_initializer_t {
+public:
+  value_initializer_t(const value_t &p1) : m_value(p1) {}
+  value_t m_value;
+};
+
+template <typename value_t>
+initializer_t<value_initializer_t<value_t> > init_value(const value_t &p1) {
+  initializer_t<value_initializer_t<value_t> >(
+      value_initializer_t<value_t>(p1));
+}
+
+class name_t {};
+class label_t {};
+class description_t {};
+class owner_initializer_t {};
+template <typename owner_t>
+initializer_t<owner_initializer_t> init_owner(owner_t &);
+class set : vector {};
+class node {
+public:
+  node(iplugin_factory &, int &);
+};
+initializer_t<name_t> init_name();
+initializer_t<label_t> init_label();
+initializer_t<description_t> init_description();
+template <typename base_t> class mesh_selection_sink : base_t {
+public:
+  mesh_selection_sink(iplugin_factory &p1, int &p2)
+      : base_t(p1, p2),
+        m_mesh_selection(init_owner(*this) + init_name() + init_label() +
+                         init_description() + init_value(set())) {}
+  container m_mesh_selection;
+};
+
+class selection_to_stdout : mesh_selection_sink<node> {
+public:
+  selection_to_stdout(iplugin_factory &p1, int &p2)
+      : mesh_selection_sink(p1, p2) {}
+  static iplugin_factory &get_factory() {
+    document_plugin_factory<selection_to_stdout>(0, "", 0, "",
+                                                 iplugin_factory::EXPERIMENTAL);
+  }
+};
+
+void selection_to_stdout_factory() { selection_to_stdout::get_factory(); }
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index 14bc4e3..839f185 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -770,7 +770,19 @@  thread_around_empty_blocks (edge taken_edge,
   gsi = gsi_start_nondebug_bb (bb);
 
   /* If the block has no statements, but does have a single successor, then
-     it's just a forwarding block and we can thread through it trivially.  */
+     it's just a forwarding block and we can thread through it trivially. 
+
+     However, note that just threading through empty blocks with single
+     successors is not inherently profitable.  For the jump thread to
+     be profitable, we must avoid a runtime conditional.
+
+     By taking the return value from the recursive call, we get the
+     desired effect of returning TRUE when we found a profitable jump
+     threading opportunity and FALSE otherwise. 
+
+     This is particularly important when this routine is called after
+     processing a joiner block.  Returning TRUE too aggressively in
+     that case results in pointless duplication of the joiner block.  */
   if (gsi_end_p (gsi))
     {
       if (single_succ_p (bb))
@@ -781,15 +793,16 @@  thread_around_empty_blocks (edge taken_edge,
 	    {
 	      bitmap_set_bit (visited, taken_edge->dest->index);
 	      path->safe_push (taken_edge);
-	      thread_around_empty_blocks (taken_edge,
-					  dummy_cond,
-					  handle_dominating_asserts,
-					  simplify,
-					  visited,
-					  path);
-	      return true;
+	      return thread_around_empty_blocks (taken_edge,
+						 dummy_cond,
+						 handle_dominating_asserts,
+						 simplify,
+						 visited,
+						 path);
 	    }
 	}
+
+      /* We have a block with no statements, but multiple successors?  */
       return false;
     }
 
diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index 3c3d3bc..6d43dc3 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -659,8 +659,10 @@  thread_block (basic_block bb, bool noloop_only)
 	     threading path.  We do not try and thread this elsewhere, so
 	     just cancel the jump threading request by clearing the AUX
 	     field now.  */
-	  if (bb->loop_father != e2->src->loop_father
-	      && !loop_exit_edge_p (e2->src->loop_father, e2))
+	  if ((bb->loop_father != e2->src->loop_father
+	       && !loop_exit_edge_p (e2->src->loop_father, e2))
+	      || (e2->src->loop_father != e2->dest->loop_father
+		  && !loop_exit_edge_p (e2->src->loop_father, e2)))
 	    {
 	      /* Since this case is not handled by our special code
 		 to thread through a loop header, we must explicitly