diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 135c0c2..1bf192d 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -854,6 +854,8 @@ extern VEC (basic_block, heap) *get_dominated_by (enum cdi_direction, basic_bloc
 extern VEC (basic_block, heap) *get_dominated_by_region (enum cdi_direction,
 							 basic_block *,
 							 unsigned);
+extern VEC (basic_block, heap) *get_dominated_to_depth (enum cdi_direction,
+							basic_block, int);
 extern VEC (basic_block, heap) *get_all_dominated_blocks (enum cdi_direction,
 							  basic_block);
 extern void add_to_dominance_info (enum cdi_direction, basic_block);
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 3d576bf..83d019c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -8232,6 +8232,12 @@ an expression can travel.  This is currently supported only in
 code hoisting pass.  The lesser the cost, the more aggressive code hoisting
 will be.  The default value is 3.
 
+@item max-hoist-depth
+The depth of search in the dominator tree for expressions to hoist.
+This is used to avoid quadratic behavior in hoisting algorithm.
+The value of 0 will avoid limiting the search, but may slow down compilation
+of huge functions.  The default value is 30.
+
 @item max-unrolled-insns
 The maximum number of instructions that a loop should have if that loop
 is unrolled, and if the loop is unrolled, it determines how many times
diff --git a/gcc/dominance.c b/gcc/dominance.c
index 9c2dcf0..7861439 100644
--- a/gcc/dominance.c
+++ b/gcc/dominance.c
@@ -783,16 +783,20 @@ get_dominated_by_region (enum cdi_direction dir, basic_block *region,
 }
 
 /* Returns the list of basic blocks including BB dominated by BB, in the
-   direction DIR.  The vector will be sorted in preorder.  */
+   direction DIR up to DEPTH in the dominator tree.  The DEPTH of zero will
+   produce a vector containing all dominated blocks.  The vector will be sorted
+   in preorder.  */
 
 VEC (basic_block, heap) *
-get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
+get_dominated_to_depth (enum cdi_direction dir, basic_block bb, int depth)
 {
   VEC(basic_block, heap) *bbs = NULL;
   unsigned i;
+  unsigned next_level_start;
 
   i = 0;
   VEC_safe_push (basic_block, heap, bbs, bb);
+  next_level_start = 1; /* = VEC_length (basic_block, bbs); */
 
   do
     {
@@ -803,12 +807,24 @@ get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
 	   son;
 	   son = next_dom_son (dir, son))
 	VEC_safe_push (basic_block, heap, bbs, son);
+
+      if (i == next_level_start && --depth)
+	next_level_start = VEC_length (basic_block, bbs);
     }
-  while (i < VEC_length (basic_block, bbs));
+  while (i < next_level_start);
 
   return bbs;
 }
 
+/* Returns the list of basic blocks including BB dominated by BB, in the
+   direction DIR.  The vector will be sorted in preorder.  */
+
+VEC (basic_block, heap) *
+get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
+{
+  return get_dominated_to_depth (dir, bb, 0);
+}
+
 /* Redirect all edges pointing to BB to TO.  */
 void
 redirect_immediate_dominators (enum cdi_direction dir, basic_block bb,
diff --git a/gcc/gcse.c b/gcc/gcse.c
index d734fa4..d00a788 100644
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -4219,6 +4219,7 @@ compute_code_hoist_vbeinout (void)
 {
   int changed, passes;
   basic_block bb;
+  sbitmap tmp1, tmp2;
 
   sbitmap_vector_zero (hoist_vbeout, last_basic_block);
   sbitmap_vector_zero (hoist_vbein, last_basic_block);
@@ -4235,8 +4236,14 @@ compute_code_hoist_vbeinout (void)
       FOR_EACH_BB_REVERSE (bb)
 	{
 	  if (bb->next_bb != EXIT_BLOCK_PTR)
-	    sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
-					   hoist_vbein, bb->index);
+	    {
+	      sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
+					     hoist_vbein, bb->index);
+
+	      /* Remove from VBEout expressions that die right after BB.  */
+	      sbitmap_a_and_b (hoist_vbeout[bb->index],
+			       hoist_vbeout[bb->index], transpout[bb->index]);
+	    }
 
 	  changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
 					      antloc[bb->index],
@@ -4248,7 +4255,144 @@ compute_code_hoist_vbeinout (void)
     }
 
   if (dump_file)
-    fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
+    {
+      fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
+
+      FOR_EACH_BB (bb)
+        {
+	  fprintf (dump_file, "vbein (%d): ", bb->index);
+	  dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
+	  fprintf (dump_file, "vbeout(%d): ", bb->index);
+	  dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
+	}
+    }
+
+  /* Now cleanup VBEout to avoid moving expressions too far up.
+
+     We follow two rules to clean up VBEout[BB]:
+
+     1. If BB does not have any dominated blocks, nothing will ever be hoisted
+     to BB, so we can just wipe its VBEout clean.
+
+     2. If an expression can be hoisted both to BB and to a *single* successor
+     of BB in the dominator tree, then there is no point of hoisting
+     the expression to BB over BB's successor.  Doing otherwise would
+     unnecessarily extend live ranges.  */
+
+  /* Wipe VBEout of leaf blocks in the dominator tree.  */
+  FOR_EACH_BB (bb)
+    if (first_dom_son (CDI_DOMINATORS, bb) == NULL)
+      sbitmap_zero (hoist_vbeout[bb->index]);
+
+  tmp1 = sbitmap_alloc (expr_hash_table.n_elems);
+  tmp2 = sbitmap_alloc (expr_hash_table.n_elems);
+
+  /* We cannot cleanup VBEout in a single traversal.  There has to be both
+     upward and downward links when computing VBEout of current block to
+     avoid removing bits that shouldn't be removed.  E.g., consider
+     the following dominator tree; '*' marks blocks which compute same
+     expression, the expression can be freely moved; the expected result
+     is that we move computations of '*' from (3) and (6) to (2).
+
+       2
+      / \
+     3*  4
+        / \
+       5   6*
+
+     A walk over the above tree considering only downward links will first
+     remove '*' from VBEout[4] [as there's no point of hoisting
+     the expression to (4) if it's not computed in both (5) and (6)].
+     Then, when processing VBEout[2]. we won't see '*' as needed in (4),
+     so '*' will be removed from VBEout[2] too, leaving a copy of '*' in (3).
+
+     Therefore, we use iterative algorithm with both upward and downward
+     links to solve this problem.  The algorithm obviously converges as at
+     each iteration we make VBEout sets only smaller.  */
+
+  passes = 0;
+  changed = 1;
+
+  while (changed)
+    {
+      changed = 0;
+
+      FOR_EACH_BB (bb)
+        {
+	  basic_block son;
+	  bool first_p = true;
+
+	  /* Walk through dominated blocks and calculate the set of expressions
+	     that are needed in any one, and only one, of the blocks.
+	     TMP1 is the basis of what we want to remove from VBEout[BB].  */
+	  for (son = first_dom_son (CDI_DOMINATORS, bb);
+	       son != NULL;
+	       son = next_dom_son (CDI_DOMINATORS, son))
+	    {
+	      if (first_p)
+		{
+		  sbitmap_copy (tmp1, hoist_vbeout[son->index]);
+		  first_p = false;
+		}
+	      else
+		sbitmap_difference (tmp1, tmp1, hoist_vbeout[son->index]);
+	    }
+
+	  if (!first_p)
+	    {
+	      /* Now trim TMP1 to avoid removing too much.  */
+
+	      if (bb->prev_bb != ENTRY_BLOCK_PTR)
+		/* Remove epxressions from TMP1 that are needed upwards.
+		   These are VBEout[parent] minus expressions that are
+		   killed in BB (and, hence, don't get to VBEout[parent] from
+		   BB).  */
+		{
+		  basic_block parent;
+
+		  parent = get_immediate_dominator (CDI_DOMINATORS, bb);
+
+		  /* Expressions killed in BB.  */
+		  sbitmap_not (tmp2, transp[bb->index]);
+
+		  /* Expressions that reach from PARENT to the end of BB.  */
+		  sbitmap_difference (tmp2, hoist_vbeout[parent->index],
+				      tmp2);
+
+		  sbitmap_difference (tmp1, tmp1, tmp2);
+		}
+
+	      /* Never remove any of expressions computed in BB from
+		 VBEout[BB].  */
+	      sbitmap_difference (tmp1, tmp1, comp[bb->index]);
+
+	      if (sbitmap_any_common_bits (hoist_vbeout[bb->index], tmp1))
+		/* There is at least one bit that can be removed from
+		   VBEout[BB].  */
+		{
+		  sbitmap_difference (hoist_vbeout[bb->index],
+				      hoist_vbeout[bb->index], tmp1);
+		  changed = 1;
+		}
+	    }
+	}
+
+      passes++;
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "hoisting vbeout cleanup: %d passes\n", passes);
+
+      FOR_EACH_BB (bb)
+        {
+	  fprintf (dump_file, "vbeout(%d): ", bb->index);
+	  dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
+	}
+    }
+
+  sbitmap_free (tmp1);
+  sbitmap_free (tmp2);
 }
 
 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
@@ -4258,8 +4402,8 @@ compute_code_hoist_data (void)
 {
   compute_local_properties (transp, comp, antloc, &expr_hash_table);
   compute_transpout ();
-  compute_code_hoist_vbeinout ();
   calculate_dominance_info (CDI_DOMINATORS);
+  compute_code_hoist_vbeinout ();
   if (dump_file)
     fprintf (dump_file, "\n");
 }
@@ -4311,11 +4455,12 @@ hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
 
       if (pred->src == ENTRY_BLOCK_PTR)
 	break;
-      else if (pred_bb == expr_bb)
-	continue;
       else if (visited[pred_bb->index])
 	continue;
-
+      else if (!TEST_BIT (transpout[pred_bb->index], expr_index))
+	break;
+      else if (pred_bb == expr_bb)
+	continue;
       /* Does this predecessor generate this expression?  */
       else if (TEST_BIT (comp[pred_bb->index], expr_index))
 	break;
@@ -4403,15 +4548,15 @@ hoist_code (void)
       int found = 0;
       int insn_inserted_p;
 
-      domby = get_dominated_by (CDI_DOMINATORS, bb);
+      domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
+
       /* Examine each expression that is very busy at the exit of this
 	 block.  These are the potentially hoistable expressions.  */
       for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
 	{
 	  int hoistable = 0;
 
-	  if (TEST_BIT (hoist_vbeout[bb->index], i)
-	      && TEST_BIT (transpout[bb->index], i))
+	  if (TEST_BIT (hoist_vbeout[bb->index], i))
 	    {
 	      /* We've found a potentially hoistable expression, now
 		 we look at every block BB dominates to see if it
@@ -4438,6 +4583,14 @@ hoist_code (void)
 		      occr = find_occr_in_bb (expr->antic_occr, dominated);
 		      gcc_assert (occr);
 
+		      /* An occurence might've been already deleted
+			 while processing a dominator of BB.  */
+		      if (occr->deleted_p)
+			{
+			  gcc_assert (MAX_HOIST_DEPTH > 1);
+			  continue;
+			}
+
 		      gcc_assert (NONDEBUG_INSN_P (occr->insn));
 
 		      /* Adjust MAX_DISTANCE to account for the fact that
@@ -4516,6 +4669,14 @@ hoist_code (void)
 		      occr = find_occr_in_bb (expr->antic_occr, dominated);
 		      gcc_assert (occr);
 
+		      /* An occurence might've been already deleted
+			 while processing a dominator of BB.  */
+		      if (occr->deleted_p)
+			{
+			  gcc_assert (MAX_HOIST_DEPTH > 1);
+			  continue;
+			}
+
 		      gcc_assert (NONDEBUG_INSN_P (occr->insn));
 
 		      /* Adjust MAX_DISTANCE to account for the fact that
@@ -4546,6 +4707,14 @@ hoist_code (void)
 			  gcc_assert (occr);
 			}
 
+		      /* An occurence might've been already deleted
+			 while processing a dominator of BB.  */
+		      if (occr->deleted_p)
+			{
+			  gcc_assert (MAX_HOIST_DEPTH > 1);
+			  continue;
+			}
+
 		      insn = occr->insn;
 		      set = single_set (insn);
 		      gcc_assert (set);
diff --git a/gcc/params.def b/gcc/params.def
index 551e8e2..22737a0 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -240,6 +240,14 @@ DEFPARAM(PARAM_GCSE_UNRESTRICTED_COST,
 	 "Cost at which GCSE optimizations will not constraint the distance an expression can travel",
 	 3, 0, 0)
 
+/* How deep from a given basic block the dominator tree should be searched
+   for expressions to hoist to the block.  The value of 0 will avoid limiting
+   the search.  */
+DEFPARAM(PARAM_MAX_HOIST_DEPTH,
+	 "max-hoist-depth",
+	 "Maximum depth of search in the dominator tree for expressions to hoist",
+	 30, 0, 0)
+
 /* This parameter limits the number of insns in a loop that will be unrolled,
    and by how much the loop is unrolled.
 
diff --git a/gcc/params.h b/gcc/params.h
index 174edc1..aa96c81 100644
--- a/gcc/params.h
+++ b/gcc/params.h
@@ -129,6 +129,8 @@ typedef enum compiler_param
   PARAM_VALUE (PARAM_GCSE_COST_DISTANCE_RATIO)
 #define GCSE_UNRESTRICTED_COST \
   PARAM_VALUE (PARAM_GCSE_UNRESTRICTED_COST)
+#define MAX_HOIST_DEPTH \
+  PARAM_VALUE (PARAM_MAX_HOIST_DEPTH)
 #define MAX_UNROLLED_INSNS \
   PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)
 #define MAX_SMS_LOOP_NUMBER \
