Patchwork Simplify attempt_builtin_powi logic

login
register
mail settings
Submitter William J. Schmidt
Date May 17, 2012, 9:54 p.m.
Message ID <1337291691.25646.13.camel@gnopaine>
Download mbox | patch
Permalink /patch/160023/
State New
Headers show

Comments

William J. Schmidt - May 17, 2012, 9:54 p.m.
This patch gives up on using the reassociation rank algorithm to
correctly place __builtin_powi calls and their feeding multiplies.  In
the end this proved to introduce more complexity than it saved, due in
part to the poor fit of introducing DAG expressions into the
reassociated operand tree.  This patch returns to generating explicit
multiplies to bind the builtin calls together and to the results of the
expression tree rewrite.  I feel this version is smaller, easier to
understand, and less fragile than the existing code.

Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new
regressions.  Ok for trunk?

Thanks,
Bill


2012-05-17  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* tree-ssa-reassoc.c (bip_map): Remove decl.
	(completely_remove_stmt): Remove function.
	(remove_def_if_absorbed_call): Remove function.
	(remove_visited_stmt_chain): Remove __builtin_powi handling.
	(possibly_move_powi): Remove function.
	(rewrite_expr_tree): Remove calls to possibly_move_powi.
	(rewrite_expr_tree_parallel): Likewise.
	(attempt_builtin_powi): Build multiplies explicitly rather than
	relying on the ops vector and rank system.
	(transform_stmt_to_copy): New function.
	(transform_stmt_to_multiply): Likewise.
	(reassociate_bb): Handle leftover operations after __builtin_powi
	optimization; build a final multiply if necessary.
Richard Guenther - May 18, 2012, 10:08 a.m.
On Thu, 17 May 2012, William J. Schmidt wrote:

> This patch gives up on using the reassociation rank algorithm to
> correctly place __builtin_powi calls and their feeding multiplies.  In
> the end this proved to introduce more complexity than it saved, due in
> part to the poor fit of introducing DAG expressions into the
> reassociated operand tree.  This patch returns to generating explicit
> multiplies to bind the builtin calls together and to the results of the
> expression tree rewrite.  I feel this version is smaller, easier to
> understand, and less fragile than the existing code.
> 
> Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new
> regressions.  Ok for trunk?

Ok.

Thanks,
Richard.

> Thanks,
> Bill
> 
> 
> 2012-05-17  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> 
> 	* tree-ssa-reassoc.c (bip_map): Remove decl.
> 	(completely_remove_stmt): Remove function.
> 	(remove_def_if_absorbed_call): Remove function.
> 	(remove_visited_stmt_chain): Remove __builtin_powi handling.
> 	(possibly_move_powi): Remove function.
> 	(rewrite_expr_tree): Remove calls to possibly_move_powi.
> 	(rewrite_expr_tree_parallel): Likewise.
> 	(attempt_builtin_powi): Build multiplies explicitly rather than
> 	relying on the ops vector and rank system.
> 	(transform_stmt_to_copy): New function.
> 	(transform_stmt_to_multiply): Likewise.
> 	(reassociate_bb): Handle leftover operations after __builtin_powi
> 	optimization; build a final multiply if necessary.
> 
> 
> Index: gcc/tree-ssa-reassoc.c
> ===================================================================
> --- gcc/tree-ssa-reassoc.c	(revision 187626)
> +++ gcc/tree-ssa-reassoc.c	(working copy)
> @@ -200,10 +200,6 @@ static long *bb_rank;
>  /* Operand->rank hashtable.  */
>  static struct pointer_map_t *operand_rank;
>  
> -/* Map from inserted __builtin_powi calls to multiply chains that
> -   feed them.  */
> -static struct pointer_map_t *bip_map;
> -
>  /* Forward decls.  */
>  static long get_rank (tree);
>  
> @@ -2184,32 +2180,6 @@ is_phi_for_stmt (gimple stmt, tree operand)
>    return false;
>  }
>  
> -/* Remove STMT, unlink its virtual defs, and release its SSA defs.  */
> -
> -static inline void
> -completely_remove_stmt (gimple stmt)
> -{
> -  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
> -  gsi_remove (&gsi, true);
> -  unlink_stmt_vdef (stmt);
> -  release_defs (stmt);
> -}
> -
> -/* If OP is defined by a builtin call that has been absorbed by
> -   reassociation, remove its defining statement completely.  */
> -
> -static inline void
> -remove_def_if_absorbed_call (tree op)
> -{
> -  gimple stmt;
> -
> -  if (TREE_CODE (op) == SSA_NAME
> -      && has_zero_uses (op)
> -      && is_gimple_call ((stmt = SSA_NAME_DEF_STMT (op)))
> -      && gimple_visited_p (stmt))
> -    completely_remove_stmt (stmt);
> -}
> -
>  /* Remove def stmt of VAR if VAR has zero uses and recurse
>     on rhs1 operand if so.  */
>  
> @@ -2218,7 +2188,6 @@ remove_visited_stmt_chain (tree var)
>  {
>    gimple stmt;
>    gimple_stmt_iterator gsi;
> -  tree var2;
>  
>    while (1)
>      {
> @@ -2228,95 +2197,15 @@ remove_visited_stmt_chain (tree var)
>        if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
>  	{
>  	  var = gimple_assign_rhs1 (stmt);
> -	  var2 = gimple_assign_rhs2 (stmt);
>  	  gsi = gsi_for_stmt (stmt);
>  	  gsi_remove (&gsi, true);
>  	  release_defs (stmt);
> -	  /* A multiply whose operands are both fed by builtin pow/powi
> -	     calls must check whether to remove rhs2 as well.  */
> -	  remove_def_if_absorbed_call (var2);
>  	}
> -      else if (is_gimple_call (stmt) && gimple_visited_p (stmt))
> -	{
> -	  completely_remove_stmt (stmt);
> -	  return;
> -	}
>        else
>  	return;
>      }
>  }
>  
> -/* If OP is an SSA name, find its definition and determine whether it
> -   is a call to __builtin_powi.  If so, move the definition prior to
> -   STMT.  Only do this during early reassociation.  */
> -
> -static void
> -possibly_move_powi (gimple stmt, tree op)
> -{
> -  gimple stmt2, *mpy;
> -  tree fndecl;
> -  gimple_stmt_iterator gsi1, gsi2;
> -
> -  if (!first_pass_instance
> -      || !flag_unsafe_math_optimizations
> -      || TREE_CODE (op) != SSA_NAME)
> -    return;
> -  
> -  stmt2 = SSA_NAME_DEF_STMT (op);
> -
> -  if (!is_gimple_call (stmt2)
> -      || !has_single_use (gimple_call_lhs (stmt2)))
> -    return;
> -
> -  fndecl = gimple_call_fndecl (stmt2);
> -
> -  if (!fndecl
> -      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
> -    return;
> -
> -  switch (DECL_FUNCTION_CODE (fndecl))
> -    {
> -    CASE_FLT_FN (BUILT_IN_POWI):
> -      break;
> -    default:
> -      return;
> -    }
> -
> -  /* Move the __builtin_powi.  */
> -  gsi1 = gsi_for_stmt (stmt);
> -  gsi2 = gsi_for_stmt (stmt2);
> -  gsi_move_before (&gsi2, &gsi1);
> -
> -  /* See if there are multiplies feeding the __builtin_powi base
> -     argument that must also be moved.  */
> -  while ((mpy = (gimple *) pointer_map_contains (bip_map, stmt2)) != NULL)
> -    {
> -      /* If we've already moved this statement, we're done.  This is
> -         identified by a NULL entry for the statement in bip_map.  */
> -      gimple *next = (gimple *) pointer_map_contains (bip_map, *mpy);
> -      if (next && !*next)
> -	return;
> -
> -      stmt = stmt2;
> -      stmt2 = *mpy;
> -      gsi1 = gsi_for_stmt (stmt);
> -      gsi2 = gsi_for_stmt (stmt2);
> -      gsi_move_before (&gsi2, &gsi1);
> -
> -      /* The moved multiply may be DAG'd from multiple calls if it
> -	 was the result of a cached multiply.  Only move it once.
> -	 Rank order ensures we move it to the right place the first
> -	 time.  */
> -      if (next)
> -	*next = NULL;
> -      else
> -	{
> -	  next = (gimple *) pointer_map_insert (bip_map, *mpy);
> -	  *next = NULL;
> -	}
> -    }
> -}
> -
>  /* This function checks three consequtive operands in
>     passed operands vector OPS starting from OPINDEX and
>     swaps two operands if it is profitable for binary operation
> @@ -2421,9 +2310,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>  	      fprintf (dump_file, " into ");
>  	      print_gimple_stmt (dump_file, stmt, 0, 0);
>  	    }
> -
> -	  possibly_move_powi (stmt, oe1->op);
> -	  possibly_move_powi (stmt, oe2->op);
>  	}
>        return;
>      }
> @@ -2469,8 +2355,6 @@ rewrite_expr_tree (gimple stmt, unsigned int opind
>  	  fprintf (dump_file, " into ");
>  	  print_gimple_stmt (dump_file, stmt, 0, 0);
>  	}
> -
> -      possibly_move_powi (stmt, oe->op);
>      }
>    /* Recurse on the LHS of the binary operator, which is guaranteed to
>       be the non-leaf side.  */
> @@ -2644,9 +2528,6 @@ rewrite_expr_tree_parallel (gimple stmt, int width
>  	  fprintf (dump_file, " into ");
>  	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
>  	}
> -
> -      possibly_move_powi (stmts[i], op1);
> -      possibly_move_powi (stmts[i], op2);
>      }
>  
>    remove_visited_stmt_chain (last_rhs1);
> @@ -3222,11 +3103,10 @@ get_reassoc_pow_ssa_name (tree *target, tree type)
>  
>  /* Look for repeated operands in OPS in the multiply tree rooted at
>     STMT.  Replace them with an optimal sequence of multiplies and powi
> -   builtin calls, and remove the used operands from OPS.  Push new
> -   SSA names onto OPS that represent the introduced multiplies and
> -   builtin calls.  */
> +   builtin calls, and remove the used operands from OPS.  Return an
> +   SSA name representing the value of the replacement sequence.  */
>  
> -static void
> +static tree
>  attempt_builtin_powi (gimple stmt, VEC(operand_entry_t, heap) **ops,
>  		      tree *target)
>  {
> @@ -3235,6 +3115,7 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>    operand_entry_t oe;
>    repeat_factor_t rf1, rf2;
>    repeat_factor rfnew;
> +  tree result = NULL_TREE;
>    tree target_ssa, iter_result;
>    tree type = TREE_TYPE (gimple_get_lhs (stmt));
>    tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
> @@ -3244,7 +3125,7 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>    /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
>       target.  */
>    if (!powi_fndecl)
> -    return;
> +    return NULL_TREE;
>  
>    /* Allocate the repeated factor vector.  */
>    repeat_factor_vec = VEC_alloc (repeat_factor, heap, 10);
> @@ -3315,7 +3196,6 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>    while (true)
>      {
>        HOST_WIDE_INT power;
> -      gimple last_mul = NULL;
>  
>        /* First look for the largest cached product of factors from
>  	 preceding iterations.  If found, create a builtin_powi for
> @@ -3353,25 +3233,14 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>  	    }
>  	  else
>  	    {
> -	      gimple *value;
> -
>  	      iter_result = get_reassoc_pow_ssa_name (target, type);
>  	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 
>  					    build_int_cst (integer_type_node,
>  							   power));
>  	      gimple_call_set_lhs (pow_stmt, iter_result);
>  	      gimple_set_location (pow_stmt, gimple_location (stmt));
> -	      /* Temporarily place the call; we will move it and its
> -		 feeding multiplies to the correct place during
> -		 rewrite_expr.  */
>  	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
>  
> -	      if (!operand_equal_p (rf1->repr, rf1->factor, 0))
> -		{
> -		  value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> -		  *value = SSA_NAME_DEF_STMT (rf1->repr);
> -		}
> -
>  	      if (dump_file && (dump_flags & TDF_DETAILS))
>  		{
>  		  unsigned elt;
> @@ -3457,15 +3326,6 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>  		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
>  		  rf1->repr = target_ssa;
>  
> -		  /* Chain multiplies together for later movement.  */
> -		  if (last_mul)
> -		    {
> -		      gimple *value
> -			= (gimple *) pointer_map_insert (bip_map, mul_stmt);
> -		      *value = last_mul;
> -		    }
> -		  last_mul = mul_stmt;
> -
>  		  /* Don't reprocess the multiply we just introduced.  */
>  		  gimple_set_visited (mul_stmt, true);
>  		}
> @@ -3481,20 +3341,23 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>  	  gimple_call_set_lhs (pow_stmt, iter_result);
>  	  gimple_set_location (pow_stmt, gimple_location (stmt));
>  	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> +	}
>  
> -	  /* If we inserted a chain of multiplies before the pow_stmt,
> -	     record that fact so we can move it later when we move the
> -	     pow_stmt.  */
> -	  if (last_mul)
> -	    {
> -	      gimple *value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> -	      *value = last_mul;
> -	    }
> +      /* If we previously formed at least one other builtin_powi call,
> +	 form the product of this one and those others.  */
> +      if (result)
> +	{
> +	  tree new_result = get_reassoc_pow_ssa_name (target, type);
> +	  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
> +						   result, iter_result);
> +	  gimple_set_location (mul_stmt, gimple_location (stmt));
> +	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
> +	  gimple_set_visited (mul_stmt, true);
> +	  result = new_result;
>  	}
> +      else
> +	result = iter_result;
>  
> -      /* Append the result of this iteration to the ops vector.  */
> -      add_to_ops_vec (ops, iter_result);
> -
>        /* Decrement the occurrence count of each element in the product
>  	 by the count found above, and remove this many copies of each
>  	 factor from OPS.  */
> @@ -3534,8 +3397,61 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>       clean up.  */
>    VEC_qsort (operand_entry_t, *ops, sort_by_operand_rank);
>    VEC_free (repeat_factor, heap, repeat_factor_vec);
> +
> +  /* Return the final product computed herein.  Note that there may
> +     still be some elements with single occurrence count left in OPS;
> +     those will be handled by the normal reassociation logic.  */
> +  return result;
>  }
>  
> +/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
> +
> +static void
> +transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
> +{
> +  tree rhs1;
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "Transforming ");
> +      print_gimple_stmt (dump_file, stmt, 0, 0);
> +    }
> +
> +  rhs1 = gimple_assign_rhs1 (stmt);
> +  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
> +  update_stmt (stmt);
> +  remove_visited_stmt_chain (rhs1);
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, " into ");
> +      print_gimple_stmt (dump_file, stmt, 0, 0);
> +    }
> +}
> +
> +/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
> +
> +static void
> +transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
> +			    tree rhs1, tree rhs2)
> +{
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "Transforming ");
> +      print_gimple_stmt (dump_file, stmt, 0, 0);
> +    }
> +
> +  gimple_assign_set_rhs_with_ops_1 (gsi, MULT_EXPR, rhs1, rhs2, NULL_TREE);
> +  update_stmt (gsi_stmt (*gsi));
> +  remove_visited_stmt_chain (rhs1);
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, " into ");
> +      print_gimple_stmt (dump_file, stmt, 0, 0);
> +    }
> +}
> +
>  /* Reassociate expressions in basic block BB and its post-dominator as
>     children.  */
>  
> @@ -3606,7 +3522,7 @@ reassociate_bb (basic_block bb)
>  	  if (associative_tree_code (rhs_code))
>  	    {
>  	      VEC(operand_entry_t, heap) *ops = NULL;
> -	      bip_map = pointer_map_create ();
> +	      tree powi_result = NULL_TREE;
>  
>  	      /* There may be no immediate uses left by the time we
>  		 get here because we may have eliminated them all.  */
> @@ -3630,28 +3546,21 @@ reassociate_bb (basic_block bb)
>  	      if (first_pass_instance
>  		  && rhs_code == MULT_EXPR
>  		  && flag_unsafe_math_optimizations)
> -		attempt_builtin_powi (stmt, &ops, &target);
> +		powi_result = attempt_builtin_powi (stmt, &ops, &target);
>  
> -	      if (VEC_length (operand_entry_t, ops) == 1)
> +	      /* If the operand vector is now empty, all operands were 
> +		 consumed by the __builtin_powi optimization.  */
> +	      if (VEC_length (operand_entry_t, ops) == 0)
> +		transform_stmt_to_copy (&gsi, stmt, powi_result);
> +	      else if (VEC_length (operand_entry_t, ops) == 1)
>  		{
> -		  if (dump_file && (dump_flags & TDF_DETAILS))
> -		    {
> -		      fprintf (dump_file, "Transforming ");
> -		      print_gimple_stmt (dump_file, stmt, 0, 0);
> -		    }
> -
> -		  rhs1 = gimple_assign_rhs1 (stmt);
> -		  gimple_assign_set_rhs_from_tree (&gsi,
> -						   VEC_last (operand_entry_t,
> -							     ops)->op);
> -		  update_stmt (stmt);
> -		  remove_visited_stmt_chain (rhs1);
> -
> -		  if (dump_file && (dump_flags & TDF_DETAILS))
> -		    {
> -		      fprintf (dump_file, " into ");
> -		      print_gimple_stmt (dump_file, stmt, 0, 0);
> -		    }
> +		  tree last_op = VEC_last (operand_entry_t, ops)->op;
> +		  
> +		  if (powi_result)
> +		    transform_stmt_to_multiply (&gsi, stmt, last_op,
> +						powi_result);
> +		  else
> +		    transform_stmt_to_copy (&gsi, stmt, last_op);
>  		}
>  	      else
>  		{
> @@ -3668,10 +3577,27 @@ reassociate_bb (basic_block bb)
>  		    rewrite_expr_tree_parallel (stmt, width, ops);
>  		  else
>  		    rewrite_expr_tree (stmt, 0, ops, false);
> +
> +		  /* If we combined some repeated factors into a 
> +		     __builtin_powi call, multiply that result by the
> +		     reassociated operands.  */
> +		  if (powi_result)
> +		    {
> +		      gimple mul_stmt;
> +		      tree type = TREE_TYPE (gimple_get_lhs (stmt));
> +		      tree target_ssa = get_reassoc_pow_ssa_name (&target,
> +								  type);
> +		      gimple_set_lhs (stmt, target_ssa);
> +		      update_stmt (stmt);
> +		      mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
> +							       powi_result,
> +							       target_ssa);
> +		      gimple_set_location (mul_stmt, gimple_location (stmt));
> +		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
> +		    }
>  		}
>  
>  	      VEC_free (operand_entry_t, heap, ops);
> -	      pointer_map_destroy (bip_map);
>  	    }
>  	}
>      }
> 
> 
>

Patch

Index: gcc/tree-ssa-reassoc.c
===================================================================
--- gcc/tree-ssa-reassoc.c	(revision 187626)
+++ gcc/tree-ssa-reassoc.c	(working copy)
@@ -200,10 +200,6 @@  static long *bb_rank;
 /* Operand->rank hashtable.  */
 static struct pointer_map_t *operand_rank;
 
-/* Map from inserted __builtin_powi calls to multiply chains that
-   feed them.  */
-static struct pointer_map_t *bip_map;
-
 /* Forward decls.  */
 static long get_rank (tree);
 
@@ -2184,32 +2180,6 @@  is_phi_for_stmt (gimple stmt, tree operand)
   return false;
 }
 
-/* Remove STMT, unlink its virtual defs, and release its SSA defs.  */
-
-static inline void
-completely_remove_stmt (gimple stmt)
-{
-  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
-  gsi_remove (&gsi, true);
-  unlink_stmt_vdef (stmt);
-  release_defs (stmt);
-}
-
-/* If OP is defined by a builtin call that has been absorbed by
-   reassociation, remove its defining statement completely.  */
-
-static inline void
-remove_def_if_absorbed_call (tree op)
-{
-  gimple stmt;
-
-  if (TREE_CODE (op) == SSA_NAME
-      && has_zero_uses (op)
-      && is_gimple_call ((stmt = SSA_NAME_DEF_STMT (op)))
-      && gimple_visited_p (stmt))
-    completely_remove_stmt (stmt);
-}
-
 /* Remove def stmt of VAR if VAR has zero uses and recurse
    on rhs1 operand if so.  */
 
@@ -2218,7 +2188,6 @@  remove_visited_stmt_chain (tree var)
 {
   gimple stmt;
   gimple_stmt_iterator gsi;
-  tree var2;
 
   while (1)
     {
@@ -2228,95 +2197,15 @@  remove_visited_stmt_chain (tree var)
       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
 	{
 	  var = gimple_assign_rhs1 (stmt);
-	  var2 = gimple_assign_rhs2 (stmt);
 	  gsi = gsi_for_stmt (stmt);
 	  gsi_remove (&gsi, true);
 	  release_defs (stmt);
-	  /* A multiply whose operands are both fed by builtin pow/powi
-	     calls must check whether to remove rhs2 as well.  */
-	  remove_def_if_absorbed_call (var2);
 	}
-      else if (is_gimple_call (stmt) && gimple_visited_p (stmt))
-	{
-	  completely_remove_stmt (stmt);
-	  return;
-	}
       else
 	return;
     }
 }
 
-/* If OP is an SSA name, find its definition and determine whether it
-   is a call to __builtin_powi.  If so, move the definition prior to
-   STMT.  Only do this during early reassociation.  */
-
-static void
-possibly_move_powi (gimple stmt, tree op)
-{
-  gimple stmt2, *mpy;
-  tree fndecl;
-  gimple_stmt_iterator gsi1, gsi2;
-
-  if (!first_pass_instance
-      || !flag_unsafe_math_optimizations
-      || TREE_CODE (op) != SSA_NAME)
-    return;
-  
-  stmt2 = SSA_NAME_DEF_STMT (op);
-
-  if (!is_gimple_call (stmt2)
-      || !has_single_use (gimple_call_lhs (stmt2)))
-    return;
-
-  fndecl = gimple_call_fndecl (stmt2);
-
-  if (!fndecl
-      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
-    return;
-
-  switch (DECL_FUNCTION_CODE (fndecl))
-    {
-    CASE_FLT_FN (BUILT_IN_POWI):
-      break;
-    default:
-      return;
-    }
-
-  /* Move the __builtin_powi.  */
-  gsi1 = gsi_for_stmt (stmt);
-  gsi2 = gsi_for_stmt (stmt2);
-  gsi_move_before (&gsi2, &gsi1);
-
-  /* See if there are multiplies feeding the __builtin_powi base
-     argument that must also be moved.  */
-  while ((mpy = (gimple *) pointer_map_contains (bip_map, stmt2)) != NULL)
-    {
-      /* If we've already moved this statement, we're done.  This is
-         identified by a NULL entry for the statement in bip_map.  */
-      gimple *next = (gimple *) pointer_map_contains (bip_map, *mpy);
-      if (next && !*next)
-	return;
-
-      stmt = stmt2;
-      stmt2 = *mpy;
-      gsi1 = gsi_for_stmt (stmt);
-      gsi2 = gsi_for_stmt (stmt2);
-      gsi_move_before (&gsi2, &gsi1);
-
-      /* The moved multiply may be DAG'd from multiple calls if it
-	 was the result of a cached multiply.  Only move it once.
-	 Rank order ensures we move it to the right place the first
-	 time.  */
-      if (next)
-	*next = NULL;
-      else
-	{
-	  next = (gimple *) pointer_map_insert (bip_map, *mpy);
-	  *next = NULL;
-	}
-    }
-}
-
 /* This function checks three consequtive operands in
    passed operands vector OPS starting from OPINDEX and
    swaps two operands if it is profitable for binary operation
@@ -2421,9 +2310,6 @@  rewrite_expr_tree (gimple stmt, unsigned int opind
 	      fprintf (dump_file, " into ");
 	      print_gimple_stmt (dump_file, stmt, 0, 0);
 	    }
-
-	  possibly_move_powi (stmt, oe1->op);
-	  possibly_move_powi (stmt, oe2->op);
 	}
       return;
     }
@@ -2469,8 +2355,6 @@  rewrite_expr_tree (gimple stmt, unsigned int opind
 	  fprintf (dump_file, " into ");
 	  print_gimple_stmt (dump_file, stmt, 0, 0);
 	}
-
-      possibly_move_powi (stmt, oe->op);
     }
   /* Recurse on the LHS of the binary operator, which is guaranteed to
      be the non-leaf side.  */
@@ -2644,9 +2528,6 @@  rewrite_expr_tree_parallel (gimple stmt, int width
 	  fprintf (dump_file, " into ");
 	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
 	}
-
-      possibly_move_powi (stmts[i], op1);
-      possibly_move_powi (stmts[i], op2);
     }
 
   remove_visited_stmt_chain (last_rhs1);
@@ -3222,11 +3103,10 @@  get_reassoc_pow_ssa_name (tree *target, tree type)
 
 /* Look for repeated operands in OPS in the multiply tree rooted at
    STMT.  Replace them with an optimal sequence of multiplies and powi
-   builtin calls, and remove the used operands from OPS.  Push new
-   SSA names onto OPS that represent the introduced multiplies and
-   builtin calls.  */
+   builtin calls, and remove the used operands from OPS.  Return an
+   SSA name representing the value of the replacement sequence.  */
 
-static void
+static tree
 attempt_builtin_powi (gimple stmt, VEC(operand_entry_t, heap) **ops,
 		      tree *target)
 {
@@ -3235,6 +3115,7 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
   operand_entry_t oe;
   repeat_factor_t rf1, rf2;
   repeat_factor rfnew;
+  tree result = NULL_TREE;
   tree target_ssa, iter_result;
   tree type = TREE_TYPE (gimple_get_lhs (stmt));
   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
@@ -3244,7 +3125,7 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
      target.  */
   if (!powi_fndecl)
-    return;
+    return NULL_TREE;
 
   /* Allocate the repeated factor vector.  */
   repeat_factor_vec = VEC_alloc (repeat_factor, heap, 10);
@@ -3315,7 +3196,6 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
   while (true)
     {
       HOST_WIDE_INT power;
-      gimple last_mul = NULL;
 
       /* First look for the largest cached product of factors from
 	 preceding iterations.  If found, create a builtin_powi for
@@ -3353,25 +3233,14 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
 	    }
 	  else
 	    {
-	      gimple *value;
-
 	      iter_result = get_reassoc_pow_ssa_name (target, type);
 	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 
 					    build_int_cst (integer_type_node,
 							   power));
 	      gimple_call_set_lhs (pow_stmt, iter_result);
 	      gimple_set_location (pow_stmt, gimple_location (stmt));
-	      /* Temporarily place the call; we will move it and its
-		 feeding multiplies to the correct place during
-		 rewrite_expr.  */
 	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
 
-	      if (!operand_equal_p (rf1->repr, rf1->factor, 0))
-		{
-		  value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
-		  *value = SSA_NAME_DEF_STMT (rf1->repr);
-		}
-
 	      if (dump_file && (dump_flags & TDF_DETAILS))
 		{
 		  unsigned elt;
@@ -3457,15 +3326,6 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
 		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
 		  rf1->repr = target_ssa;
 
-		  /* Chain multiplies together for later movement.  */
-		  if (last_mul)
-		    {
-		      gimple *value
-			= (gimple *) pointer_map_insert (bip_map, mul_stmt);
-		      *value = last_mul;
-		    }
-		  last_mul = mul_stmt;
-
 		  /* Don't reprocess the multiply we just introduced.  */
 		  gimple_set_visited (mul_stmt, true);
 		}
@@ -3481,20 +3341,23 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
 	  gimple_call_set_lhs (pow_stmt, iter_result);
 	  gimple_set_location (pow_stmt, gimple_location (stmt));
 	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
+	}
 
-	  /* If we inserted a chain of multiplies before the pow_stmt,
-	     record that fact so we can move it later when we move the
-	     pow_stmt.  */
-	  if (last_mul)
-	    {
-	      gimple *value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
-	      *value = last_mul;
-	    }
+      /* If we previously formed at least one other builtin_powi call,
+	 form the product of this one and those others.  */
+      if (result)
+	{
+	  tree new_result = get_reassoc_pow_ssa_name (target, type);
+	  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
+						   result, iter_result);
+	  gimple_set_location (mul_stmt, gimple_location (stmt));
+	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
+	  gimple_set_visited (mul_stmt, true);
+	  result = new_result;
 	}
+      else
+	result = iter_result;
 
-      /* Append the result of this iteration to the ops vector.  */
-      add_to_ops_vec (ops, iter_result);
-
       /* Decrement the occurrence count of each element in the product
 	 by the count found above, and remove this many copies of each
 	 factor from OPS.  */
@@ -3534,8 +3397,61 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
      clean up.  */
   VEC_qsort (operand_entry_t, *ops, sort_by_operand_rank);
   VEC_free (repeat_factor, heap, repeat_factor_vec);
+
+  /* Return the final product computed herein.  Note that there may
+     still be some elements with single occurrence count left in OPS;
+     those will be handled by the normal reassociation logic.  */
+  return result;
 }
 
+/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
+
+static void
+transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
+{
+  tree rhs1;
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "Transforming ");
+      print_gimple_stmt (dump_file, stmt, 0, 0);
+    }
+
+  rhs1 = gimple_assign_rhs1 (stmt);
+  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
+  update_stmt (stmt);
+  remove_visited_stmt_chain (rhs1);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, " into ");
+      print_gimple_stmt (dump_file, stmt, 0, 0);
+    }
+}
+
+/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
+
+static void
+transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
+			    tree rhs1, tree rhs2)
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "Transforming ");
+      print_gimple_stmt (dump_file, stmt, 0, 0);
+    }
+
+  gimple_assign_set_rhs_with_ops_1 (gsi, MULT_EXPR, rhs1, rhs2, NULL_TREE);
+  update_stmt (gsi_stmt (*gsi));
+  remove_visited_stmt_chain (rhs1);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, " into ");
+      print_gimple_stmt (dump_file, stmt, 0, 0);
+    }
+}
+
 /* Reassociate expressions in basic block BB and its post-dominator as
    children.  */
 
@@ -3606,7 +3522,7 @@  reassociate_bb (basic_block bb)
 	  if (associative_tree_code (rhs_code))
 	    {
 	      VEC(operand_entry_t, heap) *ops = NULL;
-	      bip_map = pointer_map_create ();
+	      tree powi_result = NULL_TREE;
 
 	      /* There may be no immediate uses left by the time we
 		 get here because we may have eliminated them all.  */
@@ -3630,28 +3546,21 @@  reassociate_bb (basic_block bb)
 	      if (first_pass_instance
 		  && rhs_code == MULT_EXPR
 		  && flag_unsafe_math_optimizations)
-		attempt_builtin_powi (stmt, &ops, &target);
+		powi_result = attempt_builtin_powi (stmt, &ops, &target);
 
-	      if (VEC_length (operand_entry_t, ops) == 1)
+	      /* If the operand vector is now empty, all operands were 
+		 consumed by the __builtin_powi optimization.  */
+	      if (VEC_length (operand_entry_t, ops) == 0)
+		transform_stmt_to_copy (&gsi, stmt, powi_result);
+	      else if (VEC_length (operand_entry_t, ops) == 1)
 		{
-		  if (dump_file && (dump_flags & TDF_DETAILS))
-		    {
-		      fprintf (dump_file, "Transforming ");
-		      print_gimple_stmt (dump_file, stmt, 0, 0);
-		    }
-
-		  rhs1 = gimple_assign_rhs1 (stmt);
-		  gimple_assign_set_rhs_from_tree (&gsi,
-						   VEC_last (operand_entry_t,
-							     ops)->op);
-		  update_stmt (stmt);
-		  remove_visited_stmt_chain (rhs1);
-
-		  if (dump_file && (dump_flags & TDF_DETAILS))
-		    {
-		      fprintf (dump_file, " into ");
-		      print_gimple_stmt (dump_file, stmt, 0, 0);
-		    }
+		  tree last_op = VEC_last (operand_entry_t, ops)->op;
+		  
+		  if (powi_result)
+		    transform_stmt_to_multiply (&gsi, stmt, last_op,
+						powi_result);
+		  else
+		    transform_stmt_to_copy (&gsi, stmt, last_op);
 		}
 	      else
 		{
@@ -3668,10 +3577,27 @@  reassociate_bb (basic_block bb)
 		    rewrite_expr_tree_parallel (stmt, width, ops);
 		  else
 		    rewrite_expr_tree (stmt, 0, ops, false);
+
+		  /* If we combined some repeated factors into a 
+		     __builtin_powi call, multiply that result by the
+		     reassociated operands.  */
+		  if (powi_result)
+		    {
+		      gimple mul_stmt;
+		      tree type = TREE_TYPE (gimple_get_lhs (stmt));
+		      tree target_ssa = get_reassoc_pow_ssa_name (&target,
+								  type);
+		      gimple_set_lhs (stmt, target_ssa);
+		      update_stmt (stmt);
+		      mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
+							       powi_result,
+							       target_ssa);
+		      gimple_set_location (mul_stmt, gimple_location (stmt));
+		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
+		    }
 		}
 
 	      VEC_free (operand_entry_t, heap, ops);
-	      pointer_map_destroy (bip_map);
 	    }
 	}
     }